Auto merge of #86368 - michaelwoerister:lexing-ice, r=davidtwco
Disambiguate between SourceFiles from different crates even if they have the same path This PR fixes an ICE that can occur when the compiler encounters a source file that is part of both the local crate and an upstream crate: 1. While importing source files from an upstream crate the compiler creates a `SourceFile` entry for `foo.rs` in the `SourceMap`. Since this is an imported source file its `src` field is `None`. 2. At a later point the parser encounters `foo.rs` again. It tells the `SourceMap` to load the file but because we already have an entry for `foo.rs` the `SourceMap` will return the existing version with `src == None`. 3. The parser proceeds under the assumption that `src.is_some()` and panics when actually trying to use the file's contents. This PR fixes the issue by adding the source file's associated `CrateNum` to the `SourceMap`'s interning key. As a consequence the two instances of the file will each have a separate entry in the `SourceMap`. They just happen to share the same file path. This approach seemed less problematic to me than trying to mutate the `SourceFile` after it had already been created. Another, more involved, approach might be to merge the `src` and the `external_src` field. Fixes #85955
This commit is contained in:
commit
80926fc409
4 changed files with 89 additions and 15 deletions
|
@ -54,7 +54,7 @@ pub struct OnDiskCache<'sess> {
|
||||||
cnum_map: OnceCell<UnhashMap<StableCrateId, CrateNum>>,
|
cnum_map: OnceCell<UnhashMap<StableCrateId, CrateNum>>,
|
||||||
|
|
||||||
source_map: &'sess SourceMap,
|
source_map: &'sess SourceMap,
|
||||||
file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
|
file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,
|
||||||
|
|
||||||
// Caches that are populated lazily during decoding.
|
// Caches that are populated lazily during decoding.
|
||||||
file_index_to_file: Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
|
file_index_to_file: Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
|
||||||
|
@ -111,7 +111,7 @@ pub struct OnDiskCache<'sess> {
|
||||||
// This type is used only for serialization and deserialization.
|
// This type is used only for serialization and deserialization.
|
||||||
#[derive(Encodable, Decodable)]
|
#[derive(Encodable, Decodable)]
|
||||||
struct Footer {
|
struct Footer {
|
||||||
file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
|
file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,
|
||||||
query_result_index: EncodedQueryResultIndex,
|
query_result_index: EncodedQueryResultIndex,
|
||||||
diagnostics_index: EncodedQueryResultIndex,
|
diagnostics_index: EncodedQueryResultIndex,
|
||||||
// The location of all allocations.
|
// The location of all allocations.
|
||||||
|
@ -157,6 +157,32 @@ crate struct RawDefId {
|
||||||
pub index: u32,
|
pub index: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An `EncodedSourceFileId` is the same as a `StableSourceFileId` except that
|
||||||
|
/// the source crate is represented as a [StableCrateId] instead of as a
|
||||||
|
/// `CrateNum`. This way `EncodedSourceFileId` can be encoded and decoded
|
||||||
|
/// without any additional context, i.e. with a simple `opaque::Decoder` (which
|
||||||
|
/// is the only thing available when decoding the cache's [Footer].
|
||||||
|
#[derive(Encodable, Decodable, Clone, Debug)]
|
||||||
|
struct EncodedSourceFileId {
|
||||||
|
file_name_hash: u64,
|
||||||
|
stable_crate_id: StableCrateId,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncodedSourceFileId {
|
||||||
|
fn translate(&self, cnum_map: &UnhashMap<StableCrateId, CrateNum>) -> StableSourceFileId {
|
||||||
|
let cnum = cnum_map[&self.stable_crate_id];
|
||||||
|
StableSourceFileId { file_name_hash: self.file_name_hash, cnum }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId {
|
||||||
|
let source_file_id = StableSourceFileId::new(file);
|
||||||
|
EncodedSourceFileId {
|
||||||
|
file_name_hash: source_file_id.file_name_hash,
|
||||||
|
stable_crate_id: tcx.stable_crate_id(source_file_id.cnum),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'sess> OnDiskCache<'sess> {
|
impl<'sess> OnDiskCache<'sess> {
|
||||||
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
|
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
|
||||||
pub fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
|
pub fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
|
||||||
|
@ -238,7 +264,8 @@ impl<'sess> OnDiskCache<'sess> {
|
||||||
let index = SourceFileIndex(index as u32);
|
let index = SourceFileIndex(index as u32);
|
||||||
let file_ptr: *const SourceFile = &**file as *const _;
|
let file_ptr: *const SourceFile = &**file as *const _;
|
||||||
file_to_file_index.insert(file_ptr, index);
|
file_to_file_index.insert(file_ptr, index);
|
||||||
file_index_to_stable_id.insert(index, StableSourceFileId::new(&file));
|
let source_file_id = EncodedSourceFileId::new(tcx, &file);
|
||||||
|
file_index_to_stable_id.insert(index, source_file_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
(file_to_file_index, file_index_to_stable_id)
|
(file_to_file_index, file_index_to_stable_id)
|
||||||
|
@ -605,7 +632,7 @@ pub struct CacheDecoder<'a, 'tcx> {
|
||||||
source_map: &'a SourceMap,
|
source_map: &'a SourceMap,
|
||||||
cnum_map: &'a UnhashMap<StableCrateId, CrateNum>,
|
cnum_map: &'a UnhashMap<StableCrateId, CrateNum>,
|
||||||
file_index_to_file: &'a Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
|
file_index_to_file: &'a Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
|
||||||
file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, StableSourceFileId>,
|
file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, EncodedSourceFileId>,
|
||||||
alloc_decoding_session: AllocDecodingSession<'a>,
|
alloc_decoding_session: AllocDecodingSession<'a>,
|
||||||
syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
|
syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
|
||||||
expn_data: &'a FxHashMap<u32, AbsoluteBytePos>,
|
expn_data: &'a FxHashMap<u32, AbsoluteBytePos>,
|
||||||
|
@ -618,6 +645,7 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
|
||||||
ref file_index_to_file,
|
ref file_index_to_file,
|
||||||
ref file_index_to_stable_id,
|
ref file_index_to_stable_id,
|
||||||
ref source_map,
|
ref source_map,
|
||||||
|
ref cnum_map,
|
||||||
..
|
..
|
||||||
} = *self;
|
} = *self;
|
||||||
|
|
||||||
|
@ -625,7 +653,7 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
|
||||||
.borrow_mut()
|
.borrow_mut()
|
||||||
.entry(index)
|
.entry(index)
|
||||||
.or_insert_with(|| {
|
.or_insert_with(|| {
|
||||||
let stable_id = file_index_to_stable_id[&index];
|
let stable_id = file_index_to_stable_id[&index].translate(cnum_map);
|
||||||
source_map
|
source_map
|
||||||
.source_file_by_stable_id(stable_id)
|
.source_file_by_stable_id(stable_id)
|
||||||
.expect("failed to lookup `SourceFile` in new context")
|
.expect("failed to lookup `SourceFile` in new context")
|
||||||
|
|
|
@ -117,25 +117,42 @@ impl FileLoader for RealFileLoader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is a `SourceFile` identifier that is used to correlate `SourceFile`s between
|
/// This is a [SourceFile] identifier that is used to correlate source files between
|
||||||
// subsequent compilation sessions (which is something we need to do during
|
/// subsequent compilation sessions (which is something we need to do during
|
||||||
// incremental compilation).
|
/// incremental compilation).
|
||||||
|
///
|
||||||
|
/// The [StableSourceFileId] also contains the CrateNum of the crate the source
|
||||||
|
/// file was originally parsed for. This way we get two separate entries in
|
||||||
|
/// the [SourceMap] if the same file is part of both the local and an upstream
|
||||||
|
/// crate. Trying to only have one entry for both cases is problematic because
|
||||||
|
/// at the point where we discover that there's a local use of the file in
|
||||||
|
/// addition to the upstream one, we might already have made decisions based on
|
||||||
|
/// the assumption that it's an upstream file. Treating the two files as
|
||||||
|
/// different has no real downsides.
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
|
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
|
||||||
pub struct StableSourceFileId(u128);
|
pub struct StableSourceFileId {
|
||||||
|
// A hash of the source file's FileName. This is hash so that it's size
|
||||||
|
// is more predictable than if we included the actual FileName value.
|
||||||
|
pub file_name_hash: u64,
|
||||||
|
|
||||||
|
// The CrateNum of the crate this source file was originally parsed for.
|
||||||
|
// We cannot include this information in the hash because at the time
|
||||||
|
// of hashing we don't have the context to map from the CrateNum's numeric
|
||||||
|
// value to a StableCrateId.
|
||||||
|
pub cnum: CrateNum,
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME: we need a more globally consistent approach to the problem solved by
|
// FIXME: we need a more globally consistent approach to the problem solved by
|
||||||
// StableSourceFileId, perhaps built atop source_file.name_hash.
|
// StableSourceFileId, perhaps built atop source_file.name_hash.
|
||||||
impl StableSourceFileId {
|
impl StableSourceFileId {
|
||||||
pub fn new(source_file: &SourceFile) -> StableSourceFileId {
|
pub fn new(source_file: &SourceFile) -> StableSourceFileId {
|
||||||
StableSourceFileId::new_from_name(&source_file.name)
|
StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_from_name(name: &FileName) -> StableSourceFileId {
|
fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
|
||||||
let mut hasher = StableHasher::new();
|
let mut hasher = StableHasher::new();
|
||||||
|
|
||||||
name.hash(&mut hasher);
|
name.hash(&mut hasher);
|
||||||
|
StableSourceFileId { file_name_hash: hasher.finish(), cnum }
|
||||||
StableSourceFileId(hasher.finish())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,7 +291,7 @@ impl SourceMap {
|
||||||
// be empty, so the working directory will be used.
|
// be empty, so the working directory will be used.
|
||||||
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
|
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
|
||||||
|
|
||||||
let file_id = StableSourceFileId::new_from_name(&filename);
|
let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
|
||||||
|
|
||||||
let lrc_sf = match self.source_file_by_stable_id(file_id) {
|
let lrc_sf = match self.source_file_by_stable_id(file_id) {
|
||||||
Some(lrc_sf) => lrc_sf,
|
Some(lrc_sf) => lrc_sf,
|
||||||
|
@ -288,6 +305,10 @@ impl SourceMap {
|
||||||
self.hash_kind,
|
self.hash_kind,
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// Let's make sure the file_id we generated above actually matches
|
||||||
|
// the ID we generate for the SourceFile we just created.
|
||||||
|
debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
|
||||||
|
|
||||||
let mut files = self.files.borrow_mut();
|
let mut files = self.files.borrow_mut();
|
||||||
|
|
||||||
files.source_files.push(source_file.clone());
|
files.source_files.push(source_file.clone());
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
#[inline]
|
||||||
|
pub fn some_function() -> u32 {
|
||||||
|
1
|
||||||
|
}
|
21
src/test/ui/include-macros/same-file-in-two-crates.rs
Normal file
21
src/test/ui/include-macros/same-file-in-two-crates.rs
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
// This test makes sure that the compiler can handle the same source file to be
|
||||||
|
// part of the local crate *and* an upstream crate. This can happen, for example,
|
||||||
|
// when there is some auto-generated code that is part of both a library and an
|
||||||
|
// accompanying integration test.
|
||||||
|
//
|
||||||
|
// The test uses include!() to include a source file that is also part of
|
||||||
|
// an upstream crate.
|
||||||
|
//
|
||||||
|
// This is a regression test for https://github.com/rust-lang/rust/issues/85955.
|
||||||
|
|
||||||
|
// check-pass
|
||||||
|
// compile-flags: --crate-type=rlib
|
||||||
|
// aux-build:same-file-in-two-crates-aux.rs
|
||||||
|
extern crate same_file_in_two_crates_aux;
|
||||||
|
|
||||||
|
pub fn foo() -> u32 {
|
||||||
|
same_file_in_two_crates_aux::some_function() +
|
||||||
|
some_function()
|
||||||
|
}
|
||||||
|
|
||||||
|
include!("./auxiliary/same-file-in-two-crates-aux.rs");
|
Loading…
Add table
Add a link
Reference in a new issue