Auto merge of #119139 - michaelwoerister:cleanup-stable-source-file-id, r=cjgillot
Unify SourceFile::name_hash and StableSourceFileId This PR adapts the existing `StableSourceFileId` type so that it can be used instead of the `name_hash` field of `SourceFile`. This simplifies a few things that were kind of duplicated before. The PR should also fix issues https://github.com/rust-lang/rust/issues/112700 and https://github.com/rust-lang/rust/issues/115835, but I was not able to reproduce these issues in a regression test. As far as I can tell, the root cause of these issues is that the id of the originating crate is not hashed in the `HashStable` impl of `Span` and thus cache entries that should have been considered invalidated were loaded. After this PR, the `stable_id` field of `SourceFile` includes information about the originating crate, so that ICE should not occur anymore.
This commit is contained in:
commit
bf8716f1cd
10 changed files with 131 additions and 118 deletions
|
@ -58,7 +58,7 @@ pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
|
|||
pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
|
||||
use rustc_data_structures::stable_hasher::HashingControls;
|
||||
pub mod def_id;
|
||||
use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
|
||||
use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, StableCrateId, LOCAL_CRATE};
|
||||
pub mod edit_distance;
|
||||
mod span_encoding;
|
||||
pub use span_encoding::{Span, DUMMY_SP};
|
||||
|
@ -1333,8 +1333,10 @@ pub struct SourceFile {
|
|||
pub non_narrow_chars: Vec<NonNarrowChar>,
|
||||
/// Locations of characters removed during normalization.
|
||||
pub normalized_pos: Vec<NormalizedPos>,
|
||||
/// A hash of the filename, used for speeding up hashing in incremental compilation.
|
||||
pub name_hash: Hash128,
|
||||
/// A hash of the filename & crate-id, used for uniquely identifying source
|
||||
/// files within the crate graph and for speeding up hashing in incremental
|
||||
/// compilation.
|
||||
pub stable_id: StableSourceFileId,
|
||||
/// Indicates which crate this `SourceFile` was imported from.
|
||||
pub cnum: CrateNum,
|
||||
}
|
||||
|
@ -1352,7 +1354,7 @@ impl Clone for SourceFile {
|
|||
multibyte_chars: self.multibyte_chars.clone(),
|
||||
non_narrow_chars: self.non_narrow_chars.clone(),
|
||||
normalized_pos: self.normalized_pos.clone(),
|
||||
name_hash: self.name_hash,
|
||||
stable_id: self.stable_id,
|
||||
cnum: self.cnum,
|
||||
}
|
||||
}
|
||||
|
@ -1426,7 +1428,7 @@ impl<S: Encoder> Encodable<S> for SourceFile {
|
|||
|
||||
self.multibyte_chars.encode(s);
|
||||
self.non_narrow_chars.encode(s);
|
||||
self.name_hash.encode(s);
|
||||
self.stable_id.encode(s);
|
||||
self.normalized_pos.encode(s);
|
||||
self.cnum.encode(s);
|
||||
}
|
||||
|
@ -1453,7 +1455,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
};
|
||||
let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
|
||||
let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
|
||||
let name_hash = Decodable::decode(d);
|
||||
let stable_id = Decodable::decode(d);
|
||||
let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
|
||||
let cnum: CrateNum = Decodable::decode(d);
|
||||
SourceFile {
|
||||
|
@ -1469,7 +1471,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
normalized_pos,
|
||||
name_hash,
|
||||
stable_id,
|
||||
cnum,
|
||||
}
|
||||
}
|
||||
|
@ -1481,6 +1483,66 @@ impl fmt::Debug for SourceFile {
|
|||
}
|
||||
}
|
||||
|
||||
/// This is a [SourceFile] identifier that is used to correlate source files between
|
||||
/// subsequent compilation sessions (which is something we need to do during
|
||||
/// incremental compilation).
|
||||
///
|
||||
/// It is a hash value (so we can efficiently consume it when stable-hashing
|
||||
/// spans) that consists of the `FileName` and the `StableCrateId` of the crate
|
||||
/// the source file is from. The crate id is needed because sometimes the
|
||||
/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for
|
||||
/// example).
|
||||
///
|
||||
/// The way the crate-id part is handled is a bit special: source files of the
|
||||
/// local crate are hashed as `(filename, None)`, while source files from
|
||||
/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This
|
||||
/// is because SourceFiles for the local crate are allocated very early in the
|
||||
/// compilation process when the `StableCrateId` is not yet known. If, due to
|
||||
/// some refactoring of the compiler, the `StableCrateId` of the local crate
|
||||
/// were to become available, it would be better to uniformely make this a
|
||||
/// hash of `(filename, stable_crate_id)`.
|
||||
///
|
||||
/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId`
|
||||
/// is updated to incorporate the `StableCrateId` of the exporting crate.
|
||||
#[derive(
|
||||
Debug,
|
||||
Clone,
|
||||
Copy,
|
||||
Hash,
|
||||
PartialEq,
|
||||
Eq,
|
||||
HashStable_Generic,
|
||||
Encodable,
|
||||
Decodable,
|
||||
Default,
|
||||
PartialOrd,
|
||||
Ord
|
||||
)]
|
||||
pub struct StableSourceFileId(Hash128);
|
||||
|
||||
impl StableSourceFileId {
|
||||
fn from_filename_in_current_crate(filename: &FileName) -> Self {
|
||||
Self::from_filename_and_stable_crate_id(filename, None)
|
||||
}
|
||||
|
||||
pub fn from_filename_for_export(
|
||||
filename: &FileName,
|
||||
local_crate_stable_crate_id: StableCrateId,
|
||||
) -> Self {
|
||||
Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id))
|
||||
}
|
||||
|
||||
fn from_filename_and_stable_crate_id(
|
||||
filename: &FileName,
|
||||
stable_crate_id: Option<StableCrateId>,
|
||||
) -> Self {
|
||||
let mut hasher = StableHasher::new();
|
||||
filename.hash(&mut hasher);
|
||||
stable_crate_id.hash(&mut hasher);
|
||||
StableSourceFileId(hasher.finish())
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceFile {
|
||||
pub fn new(
|
||||
name: FileName,
|
||||
|
@ -1491,11 +1553,7 @@ impl SourceFile {
|
|||
let src_hash = SourceFileHash::new(hash_kind, &src);
|
||||
let normalized_pos = normalize_src(&mut src);
|
||||
|
||||
let name_hash = {
|
||||
let mut hasher: StableHasher = StableHasher::new();
|
||||
name.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
};
|
||||
let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
|
||||
let source_len = src.len();
|
||||
let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?;
|
||||
|
||||
|
@ -1513,7 +1571,7 @@ impl SourceFile {
|
|||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
normalized_pos,
|
||||
name_hash,
|
||||
stable_id,
|
||||
cnum: LOCAL_CRATE,
|
||||
})
|
||||
}
|
||||
|
@ -2213,7 +2271,7 @@ where
|
|||
};
|
||||
|
||||
Hash::hash(&TAG_VALID_SPAN, hasher);
|
||||
Hash::hash(&file.name_hash, hasher);
|
||||
Hash::hash(&file.stable_id, hasher);
|
||||
|
||||
// Hash both the length and the end location (line/column) of a span. If we
|
||||
// hash only the length, for example, then two otherwise equal spans with
|
||||
|
|
|
@ -13,7 +13,6 @@ use crate::*;
|
|||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::sync::{IntoDynSyncSend, MappedReadGuard, ReadGuard, RwLock};
|
||||
use std::fs;
|
||||
use std::hash::Hash;
|
||||
use std::io::{self, BorrowedBuf, Read};
|
||||
use std::path::{self};
|
||||
|
||||
|
@ -152,45 +151,6 @@ impl FileLoader for RealFileLoader {
|
|||
}
|
||||
}
|
||||
|
||||
/// This is a [SourceFile] identifier that is used to correlate source files between
|
||||
/// subsequent compilation sessions (which is something we need to do during
|
||||
/// incremental compilation).
|
||||
///
|
||||
/// The [StableSourceFileId] also contains the CrateNum of the crate the source
|
||||
/// file was originally parsed for. This way we get two separate entries in
|
||||
/// the [SourceMap] if the same file is part of both the local and an upstream
|
||||
/// crate. Trying to only have one entry for both cases is problematic because
|
||||
/// at the point where we discover that there's a local use of the file in
|
||||
/// addition to the upstream one, we might already have made decisions based on
|
||||
/// the assumption that it's an upstream file. Treating the two files as
|
||||
/// different has no real downsides.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
|
||||
pub struct StableSourceFileId {
|
||||
/// A hash of the source file's [`FileName`]. This is hash so that it's size
|
||||
/// is more predictable than if we included the actual [`FileName`] value.
|
||||
pub file_name_hash: Hash64,
|
||||
|
||||
/// The [`CrateNum`] of the crate this source file was originally parsed for.
|
||||
/// We cannot include this information in the hash because at the time
|
||||
/// of hashing we don't have the context to map from the [`CrateNum`]'s numeric
|
||||
/// value to a `StableCrateId`.
|
||||
pub cnum: CrateNum,
|
||||
}
|
||||
|
||||
// FIXME: we need a more globally consistent approach to the problem solved by
|
||||
// StableSourceFileId, perhaps built atop source_file.name_hash.
|
||||
impl StableSourceFileId {
|
||||
pub fn new(source_file: &SourceFile) -> StableSourceFileId {
|
||||
StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
|
||||
}
|
||||
|
||||
fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
|
||||
let mut hasher = StableHasher::new();
|
||||
name.hash(&mut hasher);
|
||||
StableSourceFileId { file_name_hash: hasher.finish(), cnum }
|
||||
}
|
||||
}
|
||||
|
||||
// _____________________________________________________________________________
|
||||
// SourceMap
|
||||
//
|
||||
|
@ -320,17 +280,17 @@ impl SourceMap {
|
|||
// be empty, so the working directory will be used.
|
||||
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
|
||||
|
||||
let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
|
||||
match self.source_file_by_stable_id(file_id) {
|
||||
let stable_id = StableSourceFileId::from_filename_in_current_crate(&filename);
|
||||
match self.source_file_by_stable_id(stable_id) {
|
||||
Some(lrc_sf) => Ok(lrc_sf),
|
||||
None => {
|
||||
let source_file = SourceFile::new(filename, src, self.hash_kind)?;
|
||||
|
||||
// Let's make sure the file_id we generated above actually matches
|
||||
// the ID we generate for the SourceFile we just created.
|
||||
debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
|
||||
debug_assert_eq!(source_file.stable_id, stable_id);
|
||||
|
||||
self.register_source_file(file_id, source_file)
|
||||
self.register_source_file(stable_id, source_file)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -343,7 +303,7 @@ impl SourceMap {
|
|||
&self,
|
||||
filename: FileName,
|
||||
src_hash: SourceFileHash,
|
||||
name_hash: Hash128,
|
||||
stable_id: StableSourceFileId,
|
||||
source_len: u32,
|
||||
cnum: CrateNum,
|
||||
file_local_lines: FreezeLock<SourceFileLines>,
|
||||
|
@ -368,12 +328,11 @@ impl SourceMap {
|
|||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
normalized_pos,
|
||||
name_hash,
|
||||
stable_id,
|
||||
cnum,
|
||||
};
|
||||
|
||||
let file_id = StableSourceFileId::new(&source_file);
|
||||
self.register_source_file(file_id, source_file)
|
||||
self.register_source_file(stable_id, source_file)
|
||||
.expect("not enough address space for imported source file")
|
||||
}
|
||||
|
||||
|
|
|
@ -234,14 +234,14 @@ fn t10() {
|
|||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
normalized_pos,
|
||||
name_hash,
|
||||
stable_id,
|
||||
..
|
||||
} = (*src_file).clone();
|
||||
|
||||
let imported_src_file = sm.new_imported_source_file(
|
||||
name,
|
||||
src_hash,
|
||||
name_hash,
|
||||
stable_id,
|
||||
source_len.to_u32(),
|
||||
CrateNum::new(0),
|
||||
FreezeLock::new(lines.read().clone()),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue