Auto merge of #110083 - saethlin:encode-hashes-as-bytes, r=cjgillot
Encode hashes as bytes, not varint. In a few places, we store hashes as `u64` or `u128` and then apply `derive(Decodable, Encodable)` to the enclosing struct/enum. It is more efficient to encode hashes directly than to try to apply some varint encoding. This PR adds two new types, `Hash64` and `Hash128`, which are produced by `StableHasher` and replace every use of storing a `u64` or `u128` that represents a hash. Distribution of the byte lengths of leb128 encodings, from `x build --stage 2` with `incremental = true`: Before: ``` ( 1) 373418203 (53.7%, 53.7%): 1 ( 2) 196240113 (28.2%, 81.9%): 3 ( 3) 108157958 (15.6%, 97.5%): 2 ( 4) 17213120 ( 2.5%, 99.9%): 4 ( 5) 223614 ( 0.0%,100.0%): 9 ( 6) 216262 ( 0.0%,100.0%): 10 ( 7) 15447 ( 0.0%,100.0%): 5 ( 8) 3633 ( 0.0%,100.0%): 19 ( 9) 3030 ( 0.0%,100.0%): 8 ( 10) 1167 ( 0.0%,100.0%): 18 ( 11) 1032 ( 0.0%,100.0%): 7 ( 12) 1003 ( 0.0%,100.0%): 6 ( 13) 10 ( 0.0%,100.0%): 16 ( 14) 10 ( 0.0%,100.0%): 17 ( 15) 5 ( 0.0%,100.0%): 12 ( 16) 4 ( 0.0%,100.0%): 14 ``` After: ``` ( 1) 372939136 (53.7%, 53.7%): 1 ( 2) 196240140 (28.3%, 82.0%): 3 ( 3) 108014969 (15.6%, 97.5%): 2 ( 4) 17192375 ( 2.5%,100.0%): 4 ( 5) 435 ( 0.0%,100.0%): 5 ( 6) 83 ( 0.0%,100.0%): 18 ( 7) 79 ( 0.0%,100.0%): 10 ( 8) 50 ( 0.0%,100.0%): 9 ( 9) 6 ( 0.0%,100.0%): 19 ``` The remaining 9- or 10-byte and 18- or 19-byte encodings are `u64` and `u128` values, respectively, that have the high bits set. As far as I can tell, these are coming primarily from `SwitchTargets`.
This commit is contained in:
commit
b3f1379509
38 changed files with 289 additions and 138 deletions
|
@ -1,12 +1,11 @@
|
|||
use crate::{HashStableContext, Symbol};
|
||||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
|
||||
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher, ToStableHashKey};
|
||||
use rustc_data_structures::unhash::Unhasher;
|
||||
use rustc_data_structures::AtomicRef;
|
||||
use rustc_index::vec::Idx;
|
||||
use rustc_macros::HashStable_Generic;
|
||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt;
|
||||
use std::hash::{BuildHasherDefault, Hash, Hasher};
|
||||
|
||||
|
@ -105,20 +104,20 @@ impl DefPathHash {
|
|||
/// originates from.
|
||||
#[inline]
|
||||
pub fn stable_crate_id(&self) -> StableCrateId {
|
||||
StableCrateId(self.0.as_value().0)
|
||||
StableCrateId(self.0.split().0)
|
||||
}
|
||||
|
||||
/// Returns the crate-local part of the [DefPathHash].
|
||||
///
|
||||
/// Used for tests.
|
||||
#[inline]
|
||||
pub fn local_hash(&self) -> u64 {
|
||||
self.0.as_value().1
|
||||
pub fn local_hash(&self) -> Hash64 {
|
||||
self.0.split().1
|
||||
}
|
||||
|
||||
/// Builds a new [DefPathHash] with the given [StableCrateId] and
|
||||
/// `local_hash`, where `local_hash` must be unique within its crate.
|
||||
pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash {
|
||||
pub fn new(stable_crate_id: StableCrateId, local_hash: Hash64) -> DefPathHash {
|
||||
DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash))
|
||||
}
|
||||
}
|
||||
|
@ -129,13 +128,6 @@ impl Default for DefPathHash {
|
|||
}
|
||||
}
|
||||
|
||||
impl Borrow<Fingerprint> for DefPathHash {
|
||||
#[inline]
|
||||
fn borrow(&self) -> &Fingerprint {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`StableCrateId`] is a 64-bit hash of a crate name, together with all
|
||||
/// `-Cmetadata` arguments, and some other data. It is to [`CrateNum`] what [`DefPathHash`] is to
|
||||
/// [`DefId`]. It is stable across compilation sessions.
|
||||
|
@ -147,15 +139,11 @@ impl Borrow<Fingerprint> for DefPathHash {
|
|||
///
|
||||
/// For more information on the possibility of hash collisions in rustc,
|
||||
/// see the discussion in [`DefId`].
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
#[derive(HashStable_Generic, Encodable, Decodable)]
|
||||
pub struct StableCrateId(pub(crate) u64);
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
#[derive(Hash, HashStable_Generic, Encodable, Decodable)]
|
||||
pub struct StableCrateId(pub(crate) Hash64);
|
||||
|
||||
impl StableCrateId {
|
||||
pub fn to_u64(self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Computes the stable ID for a crate with the given name and
|
||||
/// `-Cmetadata` arguments.
|
||||
pub fn new(crate_name: Symbol, is_exe: bool, mut metadata: Vec<String>) -> StableCrateId {
|
||||
|
@ -197,6 +185,17 @@ impl StableCrateId {
|
|||
|
||||
StableCrateId(hasher.finish())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_u64(self) -> u64 {
|
||||
self.0.as_u64()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::LowerHex for StableCrateId {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::LowerHex::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
rustc_index::newtype_index! {
|
||||
|
|
|
@ -33,7 +33,7 @@ use crate::def_id::{CrateNum, DefId, StableCrateId, CRATE_DEF_ID, LOCAL_CRATE};
|
|||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||
use rustc_data_structures::stable_hasher::HashingControls;
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
|
||||
use rustc_data_structures::sync::{Lock, Lrc};
|
||||
use rustc_data_structures::unhash::UnhashMap;
|
||||
use rustc_index::vec::IndexVec;
|
||||
|
@ -123,15 +123,15 @@ impl ExpnHash {
|
|||
/// originates from.
|
||||
#[inline]
|
||||
pub fn stable_crate_id(self) -> StableCrateId {
|
||||
StableCrateId(self.0.as_value().0)
|
||||
StableCrateId(self.0.split().0)
|
||||
}
|
||||
|
||||
/// Returns the crate-local part of the [ExpnHash].
|
||||
///
|
||||
/// Used for tests.
|
||||
#[inline]
|
||||
pub fn local_hash(self) -> u64 {
|
||||
self.0.as_value().1
|
||||
pub fn local_hash(self) -> Hash64 {
|
||||
self.0.split().1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -141,7 +141,7 @@ impl ExpnHash {
|
|||
|
||||
/// Builds a new [ExpnHash] with the given [StableCrateId] and
|
||||
/// `local_hash`, where `local_hash` must be unique within its crate.
|
||||
fn new(stable_crate_id: StableCrateId, local_hash: u64) -> ExpnHash {
|
||||
fn new(stable_crate_id: StableCrateId, local_hash: Hash64) -> ExpnHash {
|
||||
ExpnHash(Fingerprint::new(stable_crate_id.0, local_hash))
|
||||
}
|
||||
}
|
||||
|
@ -350,7 +350,7 @@ pub struct HygieneData {
|
|||
/// would have collisions without a disambiguator.
|
||||
/// The keys of this map are always computed with `ExpnData.disambiguator`
|
||||
/// set to 0.
|
||||
expn_data_disambiguators: FxHashMap<u64, u32>,
|
||||
expn_data_disambiguators: FxHashMap<Hash64, u32>,
|
||||
}
|
||||
|
||||
impl HygieneData {
|
||||
|
@ -1040,7 +1040,7 @@ impl ExpnData {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
fn hash_expn(&self, ctx: &mut impl HashStableContext) -> u64 {
|
||||
fn hash_expn(&self, ctx: &mut impl HashStableContext) -> Hash64 {
|
||||
let mut hasher = StableHasher::new();
|
||||
self.hash_stable(ctx, &mut hasher);
|
||||
hasher.finish()
|
||||
|
|
|
@ -59,7 +59,7 @@ pub mod fatal_error;
|
|||
|
||||
pub mod profiling;
|
||||
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher};
|
||||
use rustc_data_structures::sync::{Lock, Lrc};
|
||||
|
||||
use std::borrow::Cow;
|
||||
|
@ -282,22 +282,22 @@ impl RealFileName {
|
|||
pub enum FileName {
|
||||
Real(RealFileName),
|
||||
/// Call to `quote!`.
|
||||
QuoteExpansion(u64),
|
||||
QuoteExpansion(Hash64),
|
||||
/// Command line.
|
||||
Anon(u64),
|
||||
Anon(Hash64),
|
||||
/// Hack in `src/librustc_ast/parse.rs`.
|
||||
// FIXME(jseyfried)
|
||||
MacroExpansion(u64),
|
||||
ProcMacroSourceCode(u64),
|
||||
MacroExpansion(Hash64),
|
||||
ProcMacroSourceCode(Hash64),
|
||||
/// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`.
|
||||
CfgSpec(u64),
|
||||
CfgSpec(Hash64),
|
||||
/// Strings provided as crate attributes in the CLI.
|
||||
CliCrateAttr(u64),
|
||||
CliCrateAttr(Hash64),
|
||||
/// Custom sources for explicit parser calls from plugins and drivers.
|
||||
Custom(String),
|
||||
DocTest(PathBuf, isize),
|
||||
/// Post-substitution inline assembly from LLVM.
|
||||
InlineAsm(u64),
|
||||
InlineAsm(Hash64),
|
||||
}
|
||||
|
||||
impl From<PathBuf> for FileName {
|
||||
|
@ -1343,7 +1343,7 @@ pub struct SourceFile {
|
|||
/// Locations of characters removed during normalization.
|
||||
pub normalized_pos: Vec<NormalizedPos>,
|
||||
/// A hash of the filename, used for speeding up hashing in incremental compilation.
|
||||
pub name_hash: u128,
|
||||
pub name_hash: Hash128,
|
||||
/// Indicates which crate this `SourceFile` was imported from.
|
||||
pub cnum: CrateNum,
|
||||
}
|
||||
|
@ -1472,7 +1472,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
};
|
||||
let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
|
||||
let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
|
||||
let name_hash: u128 = Decodable::decode(d);
|
||||
let name_hash = Decodable::decode(d);
|
||||
let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
|
||||
let cnum: CrateNum = Decodable::decode(d);
|
||||
SourceFile {
|
||||
|
@ -1514,7 +1514,7 @@ impl SourceFile {
|
|||
let name_hash = {
|
||||
let mut hasher: StableHasher = StableHasher::new();
|
||||
name.hash(&mut hasher);
|
||||
hasher.finish::<u128>()
|
||||
hasher.finish()
|
||||
};
|
||||
let end_pos = start_pos.to_usize() + src.len();
|
||||
assert!(end_pos <= u32::MAX as usize);
|
||||
|
|
|
@ -13,7 +13,7 @@ pub use crate::hygiene::{ExpnData, ExpnKind};
|
|||
pub use crate::*;
|
||||
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::stable_hasher::StableHasher;
|
||||
use rustc_data_structures::stable_hasher::{Hash128, Hash64, StableHasher};
|
||||
use rustc_data_structures::sync::{AtomicU32, Lrc, MappedReadGuard, ReadGuard, RwLock};
|
||||
use std::cmp;
|
||||
use std::hash::Hash;
|
||||
|
@ -138,7 +138,7 @@ impl FileLoader for RealFileLoader {
|
|||
pub struct StableSourceFileId {
|
||||
/// A hash of the source file's [`FileName`]. This is hashed so that its size
|
||||
/// is more predictable than if we included the actual [`FileName`] value.
|
||||
pub file_name_hash: u64,
|
||||
pub file_name_hash: Hash64,
|
||||
|
||||
/// The [`CrateNum`] of the crate this source file was originally parsed for.
|
||||
/// We cannot include this information in the hash because at the time
|
||||
|
@ -331,7 +331,7 @@ impl SourceMap {
|
|||
&self,
|
||||
filename: FileName,
|
||||
src_hash: SourceFileHash,
|
||||
name_hash: u128,
|
||||
name_hash: Hash128,
|
||||
source_len: usize,
|
||||
cnum: CrateNum,
|
||||
file_local_lines: Lock<SourceFileLines>,
|
||||
|
|
|
@ -3,8 +3,12 @@ use super::*;
|
|||
#[test]
|
||||
fn test_lookup_line() {
|
||||
let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
|
||||
let sf =
|
||||
SourceFile::new(FileName::Anon(0), source, BytePos(3), SourceFileHashAlgorithm::Sha256);
|
||||
let sf = SourceFile::new(
|
||||
FileName::Anon(Hash64::ZERO),
|
||||
source,
|
||||
BytePos(3),
|
||||
SourceFileHashAlgorithm::Sha256,
|
||||
);
|
||||
sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)]));
|
||||
|
||||
assert_eq!(sf.lookup_line(BytePos(0)), None);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue