1
Fork 0

Auto merge of #110083 - saethlin:encode-hashes-as-bytes, r=cjgillot

Encode hashes as bytes, not varint

In a few places, we store hashes as `u64` or `u128` and then apply `derive(Decodable, Encodable)` to the enclosing struct/enum. It is more efficient to encode hashes directly than try to apply some varint encoding. This PR adds two new types `Hash64` and `Hash128` which are produced by `StableHasher` and replace every use of storing a `u64` or `u128` that represents a hash.

Distribution of the byte lengths of leb128 encodings, from `x build --stage 2` with `incremental = true`

Before:
```
(  1) 373418203 (53.7%, 53.7%): 1
(  2) 196240113 (28.2%, 81.9%): 3
(  3) 108157958 (15.6%, 97.5%): 2
(  4)  17213120 ( 2.5%, 99.9%): 4
(  5)    223614 ( 0.0%,100.0%): 9
(  6)    216262 ( 0.0%,100.0%): 10
(  7)     15447 ( 0.0%,100.0%): 5
(  8)      3633 ( 0.0%,100.0%): 19
(  9)      3030 ( 0.0%,100.0%): 8
( 10)      1167 ( 0.0%,100.0%): 18
( 11)      1032 ( 0.0%,100.0%): 7
( 12)      1003 ( 0.0%,100.0%): 6
( 13)        10 ( 0.0%,100.0%): 16
( 14)        10 ( 0.0%,100.0%): 17
( 15)         5 ( 0.0%,100.0%): 12
( 16)         4 ( 0.0%,100.0%): 14
```

After:
```
(  1) 372939136 (53.7%, 53.7%): 1
(  2) 196240140 (28.3%, 82.0%): 3
(  3) 108014969 (15.6%, 97.5%): 2
(  4)  17192375 ( 2.5%,100.0%): 4
(  5)       435 ( 0.0%,100.0%): 5
(  6)        83 ( 0.0%,100.0%): 18
(  7)        79 ( 0.0%,100.0%): 10
(  8)        50 ( 0.0%,100.0%): 9
(  9)         6 ( 0.0%,100.0%): 19
```

The remaining 9 or 10 and 18 or 19 are `u64` and `u128` respectively that have the high bits set. As far as I can tell these are coming primarily from `SwitchTargets`.
This commit is contained in:
bors 2023-04-18 22:27:15 +00:00
commit b3f1379509
38 changed files with 289 additions and 138 deletions

View file

@ -1,12 +1,11 @@
use crate::{HashStableContext, Symbol};
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::unhash::Unhasher;
use rustc_data_structures::AtomicRef;
use rustc_index::vec::Idx;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::borrow::Borrow;
use std::fmt;
use std::hash::{BuildHasherDefault, Hash, Hasher};
@ -105,20 +104,20 @@ impl DefPathHash {
/// originates from.
#[inline]
pub fn stable_crate_id(&self) -> StableCrateId {
StableCrateId(self.0.as_value().0)
StableCrateId(self.0.split().0)
}
/// Returns the crate-local part of the [DefPathHash].
///
/// Used for tests.
#[inline]
pub fn local_hash(&self) -> u64 {
self.0.as_value().1
pub fn local_hash(&self) -> Hash64 {
self.0.split().1
}
/// Builds a new [DefPathHash] with the given [StableCrateId] and
/// `local_hash`, where `local_hash` must be unique within its crate.
pub fn new(stable_crate_id: StableCrateId, local_hash: u64) -> DefPathHash {
pub fn new(stable_crate_id: StableCrateId, local_hash: Hash64) -> DefPathHash {
DefPathHash(Fingerprint::new(stable_crate_id.0, local_hash))
}
}
@ -129,13 +128,6 @@ impl Default for DefPathHash {
}
}
impl Borrow<Fingerprint> for DefPathHash {
#[inline]
fn borrow(&self) -> &Fingerprint {
&self.0
}
}
/// A [`StableCrateId`] is a 64-bit hash of a crate name, together with all
/// `-Cmetadata` arguments, and some other data. It is to [`CrateNum`] what [`DefPathHash`] is to
/// [`DefId`]. It is stable across compilation sessions.
@ -147,15 +139,11 @@ impl Borrow<Fingerprint> for DefPathHash {
///
/// For more information on the possibility of hash collisions in rustc,
/// see the discussion in [`DefId`].
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct StableCrateId(pub(crate) u64);
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[derive(Hash, HashStable_Generic, Encodable, Decodable)]
pub struct StableCrateId(pub(crate) Hash64);
impl StableCrateId {
pub fn to_u64(self) -> u64 {
self.0
}
/// Computes the stable ID for a crate with the given name and
/// `-Cmetadata` arguments.
pub fn new(crate_name: Symbol, is_exe: bool, mut metadata: Vec<String>) -> StableCrateId {
@ -197,6 +185,17 @@ impl StableCrateId {
StableCrateId(hasher.finish())
}
#[inline]
pub fn as_u64(self) -> u64 {
self.0.as_u64()
}
}
impl fmt::LowerHex for StableCrateId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.0, f)
}
}
rustc_index::newtype_index! {

View file

@ -33,7 +33,7 @@ use crate::def_id::{CrateNum, DefId, StableCrateId, CRATE_DEF_ID, LOCAL_CRATE};
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stable_hasher::HashingControls;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_data_structures::sync::{Lock, Lrc};
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::vec::IndexVec;
@ -123,15 +123,15 @@ impl ExpnHash {
/// originates from.
#[inline]
pub fn stable_crate_id(self) -> StableCrateId {
StableCrateId(self.0.as_value().0)
StableCrateId(self.0.split().0)
}
/// Returns the crate-local part of the [ExpnHash].
///
/// Used for tests.
#[inline]
pub fn local_hash(self) -> u64 {
self.0.as_value().1
pub fn local_hash(self) -> Hash64 {
self.0.split().1
}
#[inline]
@ -141,7 +141,7 @@ impl ExpnHash {
/// Builds a new [ExpnHash] with the given [StableCrateId] and
/// `local_hash`, where `local_hash` must be unique within its crate.
fn new(stable_crate_id: StableCrateId, local_hash: u64) -> ExpnHash {
fn new(stable_crate_id: StableCrateId, local_hash: Hash64) -> ExpnHash {
ExpnHash(Fingerprint::new(stable_crate_id.0, local_hash))
}
}
@ -350,7 +350,7 @@ pub struct HygieneData {
/// would have collisions without a disambiguator.
/// The keys of this map are always computed with `ExpnData.disambiguator`
/// set to 0.
expn_data_disambiguators: FxHashMap<u64, u32>,
expn_data_disambiguators: FxHashMap<Hash64, u32>,
}
impl HygieneData {
@ -1040,7 +1040,7 @@ impl ExpnData {
}
#[inline]
fn hash_expn(&self, ctx: &mut impl HashStableContext) -> u64 {
fn hash_expn(&self, ctx: &mut impl HashStableContext) -> Hash64 {
let mut hasher = StableHasher::new();
self.hash_stable(ctx, &mut hasher);
hasher.finish()

View file

@ -59,7 +59,7 @@ pub mod fatal_error;
pub mod profiling;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher};
use rustc_data_structures::sync::{Lock, Lrc};
use std::borrow::Cow;
@ -282,22 +282,22 @@ impl RealFileName {
pub enum FileName {
Real(RealFileName),
/// Call to `quote!`.
QuoteExpansion(u64),
QuoteExpansion(Hash64),
/// Command line.
Anon(u64),
Anon(Hash64),
/// Hack in `src/librustc_ast/parse.rs`.
// FIXME(jseyfried)
MacroExpansion(u64),
ProcMacroSourceCode(u64),
MacroExpansion(Hash64),
ProcMacroSourceCode(Hash64),
/// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`.
CfgSpec(u64),
CfgSpec(Hash64),
/// Strings provided as crate attributes in the CLI.
CliCrateAttr(u64),
CliCrateAttr(Hash64),
/// Custom sources for explicit parser calls from plugins and drivers.
Custom(String),
DocTest(PathBuf, isize),
/// Post-substitution inline assembly from LLVM.
InlineAsm(u64),
InlineAsm(Hash64),
}
impl From<PathBuf> for FileName {
@ -1343,7 +1343,7 @@ pub struct SourceFile {
/// Locations of characters removed during normalization.
pub normalized_pos: Vec<NormalizedPos>,
/// A hash of the filename, used for speeding up hashing in incremental compilation.
pub name_hash: u128,
pub name_hash: Hash128,
/// Indicates which crate this `SourceFile` was imported from.
pub cnum: CrateNum,
}
@ -1472,7 +1472,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
};
let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
let name_hash: u128 = Decodable::decode(d);
let name_hash = Decodable::decode(d);
let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
let cnum: CrateNum = Decodable::decode(d);
SourceFile {
@ -1514,7 +1514,7 @@ impl SourceFile {
let name_hash = {
let mut hasher: StableHasher = StableHasher::new();
name.hash(&mut hasher);
hasher.finish::<u128>()
hasher.finish()
};
let end_pos = start_pos.to_usize() + src.len();
assert!(end_pos <= u32::MAX as usize);

View file

@ -13,7 +13,7 @@ pub use crate::hygiene::{ExpnData, ExpnKind};
pub use crate::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::StableHasher;
use rustc_data_structures::stable_hasher::{Hash128, Hash64, StableHasher};
use rustc_data_structures::sync::{AtomicU32, Lrc, MappedReadGuard, ReadGuard, RwLock};
use std::cmp;
use std::hash::Hash;
@ -138,7 +138,7 @@ impl FileLoader for RealFileLoader {
pub struct StableSourceFileId {
/// A hash of the source file's [`FileName`]. This is hash so that it's size
/// is more predictable than if we included the actual [`FileName`] value.
pub file_name_hash: u64,
pub file_name_hash: Hash64,
/// The [`CrateNum`] of the crate this source file was originally parsed for.
/// We cannot include this information in the hash because at the time
@ -331,7 +331,7 @@ impl SourceMap {
&self,
filename: FileName,
src_hash: SourceFileHash,
name_hash: u128,
name_hash: Hash128,
source_len: usize,
cnum: CrateNum,
file_local_lines: Lock<SourceFileLines>,

View file

@ -3,8 +3,12 @@ use super::*;
#[test]
fn test_lookup_line() {
let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
let sf =
SourceFile::new(FileName::Anon(0), source, BytePos(3), SourceFileHashAlgorithm::Sha256);
let sf = SourceFile::new(
FileName::Anon(Hash64::ZERO),
source,
BytePos(3),
SourceFileHashAlgorithm::Sha256,
);
sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)]));
assert_eq!(sf.lookup_line(BytePos(0)), None);