Store hashes in special types so they aren't accidentally encoded as numbers

This commit is contained in:
Ben Kimock 2023-04-07 23:11:20 -04:00
parent de96f3d873
commit 0445fbdd83
38 changed files with 274 additions and 138 deletions

View file

@ -1,4 +1,4 @@
use crate::stable_hasher;
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::hash::{Hash, Hasher};
@ -9,32 +9,47 @@ mod tests;
#[repr(C)]
pub struct Fingerprint(u64, u64);
pub trait FingerprintComponent {
fn as_u64(&self) -> u64;
}
impl FingerprintComponent for Hash64 {
fn as_u64(&self) -> u64 {
Hash64::as_u64(*self)
}
}
impl FingerprintComponent for u64 {
fn as_u64(&self) -> u64 {
*self
}
}
impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);
#[inline]
pub fn new(_0: u64, _1: u64) -> Fingerprint {
Fingerprint(_0, _1)
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint
where
A: FingerprintComponent,
B: FingerprintComponent,
{
Fingerprint(_0.as_u64(), _1.as_u64())
}
#[inline]
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
Fingerprint(hash, hash)
}
#[inline]
pub fn to_smaller_hash(&self) -> u64 {
pub fn to_smaller_hash(&self) -> Hash64 {
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halves makes sure we get a good quality hash in such cases too.
self.0.wrapping_mul(3).wrapping_add(self.1)
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1))
}
#[inline]
pub fn as_value(&self) -> (u64, u64) {
(self.0, self.1)
pub fn split(&self) -> (Hash64, Hash64) {
(Hash64::new(self.0), Hash64::new(self.1))
}
#[inline]
@ -131,9 +146,9 @@ impl FingerprintHasher for crate::unhash::Unhasher {
}
}
impl stable_hasher::StableHasherResult for Fingerprint {
impl StableHasherResult for Fingerprint {
#[inline]
fn finish(hasher: stable_hasher::StableHasher) -> Self {
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Fingerprint(_0, _1)
}

View file

@ -1,11 +1,12 @@
use super::*;
use crate::stable_hasher::Hash64;
// Check that `combine_commutative` is order independent.
#[test]
fn combine_commutative_is_order_independent() {
let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610);
let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d);
let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43);
let a = Fingerprint::new(Hash64::new(0xf6622fb349898b06), Hash64::new(0x70be9377b2f9c610));
let b = Fingerprint::new(Hash64::new(0xa9562bf5a2a5303c), Hash64::new(0x67d9b6c82034f13d));
let c = Fingerprint::new(Hash64::new(0x0d013a27811dbbc3), Hash64::new(0x9a3f7b3d9142ec43));
let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)];
let f = a.combine_commutative(b).combine_commutative(c);
for p in &permutations {

View file

@ -0,0 +1,120 @@
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::fmt;
use std::ops::BitXorAssign;
use crate::stable_hasher::{StableHasher, StableHasherResult};
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash64 {
inner: u64,
}
impl Hash64 {
pub const ZERO: Hash64 = Hash64 { inner: 0 };
#[inline]
pub(crate) fn new(n: u64) -> Self {
Self { inner: n }
}
#[inline]
pub fn as_u64(self) -> u64 {
self.inner
}
}
impl BitXorAssign<u64> for Hash64 {
fn bitxor_assign(&mut self, rhs: u64) {
self.inner ^= rhs;
}
}
impl<S: Encoder> Encodable<S> for Hash64 {
#[inline]
fn encode(&self, s: &mut S) {
s.emit_raw_bytes(&self.inner.to_le_bytes());
}
}
impl<D: Decoder> Decodable<D> for Hash64 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) }
}
}
impl StableHasherResult for Hash64 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
Self { inner: hasher.finalize().0 }
}
}
impl fmt::Debug for Hash64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}
impl fmt::LowerHex for Hash64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.inner, f)
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash128 {
inner: u128,
}
impl Hash128 {
#[inline]
pub fn truncate(self) -> Hash64 {
Hash64 { inner: self.inner as u64 }
}
#[inline]
pub fn wrapping_add(self, other: Self) -> Self {
Self {
inner: self.inner.wrapping_add(other.inner),
}
}
#[inline]
pub fn as_u128(self) -> u128 {
self.inner
}
}
impl<S: Encoder> Encodable<S> for Hash128 {
#[inline]
fn encode(&self, s: &mut S) {
s.emit_raw_bytes(&self.inner.to_le_bytes());
}
}
impl<D: Decoder> Decodable<D> for Hash128 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) }
}
}
impl StableHasherResult for Hash128 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Self { inner: u128::from(_0) | (u128::from(_1) << 64) }
}
}
impl fmt::Debug for Hash128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}
impl fmt::LowerHex for Hash128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.inner, f)
}
}

View file

@ -86,6 +86,7 @@ pub mod work_queue;
pub use atomic_ref::AtomicRef;
pub mod aligned;
pub mod frozen;
mod hashes;
pub mod owned_slice;
pub mod sso;
pub mod steal;

View file

@ -2,6 +2,7 @@ use crate::sip128::SipHasher128;
use rustc_index::bit_set;
use rustc_index::vec;
use smallvec::SmallVec;
use std::fmt;
use std::hash::{BuildHasher, Hash, Hasher};
use std::marker::PhantomData;
use std::mem;
@ -9,6 +10,8 @@ use std::mem;
#[cfg(test)]
mod tests;
pub use crate::hashes::{Hash128, Hash64};
/// When hashing something that ends up affecting properties like symbol names,
/// we want these symbol names to be calculated independently of other factors
/// like what architecture you're compiling *from*.
@ -20,8 +23,8 @@ pub struct StableHasher {
state: SipHasher128,
}
impl ::std::fmt::Debug for StableHasher {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl fmt::Debug for StableHasher {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self.state)
}
}
@ -42,21 +45,6 @@ impl StableHasher {
}
}
impl StableHasherResult for u128 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
u128::from(_0) | (u128::from(_1) << 64)
}
}
impl StableHasherResult for u64 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
hasher.finalize().0
}
}
impl StableHasher {
#[inline]
pub fn finalize(self) -> (u64, u64) {
@ -287,6 +275,9 @@ impl_stable_traits_for_trivial_type!(i128);
impl_stable_traits_for_trivial_type!(char);
impl_stable_traits_for_trivial_type!(());
impl_stable_traits_for_trivial_type!(Hash64);
impl_stable_traits_for_trivial_type!(Hash128);
impl<CTX> HashStable<CTX> for ! {
fn hash_stable(&self, _ctx: &mut CTX, _hasher: &mut StableHasher) {
unreachable!()
@ -669,7 +660,7 @@ fn stable_hash_reduce<HCX, I, C, F>(
.map(|value| {
let mut hasher = StableHasher::new();
hash_function(&mut hasher, hcx, value);
hasher.finish::<u128>()
hasher.finish::<Hash128>()
})
.reduce(|accum, value| accum.wrapping_add(value));
hash.hash_stable(hcx, hasher);

View file

@ -72,7 +72,7 @@ fn test_hash_isize() {
assert_eq!(h.finalize(), expected);
}
fn hash<T: HashStable<()>>(t: &T) -> u128 {
fn hash<T: HashStable<()>>(t: &T) -> Hash128 {
let mut h = StableHasher::new();
let ctx = &mut ();
t.hash_stable(ctx, &mut h);

View file

@ -24,7 +24,7 @@ impl Svh {
}
pub fn as_u64(&self) -> u64 {
self.hash.to_smaller_hash()
self.hash.to_smaller_hash().as_u64()
}
pub fn to_string(&self) -> String {