Add a dedicated length-prefixing method to Hasher
This accomplishes two main goals:
- Make it clear who is responsible for prefix-freedom, including how they should do it
- Make it feasible for a `Hasher` that *doesn't* care about Hash-DoS resistance to get better performance by not hashing lengths

This does not change rustc-hash, since that's in an external crate, but that could potentially use it in future.
This commit is contained in:
parent
086bf7a8ff
commit
98054377ee
14 changed files with 166 additions and 6 deletions
|
@ -18,6 +18,7 @@
|
||||||
#![feature(generators)]
|
#![feature(generators)]
|
||||||
#![feature(let_else)]
|
#![feature(let_else)]
|
||||||
#![feature(hash_raw_entry)]
|
#![feature(hash_raw_entry)]
|
||||||
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(maybe_uninit_uninit_array)]
|
#![feature(maybe_uninit_uninit_array)]
|
||||||
#![feature(min_specialization)]
|
#![feature(min_specialization)]
|
||||||
#![feature(never_type)]
|
#![feature(never_type)]
|
||||||
|
|
|
@ -462,6 +462,14 @@ impl Hasher for SipHasher128 {
|
||||||
self.slice_write(msg);
|
self.slice_write(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
// This hasher works byte-wise, and `0xFF` cannot show up in a `str`,
|
||||||
|
// so just hashing the one extra byte is enough to be prefix-free.
|
||||||
|
self.write(s.as_bytes());
|
||||||
|
self.write_u8(0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
panic!("SipHasher128 cannot provide valid 64 bit hashes")
|
panic!("SipHasher128 cannot provide valid 64 bit hashes")
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,6 +73,17 @@ impl Hasher for StableHasher {
|
||||||
self.state.write(bytes);
|
self.state.write(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.state.write_str(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_length_prefix(&mut self, len: usize) {
|
||||||
|
// Our impl for `usize` will extend it if needed.
|
||||||
|
self.write_usize(len);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn write_u8(&mut self, i: u8) {
|
fn write_u8(&mut self, i: u8) {
|
||||||
self.state.write_u8(i);
|
self.state.write_u8(i);
|
||||||
|
|
|
@ -1369,6 +1369,12 @@ impl<T: ?Sized + Hasher, A: Allocator> Hasher for Box<T, A> {
|
||||||
fn write_isize(&mut self, i: isize) {
|
fn write_isize(&mut self, i: isize) {
|
||||||
(**self).write_isize(i)
|
(**self).write_isize(i)
|
||||||
}
|
}
|
||||||
|
fn write_length_prefix(&mut self, len: usize) {
|
||||||
|
(**self).write_length_prefix(len)
|
||||||
|
}
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
(**self).write_str(s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
|
|
|
@ -1990,7 +1990,7 @@ impl<'a, K: Ord + Copy, V: Copy> Extend<(&'a K, &'a V)> for BTreeMap<K, V> {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<K: Hash, V: Hash> Hash for BTreeMap<K, V> {
|
impl<K: Hash, V: Hash> Hash for BTreeMap<K, V> {
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
self.len().hash(state);
|
state.write_length_prefix(self.len());
|
||||||
for elt in self {
|
for elt in self {
|
||||||
elt.hash(state);
|
elt.hash(state);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1944,7 +1944,7 @@ impl<T: fmt::Debug> fmt::Debug for LinkedList<T> {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<T: Hash> Hash for LinkedList<T> {
|
impl<T: Hash> Hash for LinkedList<T> {
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
self.len().hash(state);
|
state.write_length_prefix(self.len());
|
||||||
for elt in self {
|
for elt in self {
|
||||||
elt.hash(state);
|
elt.hash(state);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2899,7 +2899,7 @@ impl<T: Ord, A: Allocator> Ord for VecDeque<T, A> {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<T: Hash, A: Allocator> Hash for VecDeque<T, A> {
|
impl<T: Hash, A: Allocator> Hash for VecDeque<T, A> {
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
self.len().hash(state);
|
state.write_length_prefix(self.len());
|
||||||
// It's not possible to use Hash::hash_slice on slices
|
// It's not possible to use Hash::hash_slice on slices
|
||||||
// returned by as_slices method as their length can vary
|
// returned by as_slices method as their length can vary
|
||||||
// in otherwise identical deques.
|
// in otherwise identical deques.
|
||||||
|
|
|
@ -117,6 +117,7 @@
|
||||||
#![feature(extend_one)]
|
#![feature(extend_one)]
|
||||||
#![feature(fmt_internals)]
|
#![feature(fmt_internals)]
|
||||||
#![feature(fn_traits)]
|
#![feature(fn_traits)]
|
||||||
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(inplace_iteration)]
|
#![feature(inplace_iteration)]
|
||||||
#![feature(iter_advance_by)]
|
#![feature(iter_advance_by)]
|
||||||
#![feature(layout_for_ptr)]
|
#![feature(layout_for_ptr)]
|
||||||
|
|
|
@ -333,6 +333,12 @@ pub trait Hasher {
|
||||||
///
|
///
|
||||||
/// println!("Hash is {:x}!", hasher.finish());
|
/// println!("Hash is {:x}!", hasher.finish());
|
||||||
/// ```
|
/// ```
|
||||||
|
///
|
||||||
|
/// # Note to Implementers
|
||||||
|
///
|
||||||
|
/// You generally should not do length-prefixing as part of implementing
|
||||||
|
/// this method. It's up to the [`Hash`] implementation to call
|
||||||
|
/// [`Hasher::write_length_prefix`] before sequences that need it.
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn write(&mut self, bytes: &[u8]);
|
fn write(&mut self, bytes: &[u8]);
|
||||||
|
|
||||||
|
@ -409,6 +415,96 @@ pub trait Hasher {
|
||||||
fn write_isize(&mut self, i: isize) {
|
fn write_isize(&mut self, i: isize) {
|
||||||
self.write_usize(i as usize)
|
self.write_usize(i as usize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Writes a length prefix into this hasher, as part of being prefix-free.
|
||||||
|
///
|
||||||
|
/// If you're implementing [`Hash`] for a custom collection, call this before
|
||||||
|
/// writing its contents to this `Hasher`. That way
|
||||||
|
/// `(collection![1, 2, 3], collection![4, 5])` and
|
||||||
|
/// `(collection![1, 2], collection![3, 4, 5])` will provide different
|
||||||
|
/// sequences of values to the `Hasher`.
|
||||||
|
///
|
||||||
|
/// The `impl<T> Hash for [T]` includes a call to this method, so if you're
|
||||||
|
/// hashing a slice (or array or vector) via its `Hash::hash` method,
|
||||||
|
/// you should **not** call this yourself.
|
||||||
|
///
|
||||||
|
/// This method is only for providing domain separation. If you want to
|
||||||
|
/// hash a `usize` that represents part of the *data*, then it's important
|
||||||
|
/// that you pass it to [`Hasher::write_usize`] instead of to this method.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// #![feature(hasher_prefixfree_extras)]
|
||||||
|
/// # // Stubs to make the `impl` below pass the compiler
|
||||||
|
/// # struct MyCollection<T>(Option<T>);
|
||||||
|
/// # impl<T> MyCollection<T> {
|
||||||
|
/// # fn len(&self) -> usize { todo!() }
|
||||||
|
/// # }
|
||||||
|
/// # impl<'a, T> IntoIterator for &'a MyCollection<T> {
|
||||||
|
/// # type Item = T;
|
||||||
|
/// # type IntoIter = std::iter::Empty<T>;
|
||||||
|
/// # fn into_iter(self) -> Self::IntoIter { todo!() }
|
||||||
|
/// # }
|
||||||
|
///
|
||||||
|
/// use std::hash::{Hash, Hasher};
|
||||||
|
/// impl<T: Hash> Hash for MyCollection<T> {
|
||||||
|
/// fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
|
/// state.write_length_prefix(self.len());
|
||||||
|
/// for elt in self {
|
||||||
|
/// elt.hash(state);
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// # Note to Implementers
|
||||||
|
///
|
||||||
|
/// If you've decided that your `Hasher` is willing to be susceptible to
|
||||||
|
/// Hash-DoS attacks, then you might consider skipping hashing some or all
|
||||||
|
/// of the `len` provided in the name of increased performance.
|
||||||
|
#[inline]
|
||||||
|
#[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
|
||||||
|
fn write_length_prefix(&mut self, len: usize) {
|
||||||
|
self.write_usize(len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes a single `str` into this hasher.
|
||||||
|
///
|
||||||
|
/// If you're implementing [`Hash`], you generally do not need to call this,
|
||||||
|
/// as the `impl Hash for str` does, so you should prefer that instead.
|
||||||
|
///
|
||||||
|
/// This includes the domain separator for prefix-freedom, so you should
|
||||||
|
/// **not** call `Self::write_length_prefix` before calling this.
|
||||||
|
///
|
||||||
|
/// # Note to Implementers
|
||||||
|
///
|
||||||
|
/// The default implementation of this method includes a call to
|
||||||
|
/// [`Self::write_length_prefix`], so if your implementation of `Hasher`
|
||||||
|
/// doesn't care about prefix-freedom and you've thus overridden
|
||||||
|
/// that method to do nothing, there's no need to override this one.
|
||||||
|
///
|
||||||
|
/// This method is available to be overridden separately from the others
|
||||||
|
/// as `str` being UTF-8 means that it never contains `0xFF` bytes, which
|
||||||
|
/// can be used to provide prefix-freedom cheaper than hashing a length.
|
||||||
|
///
|
||||||
|
/// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating
|
||||||
|
/// them into a buffer), then you can hash the bytes of the `str` followed
|
||||||
|
/// by a single `0xFF` byte.
|
||||||
|
///
|
||||||
|
/// If your `Hasher` works in chunks, you can also do this by being careful
|
||||||
|
/// about how you pad partial chunks. If the chunks are padded with `0x00`
|
||||||
|
/// bytes then just hashing an extra `0xFF` byte doesn't necessarily
|
||||||
|
/// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash
|
||||||
|
/// the same sequence of chunks. But if you pad with `0xFF` bytes instead,
|
||||||
|
/// ensuring at least one padding byte, then it can often provide
|
||||||
|
/// prefix-freedom cheaper than hashing the length would.
|
||||||
|
#[inline]
|
||||||
|
#[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.write_length_prefix(s.len());
|
||||||
|
self.write(s.as_bytes());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "indirect_hasher_impl", since = "1.22.0")]
|
#[stable(feature = "indirect_hasher_impl", since = "1.22.0")]
|
||||||
|
@ -455,6 +551,12 @@ impl<H: Hasher + ?Sized> Hasher for &mut H {
|
||||||
fn write_isize(&mut self, i: isize) {
|
fn write_isize(&mut self, i: isize) {
|
||||||
(**self).write_isize(i)
|
(**self).write_isize(i)
|
||||||
}
|
}
|
||||||
|
fn write_length_prefix(&mut self, len: usize) {
|
||||||
|
(**self).write_length_prefix(len)
|
||||||
|
}
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
(**self).write_str(s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A trait for creating instances of [`Hasher`].
|
/// A trait for creating instances of [`Hasher`].
|
||||||
|
@ -709,8 +811,7 @@ mod impls {
|
||||||
impl Hash for str {
|
impl Hash for str {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
state.write(self.as_bytes());
|
state.write_str(self);
|
||||||
state.write_u8(0xff)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -767,7 +868,7 @@ mod impls {
|
||||||
impl<T: Hash> Hash for [T] {
|
impl<T: Hash> Hash for [T] {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
self.len().hash(state);
|
state.write_length_prefix(self.len());
|
||||||
Hash::hash_slice(self, state)
|
Hash::hash_slice(self, state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -233,6 +233,11 @@ impl super::Hasher for SipHasher {
|
||||||
self.0.hasher.write(msg)
|
self.0.hasher.write(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.0.hasher.write_str(s);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
self.0.hasher.finish()
|
self.0.hasher.finish()
|
||||||
|
@ -246,6 +251,11 @@ impl super::Hasher for SipHasher13 {
|
||||||
self.hasher.write(msg)
|
self.hasher.write(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.hasher.write_str(s);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
self.hasher.finish()
|
self.hasher.finish()
|
||||||
|
@ -307,6 +317,14 @@ impl<S: Sip> super::Hasher for Hasher<S> {
|
||||||
self.ntail = left;
|
self.ntail = left;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
// This hasher works byte-wise, and `0xFF` cannot show up in a `str`,
|
||||||
|
// so just hashing the one extra byte is enough to be prefix-free.
|
||||||
|
self.write(s.as_bytes());
|
||||||
|
self.write_u8(0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
let mut state = self.state;
|
let mut state = self.state;
|
||||||
|
|
|
@ -20,6 +20,10 @@ impl Hasher for MyHasher {
|
||||||
self.hash += *byte as u64;
|
self.hash += *byte as u64;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.write(s.as_bytes());
|
||||||
|
self.write_u8(0xFF);
|
||||||
|
}
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
self.hash
|
self.hash
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,7 @@
|
||||||
#![feature(future_join)]
|
#![feature(future_join)]
|
||||||
#![feature(future_poll_fn)]
|
#![feature(future_poll_fn)]
|
||||||
#![feature(array_from_fn)]
|
#![feature(array_from_fn)]
|
||||||
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(hashmap_internals)]
|
#![feature(hashmap_internals)]
|
||||||
#![feature(try_find)]
|
#![feature(try_find)]
|
||||||
#![feature(inline_const)]
|
#![feature(inline_const)]
|
||||||
|
|
|
@ -3006,11 +3006,19 @@ impl Default for DefaultHasher {
|
||||||
|
|
||||||
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
|
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
|
||||||
impl Hasher for DefaultHasher {
|
impl Hasher for DefaultHasher {
|
||||||
|
// The underlying `SipHasher13` doesn't override the other
|
||||||
|
// `write_*` methods, so it's ok not to forward them here.
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn write(&mut self, msg: &[u8]) {
|
fn write(&mut self, msg: &[u8]) {
|
||||||
self.0.write(msg)
|
self.0.write(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn write_str(&mut self, s: &str) {
|
||||||
|
self.0.write_str(s);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn finish(&self) -> u64 {
|
fn finish(&self) -> u64 {
|
||||||
self.0.finish()
|
self.0.finish()
|
||||||
|
|
|
@ -270,6 +270,7 @@
|
||||||
#![feature(exact_size_is_empty)]
|
#![feature(exact_size_is_empty)]
|
||||||
#![feature(extend_one)]
|
#![feature(extend_one)]
|
||||||
#![feature(float_minimum_maximum)]
|
#![feature(float_minimum_maximum)]
|
||||||
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(hashmap_internals)]
|
#![feature(hashmap_internals)]
|
||||||
#![feature(int_error_internals)]
|
#![feature(int_error_internals)]
|
||||||
#![feature(maybe_uninit_slice)]
|
#![feature(maybe_uninit_slice)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue