auto merge of #13646 : cgaebel/rust/hashmap-cleanup, r=alexcrichton
I went through the HashMap module and fixed spelling mistakes and minor inefficiencies, added tests, and made other trivial changes. Hopefully this won't be a controversial PR.
This commit is contained in:
commit
ef1b929b2f
1 changed files with 168 additions and 112 deletions
|
@ -43,7 +43,8 @@ mod table {
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
use std::ptr::RawPtr;
|
use std::ptr::RawPtr;
|
||||||
use std::rt::global_heap;
|
use std::rt::global_heap;
|
||||||
use std::intrinsics::{size_of, min_align_of, transmute, move_val_init};
|
use std::intrinsics::{size_of, min_align_of, transmute};
|
||||||
|
use std::intrinsics::{move_val_init, set_memory};
|
||||||
use std::iter::{Iterator, range_step_inclusive};
|
use std::iter::{Iterator, range_step_inclusive};
|
||||||
|
|
||||||
static EMPTY_BUCKET: u64 = 0u64;
|
static EMPTY_BUCKET: u64 = 0u64;
|
||||||
|
@ -52,15 +53,15 @@ mod table {
|
||||||
/// optimized arrays of hashes, keys, and values.
|
/// optimized arrays of hashes, keys, and values.
|
||||||
///
|
///
|
||||||
/// This design uses less memory and is a lot faster than the naive
|
/// This design uses less memory and is a lot faster than the naive
|
||||||
/// `~[Option<u64, K, V>]`, because we don't pay for the overhead of an
|
/// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
|
||||||
/// option on every element, and we get a generally more cache-aware design.
|
/// option on every element, and we get a generally more cache-aware design.
|
||||||
///
|
///
|
||||||
/// Key invariants of this structure:
|
/// Key invariants of this structure:
|
||||||
///
|
///
|
||||||
/// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
|
/// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
|
||||||
/// 'undefined' contents. Don't read from them. This invariant is
|
/// 'undefined' contents. Don't read from them. This invariant is
|
||||||
/// enforced outside this module with the [EmptyIndex], [FullIndex],
|
/// enforced outside this module with the `EmptyIndex`, `FullIndex`,
|
||||||
/// and [SafeHash] types/concepts.
|
/// and `SafeHash` types.
|
||||||
///
|
///
|
||||||
/// - An `EmptyIndex` is only constructed for a bucket at an index with
|
/// - An `EmptyIndex` is only constructed for a bucket at an index with
|
||||||
/// a hash of EMPTY_BUCKET.
|
/// a hash of EMPTY_BUCKET.
|
||||||
|
@ -69,8 +70,9 @@ mod table {
|
||||||
/// non-EMPTY_BUCKET hash.
|
/// non-EMPTY_BUCKET hash.
|
||||||
///
|
///
|
||||||
/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
|
/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
|
||||||
/// around hashes of zero by changing them to 0x800_0000, which will
|
/// around hashes of zero by changing them to 0x8000_0000_0000_0000,
|
||||||
/// likely hash to the same bucket, but not be represented as "empty".
|
/// which will likely map to the same bucket, while not being confused
|
||||||
|
/// with "empty".
|
||||||
///
|
///
|
||||||
/// - All three "arrays represented by pointers" are the same length:
|
/// - All three "arrays represented by pointers" are the same length:
|
||||||
/// `capacity`. This is set at creation and never changes. The arrays
|
/// `capacity`. This is set at creation and never changes. The arrays
|
||||||
|
@ -111,25 +113,27 @@ mod table {
|
||||||
|
|
||||||
/// Represents an index into a `RawTable` with no key or value in it.
|
/// Represents an index into a `RawTable` with no key or value in it.
|
||||||
pub struct EmptyIndex {
|
pub struct EmptyIndex {
|
||||||
idx: int,
|
idx: int,
|
||||||
nocopy: marker::NoCopy,
|
nocopy: marker::NoCopy,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents an index into a `RawTable` with a key, value, and hash
|
/// Represents an index into a `RawTable` with a key, value, and hash
|
||||||
/// in it.
|
/// in it.
|
||||||
pub struct FullIndex {
|
pub struct FullIndex {
|
||||||
idx: int,
|
idx: int,
|
||||||
hash: SafeHash,
|
hash: SafeHash,
|
||||||
nocopy: marker::NoCopy,
|
nocopy: marker::NoCopy,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FullIndex {
|
impl FullIndex {
|
||||||
/// Since we get the hash for free whenever we check the bucket state,
|
/// Since we get the hash for free whenever we check the bucket state,
|
||||||
/// this function is provided for fast access, letting us avoid making
|
/// this function is provided for fast access, letting us avoid
|
||||||
/// redundant trips back to the hashtable.
|
/// redundant trips back to the hashtable.
|
||||||
|
#[inline(always)]
|
||||||
pub fn hash(&self) -> SafeHash { self.hash }
|
pub fn hash(&self) -> SafeHash { self.hash }
|
||||||
|
|
||||||
/// Same comment as with `hash`.
|
/// Same comment as with `hash`.
|
||||||
|
#[inline(always)]
|
||||||
pub fn raw_index(&self) -> uint { self.idx as uint }
|
pub fn raw_index(&self) -> uint { self.idx as uint }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,7 +145,8 @@ mod table {
|
||||||
Full(FullIndex),
|
Full(FullIndex),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A hash that is not zero, since we use that to represent empty buckets.
|
/// A hash that is not zero, since we use a hash of zero to represent empty
|
||||||
|
/// buckets.
|
||||||
#[deriving(Eq)]
|
#[deriving(Eq)]
|
||||||
pub struct SafeHash {
|
pub struct SafeHash {
|
||||||
hash: u64,
|
hash: u64,
|
||||||
|
@ -149,6 +154,7 @@ mod table {
|
||||||
|
|
||||||
impl SafeHash {
|
impl SafeHash {
|
||||||
/// Peek at the hash value, which is guaranteed to be non-zero.
|
/// Peek at the hash value, which is guaranteed to be non-zero.
|
||||||
|
#[inline(always)]
|
||||||
pub fn inspect(&self) -> u64 { self.hash }
|
pub fn inspect(&self) -> u64 { self.hash }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,12 +177,16 @@ mod table {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_rounding() {
|
fn test_rounding() {
|
||||||
assert!(round_up_to_next(0, 4) == 0);
|
assert_eq!(round_up_to_next(0, 4), 0);
|
||||||
assert!(round_up_to_next(1, 4) == 4);
|
assert_eq!(round_up_to_next(1, 4), 4);
|
||||||
assert!(round_up_to_next(2, 4) == 4);
|
assert_eq!(round_up_to_next(2, 4), 4);
|
||||||
assert!(round_up_to_next(3, 4) == 4);
|
assert_eq!(round_up_to_next(3, 4), 4);
|
||||||
assert!(round_up_to_next(4, 4) == 4);
|
assert_eq!(round_up_to_next(4, 4), 4);
|
||||||
assert!(round_up_to_next(5, 4) == 8);
|
assert_eq!(round_up_to_next(5, 4), 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_alignment(n: uint, alignment: uint) -> bool {
|
||||||
|
round_up_to_next(n, alignment) == n
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a tuple of (minimum required malloc alignment, hash_offset,
|
// Returns a tuple of (minimum required malloc alignment, hash_offset,
|
||||||
|
@ -200,6 +210,13 @@ mod table {
|
||||||
(min_align, hash_offset, keys_offset, vals_offset, end_of_vals)
|
(min_align, hash_offset, keys_offset, vals_offset, end_of_vals)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_offset_calculation() {
|
||||||
|
assert_eq!(calculate_offsets(128, 8, 15, 1, 4, 4 ), (8, 0, 128, 144, 148));
|
||||||
|
assert_eq!(calculate_offsets(3, 1, 2, 1, 1, 1 ), (1, 0, 3, 5, 6));
|
||||||
|
assert_eq!(calculate_offsets(6, 2, 12, 4, 24, 8), (8, 0, 8, 24, 48));
|
||||||
|
}
|
||||||
|
|
||||||
impl<K, V> RawTable<K, V> {
|
impl<K, V> RawTable<K, V> {
|
||||||
|
|
||||||
/// Does not initialize the buckets. The caller should ensure they,
|
/// Does not initialize the buckets. The caller should ensure they,
|
||||||
|
@ -213,9 +230,9 @@ mod table {
|
||||||
capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
|
capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
|
||||||
|
|
||||||
// Allocating hashmaps is a little tricky. We need to allocate three
|
// Allocating hashmaps is a little tricky. We need to allocate three
|
||||||
// arrays here, but since we know their sizes and alignments up front,
|
// arrays, but since we know their sizes and alignments up front,
|
||||||
// we could theoretically allocate only a single array, and then have
|
// we just allocate a single array, and then have the subarrays
|
||||||
// the subarrays just point into it.
|
// point into it.
|
||||||
//
|
//
|
||||||
// This is great in theory, but in practice getting the alignment
|
// This is great in theory, but in practice getting the alignment
|
||||||
// right is a little subtle. Therefore, calculating offsets has been
|
// right is a little subtle. Therefore, calculating offsets has been
|
||||||
|
@ -231,8 +248,7 @@ mod table {
|
||||||
// FIXME #13094: If malloc was not as aligned as we expected,
|
// FIXME #13094: If malloc was not as aligned as we expected,
|
||||||
// our offset calculations are just plain wrong. We could support
|
// our offset calculations are just plain wrong. We could support
|
||||||
// any alignment if we switched from `malloc` to `posix_memalign`.
|
// any alignment if we switched from `malloc` to `posix_memalign`.
|
||||||
assert!(round_up_to_next(buffer as uint, malloc_alignment)
|
assert!(has_alignment(buffer as uint, malloc_alignment));
|
||||||
== (buffer as uint));
|
|
||||||
|
|
||||||
let hashes = buffer.offset(hash_offset as int) as *mut u64;
|
let hashes = buffer.offset(hash_offset as int) as *mut u64;
|
||||||
let keys = buffer.offset(keys_offset as int) as *mut K;
|
let keys = buffer.offset(keys_offset as int) as *mut K;
|
||||||
|
@ -247,26 +263,20 @@ mod table {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Creates a new raw table from a given capacity. All buckets are
|
/// Creates a new raw table from a given capacity. All buckets are
|
||||||
/// initially empty.
|
/// initially empty.
|
||||||
pub fn new(capacity: uint) -> RawTable<K, V> {
|
pub fn new(capacity: uint) -> RawTable<K, V> {
|
||||||
unsafe {
|
unsafe {
|
||||||
let ret = RawTable::new_uninitialized(capacity);
|
let ret = RawTable::new_uninitialized(capacity);
|
||||||
|
set_memory(ret.hashes, 0u8, capacity);
|
||||||
for i in range(0, ret.capacity() as int) {
|
|
||||||
*ret.hashes.offset(i) = EMPTY_BUCKET;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reads a bucket at a given index, returning an enum indicating whether
|
/// Reads a bucket at a given index, returning an enum indicating whether
|
||||||
/// there's anything there or not. You need to match on this enum to get
|
/// there's anything there or not. You need to match on this enum to get
|
||||||
/// the appropriate types to pass on to most of the rest of the functions
|
/// the appropriate types to pass on to most of the other functions in
|
||||||
/// in this module.
|
/// this module.
|
||||||
pub fn peek(&self, index: uint) -> BucketState {
|
pub fn peek(&self, index: uint) -> BucketState {
|
||||||
// FIXME #12049
|
// FIXME #12049
|
||||||
if cfg!(test) { assert!(index < self.capacity) }
|
if cfg!(test) { assert!(index < self.capacity) }
|
||||||
|
@ -279,13 +289,13 @@ mod table {
|
||||||
match hash {
|
match hash {
|
||||||
EMPTY_BUCKET =>
|
EMPTY_BUCKET =>
|
||||||
Empty(EmptyIndex {
|
Empty(EmptyIndex {
|
||||||
idx: idx,
|
idx: idx,
|
||||||
nocopy: nocopy
|
nocopy: nocopy
|
||||||
}),
|
}),
|
||||||
full_hash =>
|
full_hash =>
|
||||||
Full(FullIndex {
|
Full(FullIndex {
|
||||||
idx: idx,
|
idx: idx,
|
||||||
hash: SafeHash { hash: full_hash },
|
hash: SafeHash { hash: full_hash },
|
||||||
nocopy: nocopy,
|
nocopy: nocopy,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -321,13 +331,6 @@ mod table {
|
||||||
-> (&'a mut SafeHash, &'a mut K, &'a mut V) {
|
-> (&'a mut SafeHash, &'a mut K, &'a mut V) {
|
||||||
let idx = index.idx;
|
let idx = index.idx;
|
||||||
|
|
||||||
// I'm totally abusing the fact that a pointer to any u64 in the
|
|
||||||
// hashtable at a full index is a safe hash. Thanks to `SafeHash`
|
|
||||||
// just being a wrapper around u64, this is true. It's just really
|
|
||||||
// really really really unsafe. However, the exposed API is now
|
|
||||||
// impossible to get wrong. You cannot insert an empty hash into
|
|
||||||
// this slot now.
|
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
// FIXME #12049
|
// FIXME #12049
|
||||||
if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
|
if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
|
||||||
|
@ -340,8 +343,8 @@ mod table {
|
||||||
/// Puts a key and value pair, along with the key's hash, into a given
|
/// Puts a key and value pair, along with the key's hash, into a given
|
||||||
/// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
|
/// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
|
||||||
/// function, because that slot will no longer be empty when we return!
|
/// function, because that slot will no longer be empty when we return!
|
||||||
/// Because we know this, a FullIndex is returned for later use, pointing
|
/// A FullIndex is returned for later use, pointing to the newly-filled
|
||||||
/// to the newly-filled slot in the hashtable.
|
/// slot in the hashtable.
|
||||||
///
|
///
|
||||||
/// Use `make_hash` to construct a `SafeHash` to pass to this function.
|
/// Use `make_hash` to construct a `SafeHash` to pass to this function.
|
||||||
pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
|
pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
|
||||||
|
@ -349,7 +352,7 @@ mod table {
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
// FIXME #12049
|
// FIXME #12049
|
||||||
if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
|
if cfg!(test) { assert_eq!(*self.hashes.offset(idx), EMPTY_BUCKET) }
|
||||||
*self.hashes.offset(idx) = hash.inspect();
|
*self.hashes.offset(idx) = hash.inspect();
|
||||||
move_val_init(&mut *self.keys.offset(idx), k);
|
move_val_init(&mut *self.keys.offset(idx), k);
|
||||||
move_val_init(&mut *self.vals.offset(idx), v);
|
move_val_init(&mut *self.vals.offset(idx), v);
|
||||||
|
@ -371,9 +374,7 @@ mod table {
|
||||||
// FIXME #12049
|
// FIXME #12049
|
||||||
if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
|
if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
|
||||||
|
|
||||||
let hash_ptr = self.hashes.offset(idx);
|
*self.hashes.offset(idx) = EMPTY_BUCKET;
|
||||||
|
|
||||||
*hash_ptr = EMPTY_BUCKET;
|
|
||||||
|
|
||||||
// Drop the mutable constraint.
|
// Drop the mutable constraint.
|
||||||
let keys = self.keys as *K;
|
let keys = self.keys as *K;
|
||||||
|
@ -400,31 +401,48 @@ mod table {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
|
pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
|
||||||
Entries { table: self, idx: 0 }
|
Entries { table: self, idx: 0, elems_seen: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
|
pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
|
||||||
MutEntries { table: self, idx: 0 }
|
MutEntries { table: self, idx: 0, elems_seen: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn move_iter(self) -> MoveEntries<K, V> {
|
pub fn move_iter(self) -> MoveEntries<K, V> {
|
||||||
MoveEntries { table: self, idx: 0 }
|
MoveEntries { table: self, idx: 0, elems_seen: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// `read_all_mut` casts a `*u64` to a `*SafeHash`. Since we statically
|
||||||
|
// ensure that a `FullIndex` points to an index with a non-zero hash,
|
||||||
|
// and a `SafeHash` is just a `u64` with a different name, this is
|
||||||
|
// safe.
|
||||||
|
//
|
||||||
|
// This test ensures that a `SafeHash` really IS the same size as a
|
||||||
|
// `u64`. If you need to change the size of `SafeHash` (and
|
||||||
|
// consequently made this test fail), `read_all_mut` needs to be
|
||||||
|
// modified to no longer assume this.
|
||||||
|
#[test]
|
||||||
|
fn can_alias_safehash_as_u64() {
|
||||||
|
unsafe { assert_eq!(size_of::<SafeHash>(), size_of::<u64>()) };
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Entries<'a, K, V> {
|
pub struct Entries<'a, K, V> {
|
||||||
table: &'a RawTable<K, V>,
|
table: &'a RawTable<K, V>,
|
||||||
idx: uint,
|
idx: uint,
|
||||||
|
elems_seen: uint,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MutEntries<'a, K, V> {
|
pub struct MutEntries<'a, K, V> {
|
||||||
table: &'a mut RawTable<K, V>,
|
table: &'a mut RawTable<K, V>,
|
||||||
idx: uint,
|
idx: uint,
|
||||||
|
elems_seen: uint,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct MoveEntries<K, V> {
|
pub struct MoveEntries<K, V> {
|
||||||
table: RawTable<K, V>,
|
table: RawTable<K, V>,
|
||||||
idx: uint,
|
idx: uint,
|
||||||
|
elems_seen: uint,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
|
impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
|
||||||
|
@ -435,7 +453,10 @@ mod table {
|
||||||
|
|
||||||
match self.table.peek(i) {
|
match self.table.peek(i) {
|
||||||
Empty(_) => {},
|
Empty(_) => {},
|
||||||
Full(idx) => return Some(self.table.read(&idx))
|
Full(idx) => {
|
||||||
|
self.elems_seen += 1;
|
||||||
|
return Some(self.table.read(&idx));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,7 +464,7 @@ mod table {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||||
let size = self.table.size() - self.idx;
|
let size = self.table.size() - self.elems_seen;
|
||||||
(size, Some(size))
|
(size, Some(size))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -460,7 +481,8 @@ mod table {
|
||||||
// error: lifetime of `self` is too short to guarantee its contents
|
// error: lifetime of `self` is too short to guarantee its contents
|
||||||
// can be safely reborrowed
|
// can be safely reborrowed
|
||||||
Full(idx) => unsafe {
|
Full(idx) => unsafe {
|
||||||
return Some(transmute(self.table.read_mut(&idx)))
|
self.elems_seen += 1;
|
||||||
|
return Some(transmute(self.table.read_mut(&idx)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -469,7 +491,7 @@ mod table {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size_hint(&self) -> (uint, Option<uint>) {
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
||||||
let size = self.table.size() - self.idx;
|
let size = self.table.size() - self.elems_seen;
|
||||||
(size, Some(size))
|
(size, Some(size))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -526,18 +548,14 @@ mod table {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[unsafe_destructor]
|
#[unsafe_destructor]
|
||||||
impl<K, V> Drop for RawTable<K, V> {
|
impl<K, V> Drop for RawTable<K, V> {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
// Ideally, this should be in reverse, since we're likely to have
|
// This is in reverse because we're likely to have partially taken
|
||||||
// partially taken some elements out with `.move_iter()` from the
|
// some elements out with `.move_iter()` from the front.
|
||||||
// front.
|
|
||||||
for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
|
for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
|
||||||
// Check if the size is 0, so we don't do a useless scan when
|
// Check if the size is 0, so we don't do a useless scan when
|
||||||
// dropping empty tables such as on resize.
|
// dropping empty tables such as on resize.
|
||||||
|
|
||||||
if self.size == 0 { break }
|
if self.size == 0 { break }
|
||||||
|
|
||||||
match self.peek(i as uint) {
|
match self.peek(i as uint) {
|
||||||
|
@ -546,7 +564,7 @@ mod table {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert!(self.size == 0);
|
assert_eq!(self.size, 0);
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
libc::free(self.hashes as *mut libc::c_void);
|
libc::free(self.hashes as *mut libc::c_void);
|
||||||
|
@ -637,19 +655,12 @@ static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
|
||||||
// This would definitely be an avenue worth exploring if people start complaining
|
// This would definitely be an avenue worth exploring if people start complaining
|
||||||
// about the size of rust executables.
|
// about the size of rust executables.
|
||||||
//
|
//
|
||||||
// There's also two optimizations that have been omitted regarding how the
|
// There's also an "optimization" that has been omitted regarding how the
|
||||||
// hashtable allocates. The first is that a hashtable which never has an element
|
// hashtable allocates. The vector type has set the expectation that a hashtable
|
||||||
// inserted should not allocate. I'm suspicious of this one, because supporting
|
// which never has an element inserted should not allocate. I'm suspicious of
|
||||||
// that internally gains no performance over just using an
|
// implementing this for hashtables, because supporting it has no performance
|
||||||
// `Option<HashMap<K, V>>`, and is significantly more complicated.
|
// benefit over using an `Option<HashMap<K, V>>`, and is significantly more
|
||||||
//
|
// complicated.
|
||||||
// The second omitted allocation optimization is that right now we allocate three
|
|
||||||
// arrays to back the hashtable. This is wasteful. In theory, we only need one
|
|
||||||
// array, and each of the three original arrays can just be slices of it. This
|
|
||||||
// would reduce the pressure on the allocator, and will play much nicer with the
|
|
||||||
// rest of the system. An initial implementation is commented out in
|
|
||||||
// `table::RawTable::new`, but I'm not confident it works for all sane alignments,
|
|
||||||
// especially if a type needs more alignment than `malloc` provides.
|
|
||||||
|
|
||||||
/// A hash map implementation which uses linear probing with Robin
|
/// A hash map implementation which uses linear probing with Robin
|
||||||
/// Hood bucket stealing.
|
/// Hood bucket stealing.
|
||||||
|
@ -745,7 +756,7 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
// This exploits the power-of-two size of the hashtable. As long as this
|
// This exploits the power-of-two size of the hashtable. As long as this
|
||||||
// is always true, we can use a bitmask of cap-1 to do modular arithmetic.
|
// is always true, we can use a bitmask of cap-1 to do modular arithmetic.
|
||||||
//
|
//
|
||||||
// Prefer to use this with increasing values of `idx` rather than repeatedly
|
// Prefer using this with increasing values of `idx` rather than repeatedly
|
||||||
// calling `probe_next`. This reduces data-dependencies between loops, which
|
// calling `probe_next`. This reduces data-dependencies between loops, which
|
||||||
// can help the optimizer, and certainly won't hurt it. `probe_next` is
|
// can help the optimizer, and certainly won't hurt it. `probe_next` is
|
||||||
// simply for convenience, and is no more efficient than `probe`.
|
// simply for convenience, and is no more efficient than `probe`.
|
||||||
|
@ -756,7 +767,7 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
((hash.inspect() as uint) + idx) & hash_mask
|
((hash.inspect() as uint) + idx) & hash_mask
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate the next probe in a sequence. Prefer to use 'probe' by itself,
|
// Generate the next probe in a sequence. Prefer using 'probe' by itself,
|
||||||
// but this can sometimes be useful.
|
// but this can sometimes be useful.
|
||||||
fn probe_next(&self, probe: uint) -> uint {
|
fn probe_next(&self, probe: uint) -> uint {
|
||||||
let hash_mask = self.table.capacity() - 1;
|
let hash_mask = self.table.capacity() - 1;
|
||||||
|
@ -804,7 +815,7 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
if self.bucket_distance(&idx) < num_probes { return None }
|
if self.bucket_distance(&idx) < num_probes { return None }
|
||||||
|
|
||||||
// If the hash doesn't match, it can't be this one.
|
// If the hash doesn't match, it can't be this one.
|
||||||
if hash != &idx.hash() { continue }
|
if *hash != idx.hash() { continue }
|
||||||
|
|
||||||
let (k, _) = self.table.read(&idx);
|
let (k, _) = self.table.read(&idx);
|
||||||
|
|
||||||
|
@ -1087,7 +1098,7 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
/// 2) Ensure new_capacity is a power of two.
|
/// 2) Ensure new_capacity is a power of two.
|
||||||
fn resize(&mut self, new_capacity: uint) {
|
fn resize(&mut self, new_capacity: uint) {
|
||||||
assert!(self.table.size() <= new_capacity);
|
assert!(self.table.size() <= new_capacity);
|
||||||
assert!((new_capacity - 1) & new_capacity == 0);
|
assert!(num::is_power_of_two(new_capacity));
|
||||||
|
|
||||||
self.grow_at = grow_at(new_capacity, self.load_factor);
|
self.grow_at = grow_at(new_capacity, self.load_factor);
|
||||||
|
|
||||||
|
@ -1095,7 +1106,7 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
let old_size = old_table.size();
|
let old_size = old_table.size();
|
||||||
|
|
||||||
for (h, k, v) in old_table.move_iter() {
|
for (h, k, v) in old_table.move_iter() {
|
||||||
self.manual_insert_hashed_nocheck(h, k, v);
|
self.insert_hashed_nocheck(h, k, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert_eq!(self.table.size(), old_size);
|
assert_eq!(self.table.size(), old_size);
|
||||||
|
@ -1171,13 +1182,13 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manually insert a pre-hashed key-value pair, without first checking
|
/// Insert a pre-hashed key-value pair, without first checking
|
||||||
/// that there's enough room in the buckets. Returns a reference to the
|
/// that there's enough room in the buckets. Returns a reference to the
|
||||||
/// newly inserted value.
|
/// newly inserted value.
|
||||||
///
|
///
|
||||||
/// If the key already exists, the hashtable will be returned untouched
|
/// If the key already exists, the hashtable will be returned untouched
|
||||||
/// and a reference to the existing element will be returned.
|
/// and a reference to the existing element will be returned.
|
||||||
fn manual_insert_hashed_nocheck<'a>(
|
fn insert_hashed_nocheck<'a>(
|
||||||
&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
|
&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
|
||||||
|
|
||||||
for dib in range_inclusive(0u, self.table.size()) {
|
for dib in range_inclusive(0u, self.table.size()) {
|
||||||
|
@ -1226,28 +1237,25 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
fail!("Internal HashMap error: Out of space.");
|
fail!("Internal HashMap error: Out of space.");
|
||||||
}
|
}
|
||||||
|
|
||||||
fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
|
/// Inserts an element which has already been hashed, returning a reference
|
||||||
|
/// to that element inside the hashtable. This is more efficient than using
|
||||||
|
/// `insert`, since the key will not be rehashed.
|
||||||
|
fn insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
|
||||||
let potential_new_size = self.table.size() + 1;
|
let potential_new_size = self.table.size() + 1;
|
||||||
self.make_some_room(potential_new_size);
|
self.make_some_room(potential_new_size);
|
||||||
self.manual_insert_hashed_nocheck(hash, k, v)
|
self.insert_hashed_nocheck(hash, k, v)
|
||||||
}
|
|
||||||
|
|
||||||
/// Inserts an element, returning a reference to that element inside the
|
|
||||||
/// hashtable.
|
|
||||||
fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
|
|
||||||
let hash = self.make_hash(&k);
|
|
||||||
self.manual_insert_hashed(hash, k, v)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the value corresponding to the key in the map, or insert
|
/// Return the value corresponding to the key in the map, or insert
|
||||||
/// and return the value if it doesn't exist.
|
/// and return the value if it doesn't exist.
|
||||||
pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
|
pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
|
||||||
match self.search(&k) {
|
let hash = self.make_hash(&k);
|
||||||
|
match self.search_hashed(&hash, &k) {
|
||||||
Some(idx) => {
|
Some(idx) => {
|
||||||
let (_, v_ref) = self.table.read_mut(&idx);
|
let (_, v_ref) = self.table.read_mut(&idx);
|
||||||
v_ref
|
v_ref
|
||||||
},
|
},
|
||||||
None => self.manual_insert(k, v)
|
None => self.insert_hashed(hash, k, v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1255,14 +1263,15 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
/// insert, and return a new value if it doesn't exist.
|
/// insert, and return a new value if it doesn't exist.
|
||||||
pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
|
pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
|
||||||
-> &'a mut V {
|
-> &'a mut V {
|
||||||
match self.search(&k) {
|
let hash = self.make_hash(&k);
|
||||||
|
match self.search_hashed(&hash, &k) {
|
||||||
Some(idx) => {
|
Some(idx) => {
|
||||||
let (_, v_ref) = self.table.read_mut(&idx);
|
let (_, v_ref) = self.table.read_mut(&idx);
|
||||||
v_ref
|
v_ref
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
let v = f(&k);
|
let v = f(&k);
|
||||||
self.manual_insert(k, v)
|
self.insert_hashed(hash, k, v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1276,8 +1285,9 @@ impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
||||||
v: V,
|
v: V,
|
||||||
f: |&K, &mut V|)
|
f: |&K, &mut V|)
|
||||||
-> &'a mut V {
|
-> &'a mut V {
|
||||||
match self.search(&k) {
|
let hash = self.make_hash(&k);
|
||||||
None => self.manual_insert(k, v),
|
match self.search_hashed(&hash, &k) {
|
||||||
|
None => self.insert_hashed(hash, k, v),
|
||||||
Some(idx) => {
|
Some(idx) => {
|
||||||
let (_, v_ref) = self.table.read_mut(&idx);
|
let (_, v_ref) = self.table.read_mut(&idx);
|
||||||
f(&k, v_ref);
|
f(&k, v_ref);
|
||||||
|
@ -1369,7 +1379,8 @@ impl<K: TotalEq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
|
||||||
fn eq(&self, other: &HashMap<K, V, H>) -> bool {
|
fn eq(&self, other: &HashMap<K, V, H>) -> bool {
|
||||||
if self.len() != other.len() { return false; }
|
if self.len() != other.len() { return false; }
|
||||||
|
|
||||||
self.iter().all(|(key, value)| {
|
self.iter()
|
||||||
|
.all(|(key, value)| {
|
||||||
match other.find(key) {
|
match other.find(key) {
|
||||||
None => false,
|
None => false,
|
||||||
Some(v) => *value == *v
|
Some(v) => *value == *v
|
||||||
|
@ -1393,7 +1404,7 @@ impl<K: TotalEq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K,
|
||||||
|
|
||||||
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
|
||||||
fn default() -> HashMap<K, V, H> {
|
fn default() -> HashMap<K, V, H> {
|
||||||
HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
|
HashMap::with_hasher(Default::default())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1449,13 +1460,10 @@ pub struct HashSet<T, H = sip::SipHasher> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
|
||||||
// FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
|
|
||||||
// we trigger #11998 when matching on it. I've fallen back to manual
|
|
||||||
// iteration until this is fixed.
|
|
||||||
fn eq(&self, other: &HashSet<T, H>) -> bool {
|
fn eq(&self, other: &HashSet<T, H>) -> bool {
|
||||||
if self.len() != other.len() { return false; }
|
if self.len() != other.len() { return false; }
|
||||||
|
|
||||||
self.iter().all(|key| other.map.contains_key(key))
|
self.iter().all(|key| other.contains(key))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1468,7 +1476,7 @@ impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
|
||||||
fn contains(&self, value: &T) -> bool { self.map.search(value).is_some() }
|
fn contains(&self, value: &T) -> bool { self.map.contains_key(value) }
|
||||||
|
|
||||||
fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
|
fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
|
||||||
self.iter().all(|v| !other.contains(v))
|
self.iter().all(|v| !other.contains(v))
|
||||||
|
@ -1540,8 +1548,7 @@ impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
|
||||||
|
|
||||||
/// Visit the values representing the difference
|
/// Visit the values representing the difference
|
||||||
pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
|
pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
|
||||||
Repeat::new(other)
|
Repeat::new(other).zip(self.iter())
|
||||||
.zip(self.iter())
|
|
||||||
.filter_map(|(other, elt)| {
|
.filter_map(|(other, elt)| {
|
||||||
if !other.contains(elt) { Some(elt) } else { None }
|
if !other.contains(elt) { Some(elt) } else { None }
|
||||||
})
|
})
|
||||||
|
@ -1556,8 +1563,7 @@ impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
|
||||||
/// Visit the values representing the intersection
|
/// Visit the values representing the intersection
|
||||||
pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
|
pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
|
||||||
-> SetAlgebraItems<'a, T, H> {
|
-> SetAlgebraItems<'a, T, H> {
|
||||||
Repeat::new(other)
|
Repeat::new(other).zip(self.iter())
|
||||||
.zip(self.iter())
|
|
||||||
.filter_map(|(other, elt)| {
|
.filter_map(|(other, elt)| {
|
||||||
if other.contains(elt) { Some(elt) } else { None }
|
if other.contains(elt) { Some(elt) } else { None }
|
||||||
})
|
})
|
||||||
|
@ -1568,7 +1574,6 @@ impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
|
||||||
-> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
|
-> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
|
||||||
self.iter().chain(other.difference(self))
|
self.iter().chain(other.difference(self))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
|
impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
|
||||||
|
@ -1953,7 +1958,7 @@ mod test_map {
|
||||||
m.insert(1, 2);
|
m.insert(1, 2);
|
||||||
match m.find(&1) {
|
match m.find(&1) {
|
||||||
None => fail!(),
|
None => fail!(),
|
||||||
Some(v) => assert!(*v == 2)
|
Some(v) => assert_eq!(*v, 2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2020,6 +2025,32 @@ mod test_map {
|
||||||
assert_eq!(map.find(&k), Some(&v));
|
assert_eq!(map.find(&k), Some(&v));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_size_hint() {
|
||||||
|
let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
|
||||||
|
|
||||||
|
let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
|
||||||
|
|
||||||
|
let mut iter = map.iter();
|
||||||
|
|
||||||
|
for _ in iter.by_ref().take(3) {}
|
||||||
|
|
||||||
|
assert_eq!(iter.size_hint(), (3, Some(3)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_mut_size_hint() {
|
||||||
|
let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
|
||||||
|
|
||||||
|
let mut map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
|
||||||
|
|
||||||
|
let mut iter = map.mut_iter();
|
||||||
|
|
||||||
|
for _ in iter.by_ref().take(3) {}
|
||||||
|
|
||||||
|
assert_eq!(iter.size_hint(), (3, Some(3)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -2270,6 +2301,27 @@ mod bench {
|
||||||
use self::test::Bencher;
|
use self::test::Bencher;
|
||||||
use std::iter::{range_inclusive};
|
use std::iter::{range_inclusive};
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn new_drop(b : &mut Bencher) {
|
||||||
|
use super::HashMap;
|
||||||
|
|
||||||
|
b.iter(|| {
|
||||||
|
let m : HashMap<int, int> = HashMap::new();
|
||||||
|
assert_eq!(m.len(), 0);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn new_insert_drop(b : &mut Bencher) {
|
||||||
|
use super::HashMap;
|
||||||
|
|
||||||
|
b.iter(|| {
|
||||||
|
let mut m = HashMap::new();
|
||||||
|
m.insert(0, 0);
|
||||||
|
assert_eq!(m.len(), 1);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn insert(b: &mut Bencher) {
|
fn insert(b: &mut Bencher) {
|
||||||
use super::HashMap;
|
use super::HashMap;
|
||||||
|
@ -2299,7 +2351,9 @@ mod bench {
|
||||||
}
|
}
|
||||||
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
m.contains_key(&412);
|
for i in range_inclusive(1, 1000) {
|
||||||
|
m.contains_key(&i);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2314,7 +2368,9 @@ mod bench {
|
||||||
}
|
}
|
||||||
|
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
m.contains_key(&2048);
|
for i in range_inclusive(1001, 2000) {
|
||||||
|
m.contains_key(&i);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue