1
Fork 0

De-duplicate core::hash some, refactor the traits.

This commit is contained in:
Graydon Hoare 2012-08-02 17:17:31 -07:00
parent 009352101d
commit 4779d2b392
5 changed files with 229 additions and 222 deletions

View file

@ -9,54 +9,123 @@
* CPRNG like rand::rng. * CPRNG like rand::rng.
*/ */
pure fn hash_bytes(buf: &[const u8]) -> u64 { import io::writer;
return hash_bytes_keyed(buf, 0u64, 0u64); import io::writer_util;
export Streaming, State;
export default_state;
export hash_bytes_keyed;
export hash_str_keyed;
export hash_u64_keyed;
export hash_u32_keyed;
export hash_u16_keyed;
export hash_u8_keyed;
export hash_uint_keyed;
export hash_bytes;
export hash_str;
export hash_u64;
export hash_u32;
export hash_u16;
export hash_u8;
export hash_uint;
/// Streaming hash-functions should implement this.
trait Streaming {
fn input((&[const u8]));
// These can be refactored some when we have default methods.
fn result_bytes() -> ~[u8];
fn result_str() -> ~str;
fn result_u64() -> u64;
fn reset();
} }
pure fn hash_u64(val: u64) -> u64 { fn keyed(k0: u64, k1: u64, f: fn(s: &State)) -> u64 {
let bytes: [u8]/8 = // Explicitly say 8 bytes to be mistaken-change-proof. let s = &State(k0, k1);
[(val >> 00) as u8, f(s);
(val >> 08) as u8, s.result_u64()
(val >> 16) as u8,
(val >> 24) as u8,
(val >> 32) as u8,
(val >> 40) as u8,
(val >> 48) as u8,
(val >> 56) as u8];
hash_bytes(bytes)
}
pure fn hash_u32(val: u32) -> u64 {
let bytes: [u8]/4 = // Explicitly say 4 bytes to be mistaken-change-proof.
[(val >> 00) as u8,
(val >> 08) as u8,
(val >> 16) as u8,
(val >> 24) as u8];
hash_bytes(bytes)
}
#[cfg(target_arch = "arm")]
pure fn hash_uint(val: uint) -> u64 {
assert sys::size_of::<uint>() == sys::size_of::<u32>();
hash_u32(val as u32)
}
#[cfg(target_arch = "x86_64")]
pure fn hash_uint(val: uint) -> u64 {
assert sys::size_of::<uint>() == sys::size_of::<u64>();
hash_u64(val as u64)
}
#[cfg(target_arch = "x86")]
pure fn hash_uint(val: uint) -> u64 {
assert sys::size_of::<uint>() == sys::size_of::<u32>();
hash_u32(val as u32)
} }
pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 { pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.input(buf)) }
}
pure fn hash_str_keyed(s: &str, k0: u64, k1: u64) -> u64 {
unsafe {
do str::as_buf(s) |buf, len| {
do vec::unsafe::form_slice(buf, len) |slice| {
hash_bytes_keyed(slice, k0, k1)
}
}
}
}
pure fn hash_u64_keyed(val: u64, k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.write_le_u64(val)) }
}
pure fn hash_u32_keyed(val: u32, k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.write_le_u32(val)) }
}
pure fn hash_u16_keyed(val: u16, k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.write_le_u16(val)) }
}
pure fn hash_u8_keyed(val: u8, k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.write_u8(val)) }
}
pure fn hash_uint_keyed(val: uint, k0: u64, k1: u64) -> u64 {
unchecked { keyed(k0, k1, |s| s.write_le_uint(val)) }
}
let mut v0 : u64 = k0 ^ 0x736f_6d65_7073_6575; pure fn hash_bytes(val: &[const u8]) -> u64 { hash_bytes_keyed(val, 0, 0) }
let mut v1 : u64 = k1 ^ 0x646f_7261_6e64_6f6d; pure fn hash_str(val: &str) -> u64 { hash_str_keyed(val, 0, 0) }
let mut v2 : u64 = k0 ^ 0x6c79_6765_6e65_7261; pure fn hash_u64(val: u64) -> u64 { hash_u64_keyed(val, 0, 0) }
let mut v3 : u64 = k1 ^ 0x7465_6462_7974_6573; pure fn hash_u32(val: u32) -> u64 { hash_u32_keyed(val, 0, 0) }
pure fn hash_u16(val: u16) -> u64 { hash_u16_keyed(val, 0, 0) }
pure fn hash_u8(val: u8) -> u64 { hash_u8_keyed(val, 0, 0) }
pure fn hash_uint(val: uint) -> u64 { hash_uint_keyed(val, 0, 0) }
// Implement State as SipState
type State = SipState;
fn State(k0: u64, k1: u64) -> State {
SipState(k0, k1)
}
fn default_state() -> State {
State(0,0)
}
struct SipState {
k0: u64;
k1: u64;
mut length: uint; // how many bytes we've processed
mut v0: u64; // hash state
mut v1: u64;
mut v2: u64;
mut v3: u64;
tail: [mut u8]/8; // unprocessed bytes
mut ntail: uint; // how many bytes in tail are valid
}
fn SipState(key0: u64, key1: u64) -> SipState {
let state = SipState {
k0 : key0,
k1 : key1,
mut length : 0u,
mut v0 : 0u64,
mut v1 : 0u64,
mut v2 : 0u64,
mut v3 : 0u64,
tail : [mut 0u8,0,0,0,0,0,0,0],
mut ntail : 0u,
};
(&state).reset();
return state;
}
impl &SipState : io::writer {
// Methods for io::writer
fn write(msg: &[const u8]) {
#macro[[#u8to64_le(buf,i), #macro[[#u8to64_le(buf,i),
(buf[0+i] as u64 | (buf[0+i] as u64 |
@ -77,104 +146,41 @@ pure fn hash_bytes_keyed(buf: &[const u8], k0: u64, k1: u64) -> u64 {
v2 += v1; v1 = #rotl(v1, 17); v1 ^= v2; v2 = #rotl(v2, 32); v2 += v1; v1 = #rotl(v1, 17); v1 ^= v2; v2 = #rotl(v2, 32);
}]]; }]];
let len = vec::len(buf); let length = msg.len();
let end = len & (!0x7); self.length += length;
let left = len & 0x7;
let mut i = 0;
while i < end {
let m = u8to64_le!{buf, i};
v3 ^= m;
compress!{v0,v1,v2,v3};
compress!{v0,v1,v2,v3};
v0 ^= m;
i += 8;
}
let mut b : u64 = (len as u64 & 0xff) << 56;
if left > 0 { b |= buf[i] as u64; }
if left > 1 { b |= buf[i + 1] as u64 << 8; }
if left > 2 { b |= buf[i + 2] as u64 << 16; }
if left > 3 { b |= buf[i + 3] as u64 << 24; }
if left > 4 { b |= buf[i + 4] as u64 << 32; }
if left > 5 { b |= buf[i + 5] as u64 << 40; }
if left > 6 { b |= buf[i + 6] as u64 << 48; }
v3 ^= b;
compress!{v0,v1,v2,v3};
compress!{v0,v1,v2,v3};
v0 ^= b;
v2 ^= 0xff;
compress!{v0,v1,v2,v3};
compress!{v0,v1,v2,v3};
compress!{v0,v1,v2,v3};
compress!{v0,v1,v2,v3};
return v0 ^ v1 ^ v2 ^ v3;
}
trait streaming {
fn input(~[u8]);
fn input_str(~str);
fn result() -> ~[u8];
fn result_str() -> ~str;
fn reset();
}
fn siphash(key0 : u64, key1 : u64) -> streaming {
type sipstate = {
k0 : u64,
k1 : u64,
mut length : uint, // how many bytes we've processed
mut v0 : u64, // hash state
mut v1 : u64,
mut v2 : u64,
mut v3 : u64,
tail : ~[mut u8]/8, // unprocessed bytes
mut ntail : uint, // how many bytes in tail are valid
};
fn add_input(st : sipstate, msg : ~[u8]) {
let length = vec::len(msg);
st.length += length;
let mut needed = 0u; let mut needed = 0u;
if st.ntail != 0 { if self.ntail != 0 {
needed = 8 - st.ntail; needed = 8 - self.ntail;
if length < needed { if length < needed {
let mut t = 0; let mut t = 0;
while t < length { while t < length {
st.tail[st.ntail+t] = msg[t]; self.tail[self.ntail+t] = msg[t];
t += 1; t += 1;
} }
st.ntail += length; self.ntail += length;
return; return;
} }
let mut t = 0; let mut t = 0;
while t < needed { while t < needed {
st.tail[st.ntail+t] = msg[t]; self.tail[self.ntail+t] = msg[t];
t += 1; t += 1;
} }
let m = u8to64_le!{st.tail, 0}; let m = u8to64_le!{self.tail, 0};
st.v3 ^= m; self.v3 ^= m;
compress!{st.v0, st.v1, st.v2, st.v3}; compress!{self.v0, self.v1, self.v2, self.v3};
compress!{st.v0, st.v1, st.v2, st.v3}; compress!{self.v0, self.v1, self.v2, self.v3};
st.v0 ^= m; self.v0 ^= m;
st.ntail = 0; self.ntail = 0;
} }
// Buffered tail is now flushed, process new input.
let len = length - needed; let len = length - needed;
let end = len & (!0x7); let end = len & (!0x7);
let left = len & 0x7; let left = len & 0x7;
@ -183,38 +189,57 @@ fn siphash(key0 : u64, key1 : u64) -> streaming {
while i < end { while i < end {
let mi = u8to64_le!{msg, i}; let mi = u8to64_le!{msg, i};
st.v3 ^= mi; self.v3 ^= mi;
compress!{st.v0, st.v1, st.v2, st.v3}; compress!{self.v0, self.v1, self.v2, self.v3};
compress!{st.v0, st.v1, st.v2, st.v3}; compress!{self.v0, self.v1, self.v2, self.v3};
st.v0 ^= mi; self.v0 ^= mi;
i += 8; i += 8;
} }
let mut t = 0u; let mut t = 0u;
while t < left { while t < left {
st.tail[t] = msg[i+t]; self.tail[t] = msg[i+t];
t += 1 t += 1
} }
st.ntail = left; self.ntail = left;
} }
fn mk_result(st : sipstate) -> ~[u8] { fn seek(_x: int, _s: io::seek_style) {
fail;
}
fn tell() -> uint {
self.length
}
fn flush() -> int {
0
}
fn get_type() -> io::writer_type {
io::file
}
}
let mut v0 = st.v0; impl &SipState : Streaming {
let mut v1 = st.v1;
let mut v2 = st.v2;
let mut v3 = st.v3;
let mut b : u64 = (st.length as u64 & 0xff) << 56; fn input(buf: &[const u8]) {
self.write(buf);
}
if st.ntail > 0 { b |= st.tail[0] as u64 << 0; } fn result_u64() -> u64 {
if st.ntail > 1 { b |= st.tail[1] as u64 << 8; } let mut v0 = self.v0;
if st.ntail > 2 { b |= st.tail[2] as u64 << 16; } let mut v1 = self.v1;
if st.ntail > 3 { b |= st.tail[3] as u64 << 24; } let mut v2 = self.v2;
if st.ntail > 4 { b |= st.tail[4] as u64 << 32; } let mut v3 = self.v3;
if st.ntail > 5 { b |= st.tail[5] as u64 << 40; }
if st.ntail > 6 { b |= st.tail[6] as u64 << 48; } let mut b : u64 = (self.length as u64 & 0xff) << 56;
if self.ntail > 0 { b |= self.tail[0] as u64 << 0; }
if self.ntail > 1 { b |= self.tail[1] as u64 << 8; }
if self.ntail > 2 { b |= self.tail[2] as u64 << 16; }
if self.ntail > 3 { b |= self.tail[3] as u64 << 24; }
if self.ntail > 4 { b |= self.tail[4] as u64 << 32; }
if self.ntail > 5 { b |= self.tail[5] as u64 << 40; }
if self.ntail > 6 { b |= self.tail[6] as u64 << 48; }
v3 ^= b; v3 ^= b;
compress!{v0, v1, v2, v3}; compress!{v0, v1, v2, v3};
@ -227,10 +252,13 @@ fn siphash(key0 : u64, key1 : u64) -> streaming {
compress!{v0, v1, v2, v3}; compress!{v0, v1, v2, v3};
compress!{v0, v1, v2, v3}; compress!{v0, v1, v2, v3};
let h = v0 ^ v1 ^ v2 ^ v3; return (v0 ^ v1 ^ v2 ^ v3);
}
return ~[
(h >> 0) as u8, fn result_bytes() -> ~[u8] {
let h = self.result_u64();
~[(h >> 0) as u8,
(h >> 8) as u8, (h >> 8) as u8,
(h >> 16) as u8, (h >> 16) as u8,
(h >> 24) as u8, (h >> 24) as u8,
@ -238,10 +266,16 @@ fn siphash(key0 : u64, key1 : u64) -> streaming {
(h >> 40) as u8, (h >> 40) as u8,
(h >> 48) as u8, (h >> 48) as u8,
(h >> 56) as u8, (h >> 56) as u8,
]; ]
}
fn result_str() -> ~str {
let r = self.result_bytes();
let mut s = ~"";
for vec::each(r) |b| { s += uint::to_str(b as uint, 16u); }
return s;
} }
impl of streaming for sipstate {
fn reset() { fn reset() {
self.length = 0; self.length = 0;
self.v0 = self.k0 ^ 0x736f6d6570736575; self.v0 = self.k0 ^ 0x736f6d6570736575;
@ -250,32 +284,6 @@ fn siphash(key0 : u64, key1 : u64) -> streaming {
self.v3 = self.k1 ^ 0x7465646279746573; self.v3 = self.k1 ^ 0x7465646279746573;
self.ntail = 0; self.ntail = 0;
} }
fn input(msg: ~[u8]) { add_input(self, msg); }
fn input_str(msg: ~str) { add_input(self, str::bytes(msg)); }
fn result() -> ~[u8] { return mk_result(self); }
fn result_str() -> ~str {
let r = mk_result(self);
let mut s = ~"";
for vec::each(r) |b| { s += uint::to_str(b as uint, 16u); }
return s;
}
}
let st = {
k0 : key0,
k1 : key1,
mut length : 0u,
mut v0 : 0u64,
mut v1 : 0u64,
mut v2 : 0u64,
mut v3 : 0u64,
tail : ~[mut 0u8,0,0,0,0,0,0,0]/8,
mut ntail : 0u,
};
let sh = st as streaming;
sh.reset();
return sh;
} }
#[test] #[test]
@ -351,8 +359,8 @@ fn test_siphash() {
let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08_u64; let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08_u64;
let mut buf : ~[u8] = ~[]; let mut buf : ~[u8] = ~[];
let mut t = 0; let mut t = 0;
let stream_inc = siphash(k0,k1); let stream_inc = &State(k0,k1);
let stream_full = siphash(k0,k1); let stream_full = &State(k0,k1);
fn to_hex_str(r:[u8]/8) -> ~str { fn to_hex_str(r:[u8]/8) -> ~str {
let mut s = ~""; let mut s = ~"";

View file

@ -8,6 +8,7 @@
*/ */
import libc::size_t; import libc::size_t;
import io::writer_util;
export export
// Creating a string // Creating a string
@ -665,10 +666,7 @@ pure fn le(a: &~str, b: &~str) -> bool { *a <= *b }
/// String hash function /// String hash function
pure fn hash(s: &~str) -> uint { pure fn hash(s: &~str) -> uint {
let x = do as_bytes(*s) |bytes| { hash::hash_str(*s) as uint
hash::hash_bytes(bytes)
};
return x as uint;
} }
/* /*

View file

@ -15,6 +15,7 @@ import lib::llvm::{ModuleRef, mk_pass_manager, mk_target_data, True, False,
FileType}; FileType};
import metadata::filesearch; import metadata::filesearch;
import syntax::ast_map::{path, path_mod, path_name}; import syntax::ast_map::{path, path_mod, path_name};
import io::{writer, writer_util};
enum output_type { enum output_type {
output_type_none, output_type_none,
@ -307,7 +308,7 @@ mod write {
*/ */
fn build_link_meta(sess: session, c: ast::crate, output: ~str, fn build_link_meta(sess: session, c: ast::crate, output: ~str,
symbol_hasher: hash::streaming) -> link_meta { symbol_hasher: &hash::State) -> link_meta {
type provided_metas = type provided_metas =
{name: option<@~str>, {name: option<@~str>,
@ -338,7 +339,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str,
} }
// This calculates CMH as defined above // This calculates CMH as defined above
fn crate_meta_extras_hash(symbol_hasher: hash::streaming, fn crate_meta_extras_hash(symbol_hasher: &hash::State,
_crate: ast::crate, _crate: ast::crate,
metas: provided_metas, metas: provided_metas,
dep_hashes: ~[@~str]) -> ~str { dep_hashes: ~[@~str]) -> ~str {
@ -357,11 +358,11 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str,
let m = m_; let m = m_;
alt m.node { alt m.node {
ast::meta_name_value(key, value) { ast::meta_name_value(key, value) {
symbol_hasher.input_str(len_and_str(*key)); symbol_hasher.write_str(len_and_str(*key));
symbol_hasher.input_str(len_and_str_lit(value)); symbol_hasher.write_str(len_and_str_lit(value));
} }
ast::meta_word(name) { ast::meta_word(name) {
symbol_hasher.input_str(len_and_str(*name)); symbol_hasher.write_str(len_and_str(*name));
} }
ast::meta_list(_, _) { ast::meta_list(_, _) {
// FIXME (#607): Implement this // FIXME (#607): Implement this
@ -371,7 +372,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str,
} }
for dep_hashes.each |dh| { for dep_hashes.each |dh| {
symbol_hasher.input_str(len_and_str(*dh)); symbol_hasher.write_str(len_and_str(*dh));
} }
return truncated_hash_result(symbol_hasher); return truncated_hash_result(symbol_hasher);
@ -427,23 +428,23 @@ fn build_link_meta(sess: session, c: ast::crate, output: ~str,
return {name: name, vers: vers, extras_hash: extras_hash}; return {name: name, vers: vers, extras_hash: extras_hash};
} }
fn truncated_hash_result(symbol_hasher: hash::streaming) -> ~str unsafe { fn truncated_hash_result(symbol_hasher: &hash::State) -> ~str unsafe {
symbol_hasher.result_str() symbol_hasher.result_str()
} }
// This calculates STH for a symbol, as defined above // This calculates STH for a symbol, as defined above
fn symbol_hash(tcx: ty::ctxt, symbol_hasher: hash::streaming, t: ty::t, fn symbol_hash(tcx: ty::ctxt, symbol_hasher: &hash::State, t: ty::t,
link_meta: link_meta) -> ~str { link_meta: link_meta) -> ~str {
// NB: do *not* use abbrevs here as we want the symbol names // NB: do *not* use abbrevs here as we want the symbol names
// to be independent of one another in the crate. // to be independent of one another in the crate.
symbol_hasher.reset(); symbol_hasher.reset();
symbol_hasher.input_str(*link_meta.name); symbol_hasher.write_str(*link_meta.name);
symbol_hasher.input_str(~"-"); symbol_hasher.write_str(~"-");
symbol_hasher.input_str(link_meta.extras_hash); symbol_hasher.write_str(link_meta.extras_hash);
symbol_hasher.input_str(~"-"); symbol_hasher.write_str(~"-");
symbol_hasher.input_str(encoder::encoded_ty(tcx, t)); symbol_hasher.write_str(encoder::encoded_ty(tcx, t));
let hash = truncated_hash_result(symbol_hasher); let hash = truncated_hash_result(symbol_hasher);
// Prefix with _ so that it never blends into adjacent digits // Prefix with _ so that it never blends into adjacent digits

View file

@ -5493,7 +5493,7 @@ fn trans_crate(sess: session::session, crate: @ast::crate, tcx: ty::ctxt,
output: ~str, emap: resolve3::ExportMap, output: ~str, emap: resolve3::ExportMap,
maps: astencode::maps) maps: astencode::maps)
-> (ModuleRef, link_meta) { -> (ModuleRef, link_meta) {
let symbol_hasher = hash::siphash(0,0); let symbol_hasher = @hash::default_state();
let link_meta = let link_meta =
link::build_link_meta(sess, *crate, output, symbol_hasher); link::build_link_meta(sess, *crate, output, symbol_hasher);
let reachable = reachable::find_reachable(crate.node.module, emap, tcx, let reachable = reachable::find_reachable(crate.node.module, emap, tcx,

View file

@ -110,7 +110,7 @@ type crate_ctxt = {
module_data: hashmap<~str, ValueRef>, module_data: hashmap<~str, ValueRef>,
lltypes: hashmap<ty::t, TypeRef>, lltypes: hashmap<ty::t, TypeRef>,
names: namegen, names: namegen,
symbol_hasher: hash::streaming, symbol_hasher: @hash::State,
type_hashcodes: hashmap<ty::t, ~str>, type_hashcodes: hashmap<ty::t, ~str>,
type_short_names: hashmap<ty::t, ~str>, type_short_names: hashmap<ty::t, ~str>,
all_llvm_symbols: set<~str>, all_llvm_symbols: set<~str>,