1
Fork 0

make metadata hashes determinstic

When we hash the inputs to a MetaData node, we have to hash them in a
consistent order. We achieve this by sorting the stringfied `DefPath`
entries. Also, micro-optimie by cache more results across the saving
process.
This commit is contained in:
Niko Matsakis 2016-08-01 19:55:20 -04:00
parent 2797b2a5ca
commit 2e7df80098
7 changed files with 79 additions and 58 deletions

View file

@ -12,8 +12,10 @@ use middle::cstore::LOCAL_CRATE;
use hir::def_id::{DefId, DefIndex};
use hir::map::def_collector::DefCollector;
use rustc_data_structures::fnv::FnvHashMap;
use std::fmt::Write;
use syntax::{ast, visit};
use syntax::parse::token::InternedString;
use ty::TyCtxt;
use util::nodemap::NodeMap;
/// The definition table containing node definitions
@ -109,6 +111,28 @@ impl DefPath {
data.reverse();
DefPath { data: data, krate: krate }
}
pub fn to_string(&self, tcx: TyCtxt) -> String {
let mut s = String::with_capacity(self.data.len() * 16);
if self.krate == LOCAL_CRATE {
s.push_str(&tcx.crate_name(self.krate));
} else {
s.push_str(&tcx.sess.cstore.original_crate_name(self.krate));
}
s.push_str("/");
s.push_str(&tcx.crate_disambiguator(self.krate));
for component in &self.data {
write!(s,
"::{}[{}]",
component.data.as_interned_str(),
component.disambiguator)
.unwrap();
}
s
}
}
/// Root of an inlined item. We track the `DefPath` of the item within

View file

@ -144,12 +144,7 @@ mod svh_visitor {
}
fn hash_def_path(&mut self, path: &DefPath) {
self.tcx.crate_name(path.krate).hash(self.st);
self.tcx.crate_disambiguator(path.krate).hash(self.st);
for data in &path.data {
data.data.as_interned_str().hash(self.st);
data.disambiguator.hash(self.st);
}
path.to_string(self.tcx).hash(self.st);
}
}

View file

@ -159,12 +159,17 @@ impl<'a,'tcx> DefIdDirectoryBuilder<'a,'tcx> {
.clone()
}
pub fn lookup_def_path(&self, id: DefPathIndex) -> &DefPath {
&self.directory.paths[id.index as usize]
}
pub fn map(&mut self, node: &DepNode<DefId>) -> DepNode<DefPathIndex> {
node.map_def(|&def_id| Some(self.add(def_id))).unwrap()
}
pub fn into_directory(self) -> DefIdDirectory {
self.directory
pub fn directory(&self) -> &DefIdDirectory {
&self.directory
}
}

View file

@ -39,11 +39,11 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> {
}
}
pub fn hash(&mut self, dep_node: &DepNode<DefId>) -> Option<u64> {
pub fn hash(&mut self, dep_node: &DepNode<DefId>) -> Option<(DefId, u64)> {
match *dep_node {
// HIR nodes (which always come from our crate) are an input:
DepNode::Hir(def_id) => {
Some(self.hir_hash(def_id))
Some((def_id, self.hir_hash(def_id)))
}
// MetaData from other crates is an *input* to us.
@ -51,7 +51,7 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> {
// don't hash them, but we do compute a hash for them and
// save it for others to use.
DepNode::MetaData(def_id) if !def_id.is_local() => {
Some(self.metadata_hash(def_id))
Some((def_id, self.metadata_hash(def_id)))
}
_ => {

View file

@ -163,13 +163,14 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let mut items_removed = false;
let mut dirty_nodes = FnvHashSet();
for hash in hashes {
debug!("initial_dirty_nodes: retracing {:?}", hash);
match hash.node.map_def(|&i| retraced.def_id(i)) {
Some(dep_node) => {
let current_hash = hcx.hash(&dep_node).unwrap();
let (_, current_hash) = hcx.hash(&dep_node).unwrap();
if current_hash != hash.hash {
debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
dep_node, current_hash, hash.hash);
dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(),
current_hash,
hash.hash);
dirty_nodes.insert(dep_node);
}
}

View file

@ -16,7 +16,7 @@ use rustc::session::Session;
use rustc::ty::TyCtxt;
use rustc_data_structures::fnv::FnvHashMap;
use rustc_serialize::{Encodable as RustcEncodable};
use std::hash::{Hasher, SipHasher};
use std::hash::{Hash, Hasher, SipHasher};
use std::io::{self, Cursor, Write};
use std::fs::{self, File};
use std::path::PathBuf;
@ -30,9 +30,14 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
debug!("save_dep_graph()");
let _ignore = tcx.dep_graph.in_ignore();
let sess = tcx.sess;
if sess.opts.incremental.is_none() {
return;
}
let mut hcx = HashContext::new(tcx);
save_in(sess, dep_graph_path(tcx), |e| encode_dep_graph(&mut hcx, e));
save_in(sess, metadata_hash_path(tcx, LOCAL_CRATE), |e| encode_metadata_hashes(&mut hcx, e));
let mut builder = DefIdDirectoryBuilder::new(tcx);
let query = tcx.dep_graph.query();
save_in(sess, dep_graph_path(tcx), |e| encode_dep_graph(&mut hcx, &mut builder, &query, e));
save_in(sess, metadata_hash_path(tcx, LOCAL_CRATE), |e| encode_metadata_hashes(&mut hcx, &mut builder, &query, e));
}
pub fn save_work_products(sess: &Session, local_crate_name: &str) {
@ -96,21 +101,19 @@ fn save_in<F>(sess: &Session,
}
pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
builder: &mut DefIdDirectoryBuilder,
query: &DepGraphQuery<DefId>,
encoder: &mut Encoder)
-> io::Result<()>
{
let tcx = hcx.tcx;
let query = tcx.dep_graph.query();
let (nodes, edges) = post_process_graph(hcx, query);
let mut builder = DefIdDirectoryBuilder::new(tcx);
// Create hashes for inputs.
let hashes =
nodes.iter()
.filter_map(|dep_node| {
hcx.hash(dep_node)
.map(|hash| {
.map(|(_, hash)| {
let node = builder.map(dep_node);
SerializedHash { node: node, hash: hash }
})
@ -133,15 +136,14 @@ pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
debug!("graph = {:#?}", graph);
// Encode the directory and then the graph data.
let directory = builder.into_directory();
try!(directory.encode(encoder));
try!(builder.directory().encode(encoder));
try!(graph.encode(encoder));
Ok(())
}
pub fn post_process_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
query: DepGraphQuery<DefId>)
query: &DepGraphQuery<DefId>)
-> (Vec<DepNode<DefId>>, Vec<(DepNode<DefId>, DepNode<DefId>)>)
{
let tcx = hcx.tcx;
@ -192,11 +194,12 @@ pub fn post_process_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
builder: &mut DefIdDirectoryBuilder,
query: &DepGraphQuery<DefId>,
encoder: &mut Encoder)
-> io::Result<()>
{
let tcx = hcx.tcx;
let query = tcx.dep_graph.query();
let serialized_hashes = {
// Identify the `MetaData(X)` nodes where `X` is local. These are
@ -224,16 +227,32 @@ pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
let dep_node = DepNode::MetaData(def_id);
let mut state = SipHasher::new();
debug!("save: computing metadata hash for {:?}", dep_node);
for node in query.transitive_predecessors(&dep_node) {
if let Some(hash) = hcx.hash(&node) {
debug!("save: predecessor {:?} has hash {}", node, hash);
state.write_u64(hash.to_le());
} else {
debug!("save: predecessor {:?} cannot be hashed", node);
}
let predecessors = query.transitive_predecessors(&dep_node);
let mut hashes: Vec<_> =
predecessors.iter()
.filter_map(|node| hcx.hash(&node))
.map(|(def_id, hash)| {
let index = builder.add(def_id);
let path = builder.lookup_def_path(index);
(path.to_string(tcx), hash) // (*)
})
.collect();
// (*) creating a `String` from each def-path is a bit inefficient,
// but it's the easiest way to get a deterministic ord/hash.
hashes.sort();
state.write_usize(hashes.len());
for (path, hash) in hashes {
debug!("save: predecessor {:?} has hash {}", path, hash);
path.hash(&mut state);
state.write_u64(hash.to_le());
}
let hash = state.finish();
debug!("save: metadata hash for {:?} is {}", dep_node, hash);
SerializedMetadataHash {
def_index: def_id.index,
hash: hash,

View file

@ -108,36 +108,13 @@ use rustc::ty::{self, TyCtxt, TypeFoldable};
use rustc::ty::item_path::{self, ItemPathBuffer, RootMode};
use rustc::hir::map::definitions::{DefPath, DefPathData};
use std::fmt::Write;
use syntax::attr;
use syntax::parse::token::{self, InternedString};
use serialize::hex::ToHex;
pub fn def_id_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> String {
let def_path = tcx.def_path(def_id);
def_path_to_string(tcx, &def_path)
}
fn def_path_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_path: &DefPath) -> String {
let mut s = String::with_capacity(def_path.data.len() * 16);
if def_path.krate == cstore::LOCAL_CRATE {
s.push_str(&tcx.crate_name(def_path.krate));
} else {
s.push_str(&tcx.sess.cstore.original_crate_name(def_path.krate));
}
s.push_str("/");
s.push_str(&tcx.crate_disambiguator(def_path.krate));
for component in &def_path.data {
write!(s,
"::{}[{}]",
component.data.as_interned_str(),
component.disambiguator)
.unwrap();
}
s
def_path.to_string(tcx)
}
fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>,
@ -167,7 +144,7 @@ fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>,
// the main symbol name is not necessarily unique; hash in the
// compiler's internal def-path, guaranteeing each symbol has a
// truly unique path
hash_state.input_str(&def_path_to_string(tcx, def_path));
hash_state.input_str(&def_path.to_string(tcx));
// Include the main item-type. Note that, in this case, the
// assertions about `needs_subst` may not hold, but this item-type