From 2e7df800986cb7eee66cfbb4bd98104c45189d57 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Mon, 1 Aug 2016 19:55:20 -0400 Subject: [PATCH] make metadata hashes determinstic When we hash the inputs to a MetaData node, we have to hash them in a consistent order. We achieve this by sorting the stringfied `DefPath` entries. Also, micro-optimie by cache more results across the saving process. --- src/librustc/hir/map/definitions.rs | 24 ++++++++ src/librustc_incremental/calculate_svh.rs | 7 +-- src/librustc_incremental/persist/directory.rs | 9 ++- src/librustc_incremental/persist/hash.rs | 6 +- src/librustc_incremental/persist/load.rs | 7 ++- src/librustc_incremental/persist/save.rs | 57 ++++++++++++------- src/librustc_trans/back/symbol_names.rs | 27 +-------- 7 files changed, 79 insertions(+), 58 deletions(-) diff --git a/src/librustc/hir/map/definitions.rs b/src/librustc/hir/map/definitions.rs index 3317585f820..0247647fc14 100644 --- a/src/librustc/hir/map/definitions.rs +++ b/src/librustc/hir/map/definitions.rs @@ -12,8 +12,10 @@ use middle::cstore::LOCAL_CRATE; use hir::def_id::{DefId, DefIndex}; use hir::map::def_collector::DefCollector; use rustc_data_structures::fnv::FnvHashMap; +use std::fmt::Write; use syntax::{ast, visit}; use syntax::parse::token::InternedString; +use ty::TyCtxt; use util::nodemap::NodeMap; /// The definition table containing node definitions @@ -109,6 +111,28 @@ impl DefPath { data.reverse(); DefPath { data: data, krate: krate } } + + pub fn to_string(&self, tcx: TyCtxt) -> String { + let mut s = String::with_capacity(self.data.len() * 16); + + if self.krate == LOCAL_CRATE { + s.push_str(&tcx.crate_name(self.krate)); + } else { + s.push_str(&tcx.sess.cstore.original_crate_name(self.krate)); + } + s.push_str("/"); + s.push_str(&tcx.crate_disambiguator(self.krate)); + + for component in &self.data { + write!(s, + "::{}[{}]", + component.data.as_interned_str(), + component.disambiguator) + .unwrap(); + } + + s + } } /// Root of an inlined item. We track the `DefPath` of the item within diff --git a/src/librustc_incremental/calculate_svh.rs b/src/librustc_incremental/calculate_svh.rs index af579fa10fb..6ab429ffe00 100644 --- a/src/librustc_incremental/calculate_svh.rs +++ b/src/librustc_incremental/calculate_svh.rs @@ -144,12 +144,7 @@ mod svh_visitor { } fn hash_def_path(&mut self, path: &DefPath) { - self.tcx.crate_name(path.krate).hash(self.st); - self.tcx.crate_disambiguator(path.krate).hash(self.st); - for data in &path.data { - data.data.as_interned_str().hash(self.st); - data.disambiguator.hash(self.st); - } + path.to_string(self.tcx).hash(self.st); } } diff --git a/src/librustc_incremental/persist/directory.rs b/src/librustc_incremental/persist/directory.rs index 332eeae7202..85aa9d28e5f 100644 --- a/src/librustc_incremental/persist/directory.rs +++ b/src/librustc_incremental/persist/directory.rs @@ -159,12 +159,17 @@ impl<'a,'tcx> DefIdDirectoryBuilder<'a,'tcx> { .clone() } + pub fn lookup_def_path(&self, id: DefPathIndex) -> &DefPath { + &self.directory.paths[id.index as usize] + } + + pub fn map(&mut self, node: &DepNode) -> DepNode { node.map_def(|&def_id| Some(self.add(def_id))).unwrap() } - pub fn into_directory(self) -> DefIdDirectory { - self.directory + pub fn directory(&self) -> &DefIdDirectory { + &self.directory } } diff --git a/src/librustc_incremental/persist/hash.rs b/src/librustc_incremental/persist/hash.rs index e16371dd0ce..0cef5fab71a 100644 --- a/src/librustc_incremental/persist/hash.rs +++ b/src/librustc_incremental/persist/hash.rs @@ -39,11 +39,11 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> { } } - pub fn hash(&mut self, dep_node: &DepNode) -> Option { + pub fn hash(&mut self, dep_node: &DepNode) -> Option<(DefId, u64)> { match *dep_node { // HIR nodes (which always come from our crate) are an input: DepNode::Hir(def_id) => { - Some(self.hir_hash(def_id)) + Some((def_id, self.hir_hash(def_id))) } // MetaData from other crates is an *input* to us. @@ -51,7 +51,7 @@ impl<'a, 'tcx> HashContext<'a, 'tcx> { // don't hash them, but we do compute a hash for them and // save it for others to use. DepNode::MetaData(def_id) if !def_id.is_local() => { - Some(self.metadata_hash(def_id)) + Some((def_id, self.metadata_hash(def_id))) } _ => { diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs index 82b1da67a4d..b47f2221e56 100644 --- a/src/librustc_incremental/persist/load.rs +++ b/src/librustc_incremental/persist/load.rs @@ -163,13 +163,14 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let mut items_removed = false; let mut dirty_nodes = FnvHashSet(); for hash in hashes { - debug!("initial_dirty_nodes: retracing {:?}", hash); match hash.node.map_def(|&i| retraced.def_id(i)) { Some(dep_node) => { - let current_hash = hcx.hash(&dep_node).unwrap(); + let (_, current_hash) = hcx.hash(&dep_node).unwrap(); if current_hash != hash.hash { debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}", - dep_node, current_hash, hash.hash); + dep_node.map_def(|&def_id| Some(tcx.def_path(def_id))).unwrap(), + current_hash, + hash.hash); dirty_nodes.insert(dep_node); } } diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs index 8bc1e80fc7c..64da7ad9ceb 100644 --- a/src/librustc_incremental/persist/save.rs +++ b/src/librustc_incremental/persist/save.rs @@ -16,7 +16,7 @@ use rustc::session::Session; use rustc::ty::TyCtxt; use rustc_data_structures::fnv::FnvHashMap; use rustc_serialize::{Encodable as RustcEncodable}; -use std::hash::{Hasher, SipHasher}; +use std::hash::{Hash, Hasher, SipHasher}; use std::io::{self, Cursor, Write}; use std::fs::{self, File}; use std::path::PathBuf; @@ -30,9 +30,14 @@ pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) { debug!("save_dep_graph()"); let _ignore = tcx.dep_graph.in_ignore(); let sess = tcx.sess; + if sess.opts.incremental.is_none() { + return; + } let mut hcx = HashContext::new(tcx); - save_in(sess, dep_graph_path(tcx), |e| encode_dep_graph(&mut hcx, e)); - save_in(sess, metadata_hash_path(tcx, LOCAL_CRATE), |e| encode_metadata_hashes(&mut hcx, e)); + let mut builder = DefIdDirectoryBuilder::new(tcx); + let query = tcx.dep_graph.query(); + save_in(sess, dep_graph_path(tcx), |e| encode_dep_graph(&mut hcx, &mut builder, &query, e)); + save_in(sess, metadata_hash_path(tcx, LOCAL_CRATE), |e| encode_metadata_hashes(&mut hcx, &mut builder, &query, e)); } pub fn save_work_products(sess: &Session, local_crate_name: &str) { @@ -96,21 +101,19 @@ fn save_in(sess: &Session, } pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, + builder: &mut DefIdDirectoryBuilder, + query: &DepGraphQuery, encoder: &mut Encoder) -> io::Result<()> { - let tcx = hcx.tcx; - let query = tcx.dep_graph.query(); let (nodes, edges) = post_process_graph(hcx, query); - let mut builder = DefIdDirectoryBuilder::new(tcx); - // Create hashes for inputs. let hashes = nodes.iter() .filter_map(|dep_node| { hcx.hash(dep_node) - .map(|hash| { + .map(|(_, hash)| { let node = builder.map(dep_node); SerializedHash { node: node, hash: hash } }) @@ -133,15 +136,14 @@ pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, debug!("graph = {:#?}", graph); // Encode the directory and then the graph data. - let directory = builder.into_directory(); - try!(directory.encode(encoder)); + try!(builder.directory().encode(encoder)); try!(graph.encode(encoder)); Ok(()) } pub fn post_process_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, - query: DepGraphQuery) + query: &DepGraphQuery) -> (Vec>, Vec<(DepNode, DepNode)>) { let tcx = hcx.tcx; @@ -192,11 +194,12 @@ pub fn post_process_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, + builder: &mut DefIdDirectoryBuilder, + query: &DepGraphQuery, encoder: &mut Encoder) -> io::Result<()> { let tcx = hcx.tcx; - let query = tcx.dep_graph.query(); let serialized_hashes = { // Identify the `MetaData(X)` nodes where `X` is local. These are @@ -224,16 +227,32 @@ pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, let dep_node = DepNode::MetaData(def_id); let mut state = SipHasher::new(); debug!("save: computing metadata hash for {:?}", dep_node); - for node in query.transitive_predecessors(&dep_node) { - if let Some(hash) = hcx.hash(&node) { - debug!("save: predecessor {:?} has hash {}", node, hash); - state.write_u64(hash.to_le()); - } else { - debug!("save: predecessor {:?} cannot be hashed", node); - } + + let predecessors = query.transitive_predecessors(&dep_node); + let mut hashes: Vec<_> = + predecessors.iter() + .filter_map(|node| hcx.hash(&node)) + .map(|(def_id, hash)| { + let index = builder.add(def_id); + let path = builder.lookup_def_path(index); + (path.to_string(tcx), hash) // (*) + }) + .collect(); + + // (*) creating a `String` from each def-path is a bit inefficient, + // but it's the easiest way to get a deterministic ord/hash. + + hashes.sort(); + state.write_usize(hashes.len()); + for (path, hash) in hashes { + debug!("save: predecessor {:?} has hash {}", path, hash); + path.hash(&mut state); + state.write_u64(hash.to_le()); } + let hash = state.finish(); debug!("save: metadata hash for {:?} is {}", dep_node, hash); + SerializedMetadataHash { def_index: def_id.index, hash: hash, diff --git a/src/librustc_trans/back/symbol_names.rs b/src/librustc_trans/back/symbol_names.rs index ebb6e0baf20..5e2c0805c2e 100644 --- a/src/librustc_trans/back/symbol_names.rs +++ b/src/librustc_trans/back/symbol_names.rs @@ -108,36 +108,13 @@ use rustc::ty::{self, TyCtxt, TypeFoldable}; use rustc::ty::item_path::{self, ItemPathBuffer, RootMode}; use rustc::hir::map::definitions::{DefPath, DefPathData}; -use std::fmt::Write; use syntax::attr; use syntax::parse::token::{self, InternedString}; use serialize::hex::ToHex; pub fn def_id_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> String { let def_path = tcx.def_path(def_id); - def_path_to_string(tcx, &def_path) -} - -fn def_path_to_string<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_path: &DefPath) -> String { - let mut s = String::with_capacity(def_path.data.len() * 16); - - if def_path.krate == cstore::LOCAL_CRATE { - s.push_str(&tcx.crate_name(def_path.krate)); - } else { - s.push_str(&tcx.sess.cstore.original_crate_name(def_path.krate)); - } - s.push_str("/"); - s.push_str(&tcx.crate_disambiguator(def_path.krate)); - - for component in &def_path.data { - write!(s, - "::{}[{}]", - component.data.as_interned_str(), - component.disambiguator) - .unwrap(); - } - - s + def_path.to_string(tcx) } fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>, @@ -167,7 +144,7 @@ fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>, // the main symbol name is not necessarily unique; hash in the // compiler's internal def-path, guaranteeing each symbol has a // truly unique path - hash_state.input_str(&def_path_to_string(tcx, def_path)); + hash_state.input_str(&def_path.to_string(tcx)); // Include the main item-type. Note that, in this case, the // assertions about `needs_subst` may not hold, but this item-type