1
Fork 0

mv compiler to compiler/

This commit is contained in:
mark 2020-08-27 22:58:48 -05:00 committed by Vadim Petrochenkov
parent db534b3ac2
commit 9e5f7d5631
1686 changed files with 941 additions and 1051 deletions

View file

@ -0,0 +1,429 @@
//! This pass is only used for the UNIT TESTS and DEBUGGING NEEDS
//! around dependency graph construction. It serves two purposes; it
//! will dump graphs in graphviz form to disk, and it searches for
//! `#[rustc_if_this_changed]` and `#[rustc_then_this_would_need]`
//! annotations. These annotations can be used to test whether paths
//! exist in the graph. These checks run after codegen, so they view the
//! the final state of the dependency graph. Note that there are
//! similar assertions found in `persist::dirty_clean` which check the
//! **initial** state of the dependency graph, just after it has been
//! loaded from disk.
//!
//! In this code, we report errors on each `rustc_if_this_changed`
//! annotation. If a path exists in all cases, then we would report
//! "all path(s) exist". Otherwise, we report: "no path to `foo`" for
//! each case where no path exists. `compile-fail` tests can then be
//! used to check when paths exist or do not.
//!
//! The full form of the `rustc_if_this_changed` annotation is
//! `#[rustc_if_this_changed("foo")]`, which will report a
//! source node of `foo(def_id)`. The `"foo"` is optional and
//! defaults to `"Hir"` if omitted.
//!
//! Example:
//!
//! ```
//! #[rustc_if_this_changed(Hir)]
//! fn foo() { }
//!
//! #[rustc_then_this_would_need(codegen)] //~ ERROR no path from `foo`
//! fn bar() { }
//!
//! #[rustc_then_this_would_need(codegen)] //~ ERROR OK
//! fn baz() { foo(); }
//! ```
use rustc_ast as ast;
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::graph::implementation::{Direction, NodeIndex, INCOMING, OUTGOING};
use rustc_graphviz as dot;
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
use rustc_middle::dep_graph::debug::{DepNodeFilter, EdgeFilter};
use rustc_middle::dep_graph::{DepGraphQuery, DepKind, DepNode, DepNodeExt};
use rustc_middle::hir::map::Map;
use rustc_middle::ty::TyCtxt;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
use std::env;
use std::fs::{self, File};
use std::io::{BufWriter, Write};
pub fn assert_dep_graph(tcx: TyCtxt<'_>) {
tcx.dep_graph.with_ignore(|| {
if tcx.sess.opts.debugging_opts.dump_dep_graph {
dump_graph(tcx);
}
// if the `rustc_attrs` feature is not enabled, then the
// attributes we are interested in cannot be present anyway, so
// skip the walk.
if !tcx.features().rustc_attrs {
return;
}
// Find annotations supplied by user (if any).
let (if_this_changed, then_this_would_need) = {
let mut visitor =
IfThisChanged { tcx, if_this_changed: vec![], then_this_would_need: vec![] };
visitor.process_attrs(hir::CRATE_HIR_ID, &tcx.hir().krate().item.attrs);
tcx.hir().krate().visit_all_item_likes(&mut visitor.as_deep_visitor());
(visitor.if_this_changed, visitor.then_this_would_need)
};
if !if_this_changed.is_empty() || !then_this_would_need.is_empty() {
assert!(
tcx.sess.opts.debugging_opts.query_dep_graph,
"cannot use the `#[{}]` or `#[{}]` annotations \
without supplying `-Z query-dep-graph`",
sym::rustc_if_this_changed,
sym::rustc_then_this_would_need
);
}
// Check paths.
check_paths(tcx, &if_this_changed, &then_this_would_need);
})
}
type Sources = Vec<(Span, DefId, DepNode)>;
type Targets = Vec<(Span, Symbol, hir::HirId, DepNode)>;
struct IfThisChanged<'tcx> {
tcx: TyCtxt<'tcx>,
if_this_changed: Sources,
then_this_would_need: Targets,
}
impl IfThisChanged<'tcx> {
fn argument(&self, attr: &ast::Attribute) -> Option<Symbol> {
let mut value = None;
for list_item in attr.meta_item_list().unwrap_or_default() {
match list_item.ident() {
Some(ident) if list_item.is_word() && value.is_none() => value = Some(ident.name),
_ =>
// FIXME better-encapsulate meta_item (don't directly access `node`)
{
span_bug!(list_item.span(), "unexpected meta-item {:?}", list_item)
}
}
}
value
}
fn process_attrs(&mut self, hir_id: hir::HirId, attrs: &[ast::Attribute]) {
let def_id = self.tcx.hir().local_def_id(hir_id);
let def_path_hash = self.tcx.def_path_hash(def_id.to_def_id());
for attr in attrs {
if self.tcx.sess.check_name(attr, sym::rustc_if_this_changed) {
let dep_node_interned = self.argument(attr);
let dep_node = match dep_node_interned {
None => DepNode::from_def_path_hash(def_path_hash, DepKind::hir_owner),
Some(n) => match DepNode::from_label_string(&n.as_str(), def_path_hash) {
Ok(n) => n,
Err(()) => {
self.tcx.sess.span_fatal(
attr.span,
&format!("unrecognized DepNode variant {:?}", n),
);
}
},
};
self.if_this_changed.push((attr.span, def_id.to_def_id(), dep_node));
} else if self.tcx.sess.check_name(attr, sym::rustc_then_this_would_need) {
let dep_node_interned = self.argument(attr);
let dep_node = match dep_node_interned {
Some(n) => match DepNode::from_label_string(&n.as_str(), def_path_hash) {
Ok(n) => n,
Err(()) => {
self.tcx.sess.span_fatal(
attr.span,
&format!("unrecognized DepNode variant {:?}", n),
);
}
},
None => {
self.tcx.sess.span_fatal(attr.span, "missing DepNode variant");
}
};
self.then_this_would_need.push((
attr.span,
dep_node_interned.unwrap(),
hir_id,
dep_node,
));
}
}
}
}
impl Visitor<'tcx> for IfThisChanged<'tcx> {
type Map = Map<'tcx>;
fn nested_visit_map(&mut self) -> NestedVisitorMap<Self::Map> {
NestedVisitorMap::OnlyBodies(self.tcx.hir())
}
fn visit_item(&mut self, item: &'tcx hir::Item<'tcx>) {
self.process_attrs(item.hir_id, &item.attrs);
intravisit::walk_item(self, item);
}
fn visit_trait_item(&mut self, trait_item: &'tcx hir::TraitItem<'tcx>) {
self.process_attrs(trait_item.hir_id, &trait_item.attrs);
intravisit::walk_trait_item(self, trait_item);
}
fn visit_impl_item(&mut self, impl_item: &'tcx hir::ImplItem<'tcx>) {
self.process_attrs(impl_item.hir_id, &impl_item.attrs);
intravisit::walk_impl_item(self, impl_item);
}
fn visit_struct_field(&mut self, s: &'tcx hir::StructField<'tcx>) {
self.process_attrs(s.hir_id, &s.attrs);
intravisit::walk_struct_field(self, s);
}
}
fn check_paths<'tcx>(tcx: TyCtxt<'tcx>, if_this_changed: &Sources, then_this_would_need: &Targets) {
// Return early here so as not to construct the query, which is not cheap.
if if_this_changed.is_empty() {
for &(target_span, _, _, _) in then_this_would_need {
tcx.sess.span_err(target_span, "no `#[rustc_if_this_changed]` annotation detected");
}
return;
}
let query = tcx.dep_graph.query();
for &(_, source_def_id, ref source_dep_node) in if_this_changed {
let dependents = query.transitive_predecessors(source_dep_node);
for &(target_span, ref target_pass, _, ref target_dep_node) in then_this_would_need {
if !dependents.contains(&target_dep_node) {
tcx.sess.span_err(
target_span,
&format!(
"no path from `{}` to `{}`",
tcx.def_path_str(source_def_id),
target_pass
),
);
} else {
tcx.sess.span_err(target_span, "OK");
}
}
}
}
fn dump_graph(tcx: TyCtxt<'_>) {
let path: String = env::var("RUST_DEP_GRAPH").unwrap_or_else(|_| "dep_graph".to_string());
let query = tcx.dep_graph.query();
let nodes = match env::var("RUST_DEP_GRAPH_FILTER") {
Ok(string) => {
// Expect one of: "-> target", "source -> target", or "source ->".
let edge_filter =
EdgeFilter::new(&string).unwrap_or_else(|e| bug!("invalid filter: {}", e));
let sources = node_set(&query, &edge_filter.source);
let targets = node_set(&query, &edge_filter.target);
filter_nodes(&query, &sources, &targets)
}
Err(_) => query.nodes().into_iter().collect(),
};
let edges = filter_edges(&query, &nodes);
{
// dump a .txt file with just the edges:
let txt_path = format!("{}.txt", path);
let mut file = BufWriter::new(File::create(&txt_path).unwrap());
for &(ref source, ref target) in &edges {
write!(file, "{:?} -> {:?}\n", source, target).unwrap();
}
}
{
// dump a .dot file in graphviz format:
let dot_path = format!("{}.dot", path);
let mut v = Vec::new();
dot::render(&GraphvizDepGraph(nodes, edges), &mut v).unwrap();
fs::write(dot_path, v).unwrap();
}
}
pub struct GraphvizDepGraph<'q>(FxHashSet<&'q DepNode>, Vec<(&'q DepNode, &'q DepNode)>);
impl<'a, 'q> dot::GraphWalk<'a> for GraphvizDepGraph<'q> {
type Node = &'q DepNode;
type Edge = (&'q DepNode, &'q DepNode);
fn nodes(&self) -> dot::Nodes<'_, &'q DepNode> {
let nodes: Vec<_> = self.0.iter().cloned().collect();
nodes.into()
}
fn edges(&self) -> dot::Edges<'_, (&'q DepNode, &'q DepNode)> {
self.1[..].into()
}
fn source(&self, edge: &(&'q DepNode, &'q DepNode)) -> &'q DepNode {
edge.0
}
fn target(&self, edge: &(&'q DepNode, &'q DepNode)) -> &'q DepNode {
edge.1
}
}
impl<'a, 'q> dot::Labeller<'a> for GraphvizDepGraph<'q> {
type Node = &'q DepNode;
type Edge = (&'q DepNode, &'q DepNode);
fn graph_id(&self) -> dot::Id<'_> {
dot::Id::new("DependencyGraph").unwrap()
}
fn node_id(&self, n: &&'q DepNode) -> dot::Id<'_> {
let s: String = format!("{:?}", n)
.chars()
.map(|c| if c == '_' || c.is_alphanumeric() { c } else { '_' })
.collect();
debug!("n={:?} s={:?}", n, s);
dot::Id::new(s).unwrap()
}
fn node_label(&self, n: &&'q DepNode) -> dot::LabelText<'_> {
dot::LabelText::label(format!("{:?}", n))
}
}
// Given an optional filter like `"x,y,z"`, returns either `None` (no
// filter) or the set of nodes whose labels contain all of those
// substrings.
fn node_set<'q>(
query: &'q DepGraphQuery,
filter: &DepNodeFilter,
) -> Option<FxHashSet<&'q DepNode>> {
debug!("node_set(filter={:?})", filter);
if filter.accepts_all() {
return None;
}
Some(query.nodes().into_iter().filter(|n| filter.test(n)).collect())
}
fn filter_nodes<'q>(
query: &'q DepGraphQuery,
sources: &Option<FxHashSet<&'q DepNode>>,
targets: &Option<FxHashSet<&'q DepNode>>,
) -> FxHashSet<&'q DepNode> {
if let &Some(ref sources) = sources {
if let &Some(ref targets) = targets {
walk_between(query, sources, targets)
} else {
walk_nodes(query, sources, OUTGOING)
}
} else if let &Some(ref targets) = targets {
walk_nodes(query, targets, INCOMING)
} else {
query.nodes().into_iter().collect()
}
}
fn walk_nodes<'q>(
query: &'q DepGraphQuery,
starts: &FxHashSet<&'q DepNode>,
direction: Direction,
) -> FxHashSet<&'q DepNode> {
let mut set = FxHashSet::default();
for &start in starts {
debug!("walk_nodes: start={:?} outgoing?={:?}", start, direction == OUTGOING);
if set.insert(start) {
let mut stack = vec![query.indices[start]];
while let Some(index) = stack.pop() {
for (_, edge) in query.graph.adjacent_edges(index, direction) {
let neighbor_index = edge.source_or_target(direction);
let neighbor = query.graph.node_data(neighbor_index);
if set.insert(neighbor) {
stack.push(neighbor_index);
}
}
}
}
}
set
}
fn walk_between<'q>(
query: &'q DepGraphQuery,
sources: &FxHashSet<&'q DepNode>,
targets: &FxHashSet<&'q DepNode>,
) -> FxHashSet<&'q DepNode> {
// This is a bit tricky. We want to include a node only if it is:
// (a) reachable from a source and (b) will reach a target. And we
// have to be careful about cycles etc. Luckily efficiency is not
// a big concern!
#[derive(Copy, Clone, PartialEq)]
enum State {
Undecided,
Deciding,
Included,
Excluded,
}
let mut node_states = vec![State::Undecided; query.graph.len_nodes()];
for &target in targets {
node_states[query.indices[target].0] = State::Included;
}
for source in sources.iter().map(|&n| query.indices[n]) {
recurse(query, &mut node_states, source);
}
return query
.nodes()
.into_iter()
.filter(|&n| {
let index = query.indices[n];
node_states[index.0] == State::Included
})
.collect();
fn recurse(query: &DepGraphQuery, node_states: &mut [State], node: NodeIndex) -> bool {
match node_states[node.0] {
// known to reach a target
State::Included => return true,
// known not to reach a target
State::Excluded => return false,
// backedge, not yet known, say false
State::Deciding => return false,
State::Undecided => {}
}
node_states[node.0] = State::Deciding;
for neighbor_index in query.graph.successor_nodes(node) {
if recurse(query, node_states, neighbor_index) {
node_states[node.0] = State::Included;
}
}
// if we didn't find a path to target, then set to excluded
if node_states[node.0] == State::Deciding {
node_states[node.0] = State::Excluded;
false
} else {
assert!(node_states[node.0] == State::Included);
true
}
}
}
fn filter_edges<'q>(
query: &'q DepGraphQuery,
nodes: &FxHashSet<&'q DepNode>,
) -> Vec<(&'q DepNode, &'q DepNode)> {
query
.edges()
.into_iter()
.filter(|&(source, target)| nodes.contains(source) && nodes.contains(target))
.collect()
}

View file

@ -0,0 +1,180 @@
//! This pass is only used for UNIT TESTS related to incremental
//! compilation. It tests whether a particular `.o` file will be re-used
//! from a previous compilation or whether it must be regenerated.
//!
//! The user adds annotations to the crate of the following form:
//!
//! ```
//! #![rustc_partition_reused(module="spike", cfg="rpass2")]
//! #![rustc_partition_codegened(module="spike-x", cfg="rpass2")]
//! ```
//!
//! The first indicates (in the cfg `rpass2`) that `spike.o` will be
//! reused, the second that `spike-x.o` will be recreated. If these
//! annotations are inaccurate, errors are reported.
//!
//! The reason that we use `cfg=...` and not `#[cfg_attr]` is so that
//! the HIR doesn't change as a result of the annotations, which might
//! perturb the reuse results.
//!
//! `#![rustc_expected_cgu_reuse(module="spike", cfg="rpass2", kind="post-lto")]
//! allows for doing a more fine-grained check to see if pre- or post-lto data
//! was re-used.
use rustc_ast as ast;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_middle::mir::mono::CodegenUnitNameBuilder;
use rustc_middle::ty::TyCtxt;
use rustc_session::cgu_reuse_tracker::*;
use rustc_span::symbol::{sym, Symbol};
use std::collections::BTreeSet;
pub fn assert_module_sources(tcx: TyCtxt<'_>) {
tcx.dep_graph.with_ignore(|| {
if tcx.sess.opts.incremental.is_none() {
return;
}
let available_cgus = tcx
.collect_and_partition_mono_items(LOCAL_CRATE)
.1
.iter()
.map(|cgu| cgu.name().to_string())
.collect::<BTreeSet<String>>();
let ams = AssertModuleSource { tcx, available_cgus };
for attr in tcx.hir().krate().item.attrs {
ams.check_attr(attr);
}
})
}
struct AssertModuleSource<'tcx> {
tcx: TyCtxt<'tcx>,
available_cgus: BTreeSet<String>,
}
impl AssertModuleSource<'tcx> {
fn check_attr(&self, attr: &ast::Attribute) {
let (expected_reuse, comp_kind) =
if self.tcx.sess.check_name(attr, sym::rustc_partition_reused) {
(CguReuse::PreLto, ComparisonKind::AtLeast)
} else if self.tcx.sess.check_name(attr, sym::rustc_partition_codegened) {
(CguReuse::No, ComparisonKind::Exact)
} else if self.tcx.sess.check_name(attr, sym::rustc_expected_cgu_reuse) {
match self.field(attr, sym::kind) {
sym::no => (CguReuse::No, ComparisonKind::Exact),
sym::pre_dash_lto => (CguReuse::PreLto, ComparisonKind::Exact),
sym::post_dash_lto => (CguReuse::PostLto, ComparisonKind::Exact),
sym::any => (CguReuse::PreLto, ComparisonKind::AtLeast),
other => {
self.tcx.sess.span_fatal(
attr.span,
&format!("unknown cgu-reuse-kind `{}` specified", other),
);
}
}
} else {
return;
};
if !self.tcx.sess.opts.debugging_opts.query_dep_graph {
self.tcx.sess.span_fatal(
attr.span,
"found CGU-reuse attribute but `-Zquery-dep-graph` was not specified",
);
}
if !self.check_config(attr) {
debug!("check_attr: config does not match, ignoring attr");
return;
}
let user_path = self.field(attr, sym::module).to_string();
let crate_name = self.tcx.crate_name(LOCAL_CRATE).to_string();
if !user_path.starts_with(&crate_name) {
let msg = format!(
"Found malformed codegen unit name `{}`. \
Codegen units names must always start with the name of the \
crate (`{}` in this case).",
user_path, crate_name
);
self.tcx.sess.span_fatal(attr.span, &msg);
}
// Split of the "special suffix" if there is one.
let (user_path, cgu_special_suffix) = if let Some(index) = user_path.rfind('.') {
(&user_path[..index], Some(&user_path[index + 1..]))
} else {
(&user_path[..], None)
};
let mut cgu_path_components = user_path.split('-').collect::<Vec<_>>();
// Remove the crate name
assert_eq!(cgu_path_components.remove(0), crate_name);
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(self.tcx);
let cgu_name =
cgu_name_builder.build_cgu_name(LOCAL_CRATE, cgu_path_components, cgu_special_suffix);
debug!("mapping '{}' to cgu name '{}'", self.field(attr, sym::module), cgu_name);
if !self.available_cgus.contains(&*cgu_name.as_str()) {
self.tcx.sess.span_err(
attr.span,
&format!(
"no module named `{}` (mangled: {}). Available modules: {}",
user_path,
cgu_name,
self.available_cgus
.iter()
.map(|cgu| cgu.to_string())
.collect::<Vec<_>>()
.join(", ")
),
);
}
self.tcx.sess.cgu_reuse_tracker.set_expectation(
cgu_name,
&user_path,
attr.span,
expected_reuse,
comp_kind,
);
}
fn field(&self, attr: &ast::Attribute, name: Symbol) -> Symbol {
for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
if item.has_name(name) {
if let Some(value) = item.value_str() {
return value;
} else {
self.tcx.sess.span_fatal(
item.span(),
&format!("associated value expected for `{}`", name),
);
}
}
}
self.tcx.sess.span_fatal(attr.span, &format!("no field `{}`", name));
}
/// Scan for a `cfg="foo"` attribute and check whether we have a
/// cfg flag called `foo`.
fn check_config(&self, attr: &ast::Attribute) -> bool {
let config = &self.tcx.sess.parse_sess.config;
let value = self.field(attr, sym::cfg);
debug!("check_config(config={:?}, value={:?})", config, value);
if config.iter().any(|&(name, _)| name == value) {
debug!("check_config: matched");
return true;
}
debug!("check_config: no match found");
false
}
}

View file

@ -0,0 +1,30 @@
//! Support for serializing the dep-graph and reloading it.
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/")]
#![feature(in_band_lifetimes)]
#![feature(nll)]
#![recursion_limit = "256"]
#[macro_use]
extern crate rustc_middle;
#[macro_use]
extern crate tracing;
mod assert_dep_graph;
pub mod assert_module_sources;
mod persist;
pub use assert_dep_graph::assert_dep_graph;
pub use persist::copy_cgu_workproduct_to_incr_comp_cache_dir;
pub use persist::delete_workproduct_files;
pub use persist::dep_graph_tcx_init;
pub use persist::finalize_session_directory;
pub use persist::garbage_collect_session_directories;
pub use persist::in_incr_comp_dir;
pub use persist::in_incr_comp_dir_sess;
pub use persist::load_query_result_cache;
pub use persist::prepare_session_directory;
pub use persist::save_dep_graph;
pub use persist::save_work_product_index;
pub use persist::LoadResult;
pub use persist::{load_dep_graph, DepGraphFuture};

View file

@ -0,0 +1,3 @@
For info on how the incremental compilation works, see the [rustc dev guide].
[rustc dev guide]: https://rustc-dev-guide.rust-lang.org/query.html

View file

@ -0,0 +1,13 @@
//! The data that we will serialize and deserialize.
use rustc_macros::{Decodable, Encodable};
use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
#[derive(Debug, Encodable, Decodable)]
pub struct SerializedWorkProduct {
/// node that produced the work-product
pub id: WorkProductId,
/// work-product data itself
pub work_product: WorkProduct,
}

View file

@ -0,0 +1,558 @@
//! Debugging code to test fingerprints computed for query results.
//! For each node marked with `#[rustc_clean]` or `#[rustc_dirty]`,
//! we will compare the fingerprint from the current and from the previous
//! compilation session as appropriate:
//!
//! - `#[rustc_clean(cfg="rev2", except="typeck")]` if we are
//! in `#[cfg(rev2)]`, then the fingerprints associated with
//! `DepNode::typeck(X)` must be DIFFERENT (`X` is the `DefId` of the
//! current node).
//! - `#[rustc_clean(cfg="rev2")]` same as above, except that the
//! fingerprints must be the SAME (along with all other fingerprints).
//!
//! Errors are reported if we are in the suitable configuration but
//! the required condition is not met.
use rustc_ast::{self as ast, Attribute, NestedMetaItem};
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashSet;
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_hir::intravisit;
use rustc_hir::itemlikevisit::ItemLikeVisitor;
use rustc_hir::Node as HirNode;
use rustc_hir::{ImplItemKind, ItemKind as HirItem, TraitItemKind};
use rustc_middle::dep_graph::{label_strs, DepNode, DepNodeExt};
use rustc_middle::hir::map::Map;
use rustc_middle::ty::TyCtxt;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
use std::iter::FromIterator;
use std::vec::Vec;
const EXCEPT: Symbol = sym::except;
const LABEL: Symbol = sym::label;
const CFG: Symbol = sym::cfg;
// Base and Extra labels to build up the labels
/// For typedef, constants, and statics
const BASE_CONST: &[&str] = &[label_strs::type_of];
/// DepNodes for functions + methods
const BASE_FN: &[&str] = &[
// Callers will depend on the signature of these items, so we better test
label_strs::fn_sig,
label_strs::generics_of,
label_strs::predicates_of,
label_strs::type_of,
// And a big part of compilation (that we eventually want to cache) is type inference
// information:
label_strs::typeck,
];
/// DepNodes for Hir, which is pretty much everything
const BASE_HIR: &[&str] = &[
// hir_owner and hir_owner_nodes should be computed for all nodes
label_strs::hir_owner,
label_strs::hir_owner_nodes,
];
/// `impl` implementation of struct/trait
const BASE_IMPL: &[&str] =
&[label_strs::associated_item_def_ids, label_strs::generics_of, label_strs::impl_trait_ref];
/// DepNodes for mir_built/Optimized, which is relevant in "executable"
/// code, i.e., functions+methods
const BASE_MIR: &[&str] = &[label_strs::optimized_mir, label_strs::promoted_mir];
/// Struct, Enum and Union DepNodes
///
/// Note that changing the type of a field does not change the type of the struct or enum, but
/// adding/removing fields or changing a fields name or visibility does.
const BASE_STRUCT: &[&str] =
&[label_strs::generics_of, label_strs::predicates_of, label_strs::type_of];
/// Trait definition `DepNode`s.
const BASE_TRAIT_DEF: &[&str] = &[
label_strs::associated_item_def_ids,
label_strs::generics_of,
label_strs::object_safety_violations,
label_strs::predicates_of,
label_strs::specialization_graph_of,
label_strs::trait_def,
label_strs::trait_impls_of,
];
/// Extra `DepNode`s for functions and methods.
const EXTRA_ASSOCIATED: &[&str] = &[label_strs::associated_item];
const EXTRA_TRAIT: &[&str] = &[label_strs::trait_of_item];
// Fully Built Labels
const LABELS_CONST: &[&[&str]] = &[BASE_HIR, BASE_CONST];
/// Constant/Typedef in an impl
const LABELS_CONST_IN_IMPL: &[&[&str]] = &[BASE_HIR, BASE_CONST, EXTRA_ASSOCIATED];
/// Trait-Const/Typedef DepNodes
const LABELS_CONST_IN_TRAIT: &[&[&str]] = &[BASE_HIR, BASE_CONST, EXTRA_ASSOCIATED, EXTRA_TRAIT];
/// Function `DepNode`s.
const LABELS_FN: &[&[&str]] = &[BASE_HIR, BASE_MIR, BASE_FN];
/// Method `DepNode`s.
const LABELS_FN_IN_IMPL: &[&[&str]] = &[BASE_HIR, BASE_MIR, BASE_FN, EXTRA_ASSOCIATED];
/// Trait method `DepNode`s.
const LABELS_FN_IN_TRAIT: &[&[&str]] =
&[BASE_HIR, BASE_MIR, BASE_FN, EXTRA_ASSOCIATED, EXTRA_TRAIT];
/// For generic cases like inline-assembly, modules, etc.
const LABELS_HIR_ONLY: &[&[&str]] = &[BASE_HIR];
/// Impl `DepNode`s.
const LABELS_IMPL: &[&[&str]] = &[BASE_HIR, BASE_IMPL];
/// Abstract data type (struct, enum, union) `DepNode`s.
const LABELS_ADT: &[&[&str]] = &[BASE_HIR, BASE_STRUCT];
/// Trait definition `DepNode`s.
#[allow(dead_code)]
const LABELS_TRAIT: &[&[&str]] = &[BASE_HIR, BASE_TRAIT_DEF];
// FIXME: Struct/Enum/Unions Fields (there is currently no way to attach these)
//
// Fields are kind of separate from their containers, as they can change independently from
// them. We should at least check
//
// type_of for these.
type Labels = FxHashSet<String>;
/// Represents the requested configuration by rustc_clean/dirty
struct Assertion {
clean: Labels,
dirty: Labels,
}
impl Assertion {
fn from_clean_labels(labels: Labels) -> Assertion {
Assertion { clean: labels, dirty: Labels::default() }
}
fn from_dirty_labels(labels: Labels) -> Assertion {
Assertion { clean: Labels::default(), dirty: labels }
}
}
pub fn check_dirty_clean_annotations(tcx: TyCtxt<'_>) {
// can't add `#[rustc_dirty]` etc without opting in to this feature
if !tcx.features().rustc_attrs {
return;
}
tcx.dep_graph.with_ignore(|| {
let krate = tcx.hir().krate();
let mut dirty_clean_visitor = DirtyCleanVisitor { tcx, checked_attrs: Default::default() };
krate.visit_all_item_likes(&mut dirty_clean_visitor);
let mut all_attrs = FindAllAttrs {
tcx,
attr_names: vec![sym::rustc_dirty, sym::rustc_clean],
found_attrs: vec![],
};
intravisit::walk_crate(&mut all_attrs, krate);
// Note that we cannot use the existing "unused attribute"-infrastructure
// here, since that is running before codegen. This is also the reason why
// all codegen-specific attributes are `AssumedUsed` in rustc_ast::feature_gate.
all_attrs.report_unchecked_attrs(&dirty_clean_visitor.checked_attrs);
})
}
pub struct DirtyCleanVisitor<'tcx> {
tcx: TyCtxt<'tcx>,
checked_attrs: FxHashSet<ast::AttrId>,
}
impl DirtyCleanVisitor<'tcx> {
/// Possibly "deserialize" the attribute into a clean/dirty assertion
fn assertion_maybe(&mut self, item_id: hir::HirId, attr: &Attribute) -> Option<Assertion> {
let is_clean = if self.tcx.sess.check_name(attr, sym::rustc_dirty) {
false
} else if self.tcx.sess.check_name(attr, sym::rustc_clean) {
true
} else {
// skip: not rustc_clean/dirty
return None;
};
if !check_config(self.tcx, attr) {
// skip: not the correct `cfg=`
return None;
}
let assertion = if let Some(labels) = self.labels(attr) {
if is_clean {
Assertion::from_clean_labels(labels)
} else {
Assertion::from_dirty_labels(labels)
}
} else {
self.assertion_auto(item_id, attr, is_clean)
};
Some(assertion)
}
/// Gets the "auto" assertion on pre-validated attr, along with the `except` labels.
fn assertion_auto(
&mut self,
item_id: hir::HirId,
attr: &Attribute,
is_clean: bool,
) -> Assertion {
let (name, mut auto) = self.auto_labels(item_id, attr);
let except = self.except(attr);
for e in except.iter() {
if !auto.remove(e) {
let msg = format!(
"`except` specified DepNodes that can not be affected for \"{}\": \"{}\"",
name, e
);
self.tcx.sess.span_fatal(attr.span, &msg);
}
}
if is_clean {
Assertion { clean: auto, dirty: except }
} else {
Assertion { clean: except, dirty: auto }
}
}
fn labels(&self, attr: &Attribute) -> Option<Labels> {
for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
if item.has_name(LABEL) {
let value = expect_associated_value(self.tcx, &item);
return Some(self.resolve_labels(&item, value));
}
}
None
}
/// `except=` attribute value
fn except(&self, attr: &Attribute) -> Labels {
for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
if item.has_name(EXCEPT) {
let value = expect_associated_value(self.tcx, &item);
return self.resolve_labels(&item, value);
}
}
// if no `label` or `except` is given, only the node's group are asserted
Labels::default()
}
/// Return all DepNode labels that should be asserted for this item.
/// index=0 is the "name" used for error messages
fn auto_labels(&mut self, item_id: hir::HirId, attr: &Attribute) -> (&'static str, Labels) {
let node = self.tcx.hir().get(item_id);
let (name, labels) = match node {
HirNode::Item(item) => {
match item.kind {
// note: these are in the same order as hir::Item_;
// FIXME(michaelwoerister): do commented out ones
// // An `extern crate` item, with optional original crate name,
// HirItem::ExternCrate(..), // intentionally no assertions
// // `use foo::bar::*;` or `use foo::bar::baz as quux;`
// HirItem::Use(..), // intentionally no assertions
// A `static` item
HirItem::Static(..) => ("ItemStatic", LABELS_CONST),
// A `const` item
HirItem::Const(..) => ("ItemConst", LABELS_CONST),
// A function declaration
HirItem::Fn(..) => ("ItemFn", LABELS_FN),
// // A module
HirItem::Mod(..) => ("ItemMod", LABELS_HIR_ONLY),
// // An external module
HirItem::ForeignMod(..) => ("ItemForeignMod", LABELS_HIR_ONLY),
// Module-level inline assembly (from global_asm!)
HirItem::GlobalAsm(..) => ("ItemGlobalAsm", LABELS_HIR_ONLY),
// A type alias, e.g., `type Foo = Bar<u8>`
HirItem::TyAlias(..) => ("ItemTy", LABELS_HIR_ONLY),
// An enum definition, e.g., `enum Foo<A, B> {C<A>, D<B>}`
HirItem::Enum(..) => ("ItemEnum", LABELS_ADT),
// A struct definition, e.g., `struct Foo<A> {x: A}`
HirItem::Struct(..) => ("ItemStruct", LABELS_ADT),
// A union definition, e.g., `union Foo<A, B> {x: A, y: B}`
HirItem::Union(..) => ("ItemUnion", LABELS_ADT),
// Represents a Trait Declaration
// FIXME(michaelwoerister): trait declaration is buggy because sometimes some of
// the depnodes don't exist (because they legitametely didn't need to be
// calculated)
//
// michaelwoerister and vitiral came up with a possible solution,
// to just do this before every query
// ```
// ::rustc_middle::ty::query::plumbing::force_from_dep_node(tcx, dep_node)
// ```
//
// However, this did not seem to work effectively and more bugs were hit.
// Nebie @vitiral gave up :)
//
//HirItem::Trait(..) => ("ItemTrait", LABELS_TRAIT),
// An implementation, eg `impl<A> Trait for Foo { .. }`
HirItem::Impl { .. } => ("ItemKind::Impl", LABELS_IMPL),
_ => self.tcx.sess.span_fatal(
attr.span,
&format!(
"clean/dirty auto-assertions not yet defined \
for Node::Item.node={:?}",
item.kind
),
),
}
}
HirNode::TraitItem(item) => match item.kind {
TraitItemKind::Fn(..) => ("Node::TraitItem", LABELS_FN_IN_TRAIT),
TraitItemKind::Const(..) => ("NodeTraitConst", LABELS_CONST_IN_TRAIT),
TraitItemKind::Type(..) => ("NodeTraitType", LABELS_CONST_IN_TRAIT),
},
HirNode::ImplItem(item) => match item.kind {
ImplItemKind::Fn(..) => ("Node::ImplItem", LABELS_FN_IN_IMPL),
ImplItemKind::Const(..) => ("NodeImplConst", LABELS_CONST_IN_IMPL),
ImplItemKind::TyAlias(..) => ("NodeImplType", LABELS_CONST_IN_IMPL),
},
_ => self.tcx.sess.span_fatal(
attr.span,
&format!("clean/dirty auto-assertions not yet defined for {:?}", node),
),
};
let labels =
Labels::from_iter(labels.iter().flat_map(|s| s.iter().map(|l| (*l).to_string())));
(name, labels)
}
fn resolve_labels(&self, item: &NestedMetaItem, value: Symbol) -> Labels {
let mut out = Labels::default();
for label in value.as_str().split(',') {
let label = label.trim();
if DepNode::has_label_string(label) {
if out.contains(label) {
self.tcx.sess.span_fatal(
item.span(),
&format!("dep-node label `{}` is repeated", label),
);
}
out.insert(label.to_string());
} else {
self.tcx
.sess
.span_fatal(item.span(), &format!("dep-node label `{}` not recognized", label));
}
}
out
}
fn dep_nodes<'l>(
&self,
labels: &'l Labels,
def_id: DefId,
) -> impl Iterator<Item = DepNode> + 'l {
let def_path_hash = self.tcx.def_path_hash(def_id);
labels.iter().map(move |label| match DepNode::from_label_string(label, def_path_hash) {
Ok(dep_node) => dep_node,
Err(()) => unreachable!("label: {}", label),
})
}
fn dep_node_str(&self, dep_node: &DepNode) -> String {
if let Some(def_id) = dep_node.extract_def_id(self.tcx) {
format!("{:?}({})", dep_node.kind, self.tcx.def_path_str(def_id))
} else {
format!("{:?}({:?})", dep_node.kind, dep_node.hash)
}
}
fn assert_dirty(&self, item_span: Span, dep_node: DepNode) {
debug!("assert_dirty({:?})", dep_node);
let current_fingerprint = self.get_fingerprint(&dep_node);
let prev_fingerprint = self.tcx.dep_graph.prev_fingerprint_of(&dep_node);
if current_fingerprint == prev_fingerprint {
let dep_node_str = self.dep_node_str(&dep_node);
self.tcx
.sess
.span_err(item_span, &format!("`{}` should be dirty but is not", dep_node_str));
}
}
fn get_fingerprint(&self, dep_node: &DepNode) -> Option<Fingerprint> {
if self.tcx.dep_graph.dep_node_exists(dep_node) {
let dep_node_index = self.tcx.dep_graph.dep_node_index_of(dep_node);
Some(self.tcx.dep_graph.fingerprint_of(dep_node_index))
} else {
None
}
}
fn assert_clean(&self, item_span: Span, dep_node: DepNode) {
debug!("assert_clean({:?})", dep_node);
let current_fingerprint = self.get_fingerprint(&dep_node);
let prev_fingerprint = self.tcx.dep_graph.prev_fingerprint_of(&dep_node);
// if the node wasn't previously evaluated and now is (or vice versa),
// then the node isn't actually clean or dirty.
if (current_fingerprint == None) ^ (prev_fingerprint == None) {
return;
}
if current_fingerprint != prev_fingerprint {
let dep_node_str = self.dep_node_str(&dep_node);
self.tcx
.sess
.span_err(item_span, &format!("`{}` should be clean but is not", dep_node_str));
}
}
fn check_item(&mut self, item_id: hir::HirId, item_span: Span) {
let def_id = self.tcx.hir().local_def_id(item_id);
for attr in self.tcx.get_attrs(def_id.to_def_id()).iter() {
let assertion = match self.assertion_maybe(item_id, attr) {
Some(a) => a,
None => continue,
};
self.checked_attrs.insert(attr.id);
for dep_node in self.dep_nodes(&assertion.clean, def_id.to_def_id()) {
self.assert_clean(item_span, dep_node);
}
for dep_node in self.dep_nodes(&assertion.dirty, def_id.to_def_id()) {
self.assert_dirty(item_span, dep_node);
}
}
}
}
impl ItemLikeVisitor<'tcx> for DirtyCleanVisitor<'tcx> {
fn visit_item(&mut self, item: &'tcx hir::Item<'tcx>) {
self.check_item(item.hir_id, item.span);
}
fn visit_trait_item(&mut self, item: &hir::TraitItem<'_>) {
self.check_item(item.hir_id, item.span);
}
fn visit_impl_item(&mut self, item: &hir::ImplItem<'_>) {
self.check_item(item.hir_id, item.span);
}
}
/// Given a `#[rustc_dirty]` or `#[rustc_clean]` attribute, scan
/// for a `cfg="foo"` attribute and check whether we have a cfg
/// flag called `foo`.
///
/// Also make sure that the `label` and `except` fields do not
/// both exist.
fn check_config(tcx: TyCtxt<'_>, attr: &Attribute) -> bool {
debug!("check_config(attr={:?})", attr);
let config = &tcx.sess.parse_sess.config;
debug!("check_config: config={:?}", config);
let (mut cfg, mut except, mut label) = (None, false, false);
for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
if item.has_name(CFG) {
let value = expect_associated_value(tcx, &item);
debug!("check_config: searching for cfg {:?}", value);
cfg = Some(config.contains(&(value, None)));
}
if item.has_name(LABEL) {
label = true;
}
if item.has_name(EXCEPT) {
except = true;
}
}
if label && except {
tcx.sess.span_fatal(attr.span, "must specify only one of: `label`, `except`");
}
match cfg {
None => tcx.sess.span_fatal(attr.span, "no cfg attribute"),
Some(c) => c,
}
}
fn expect_associated_value(tcx: TyCtxt<'_>, item: &NestedMetaItem) -> Symbol {
if let Some(value) = item.value_str() {
value
} else {
let msg = if let Some(ident) = item.ident() {
format!("associated value expected for `{}`", ident)
} else {
"expected an associated value".to_string()
};
tcx.sess.span_fatal(item.span(), &msg);
}
}
// A visitor that collects all #[rustc_dirty]/#[rustc_clean] attributes from
// the HIR. It is used to verfiy that we really ran checks for all annotated
// nodes.
pub struct FindAllAttrs<'tcx> {
tcx: TyCtxt<'tcx>,
attr_names: Vec<Symbol>,
found_attrs: Vec<&'tcx Attribute>,
}
impl FindAllAttrs<'tcx> {
fn is_active_attr(&mut self, attr: &Attribute) -> bool {
for attr_name in &self.attr_names {
if self.tcx.sess.check_name(attr, *attr_name) && check_config(self.tcx, attr) {
return true;
}
}
false
}
fn report_unchecked_attrs(&self, checked_attrs: &FxHashSet<ast::AttrId>) {
for attr in &self.found_attrs {
if !checked_attrs.contains(&attr.id) {
self.tcx.sess.span_err(
attr.span,
"found unchecked `#[rustc_dirty]` / `#[rustc_clean]` attribute",
);
}
}
}
}
impl intravisit::Visitor<'tcx> for FindAllAttrs<'tcx> {
type Map = Map<'tcx>;
fn nested_visit_map(&mut self) -> intravisit::NestedVisitorMap<Self::Map> {
intravisit::NestedVisitorMap::All(self.tcx.hir())
}
fn visit_attribute(&mut self, attr: &'tcx Attribute) {
if self.is_active_attr(attr) {
self.found_attrs.push(attr);
}
}
}

View file

@ -0,0 +1,131 @@
//! This module defines a generic file format that allows to check if a given
//! file generated by incremental compilation was generated by a compatible
//! compiler version. This file format is used for the on-disk version of the
//! dependency graph and the exported metadata hashes.
//!
//! In practice "compatible compiler version" means "exactly the same compiler
//! version", since the header encodes the git commit hash of the compiler.
//! Since we can always just ignore the incremental compilation cache and
//! compiler versions don't change frequently for the typical user, being
//! conservative here practically has no downside.
use std::env;
use std::fs;
use std::io::{self, Read};
use std::path::Path;
use rustc_serialize::opaque::Encoder;
use rustc_session::config::nightly_options;
/// The first few bytes of files generated by incremental compilation.
const FILE_MAGIC: &[u8] = b"RSIC";
/// Change this if the header format changes.
const HEADER_FORMAT_VERSION: u16 = 0;
/// A version string that hopefully is always different for compiler versions
/// with different encodings of incremental compilation artifacts. Contains
/// the Git commit hash.
const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");
pub fn write_file_header(stream: &mut Encoder) {
stream.emit_raw_bytes(FILE_MAGIC);
stream
.emit_raw_bytes(&[(HEADER_FORMAT_VERSION >> 0) as u8, (HEADER_FORMAT_VERSION >> 8) as u8]);
let rustc_version = rustc_version();
assert_eq!(rustc_version.len(), (rustc_version.len() as u8) as usize);
stream.emit_raw_bytes(&[rustc_version.len() as u8]);
stream.emit_raw_bytes(rustc_version.as_bytes());
}
/// Reads the contents of a file with a file header as defined in this module.
///
/// - Returns `Ok(Some(data, pos))` if the file existed and was generated by a
/// compatible compiler version. `data` is the entire contents of the file
/// and `pos` points to the first byte after the header.
/// - Returns `Ok(None)` if the file did not exist or was generated by an
/// incompatible version of the compiler.
/// - Returns `Err(..)` if some kind of IO error occurred while reading the
/// file.
pub fn read_file(
report_incremental_info: bool,
path: &Path,
) -> io::Result<Option<(Vec<u8>, usize)>> {
if !path.exists() {
return Ok(None);
}
let data = fs::read(path)?;
let mut file = io::Cursor::new(data);
// Check FILE_MAGIC
{
debug_assert!(FILE_MAGIC.len() == 4);
let mut file_magic = [0u8; 4];
file.read_exact(&mut file_magic)?;
if file_magic != FILE_MAGIC {
report_format_mismatch(report_incremental_info, path, "Wrong FILE_MAGIC");
return Ok(None);
}
}
// Check HEADER_FORMAT_VERSION
{
debug_assert!(::std::mem::size_of_val(&HEADER_FORMAT_VERSION) == 2);
let mut header_format_version = [0u8; 2];
file.read_exact(&mut header_format_version)?;
let header_format_version =
(header_format_version[0] as u16) | ((header_format_version[1] as u16) << 8);
if header_format_version != HEADER_FORMAT_VERSION {
report_format_mismatch(report_incremental_info, path, "Wrong HEADER_FORMAT_VERSION");
return Ok(None);
}
}
// Check RUSTC_VERSION
{
let mut rustc_version_str_len = [0u8; 1];
file.read_exact(&mut rustc_version_str_len)?;
let rustc_version_str_len = rustc_version_str_len[0] as usize;
let mut buffer = vec![0; rustc_version_str_len];
file.read_exact(&mut buffer)?;
if buffer != rustc_version().as_bytes() {
report_format_mismatch(report_incremental_info, path, "Different compiler version");
return Ok(None);
}
}
let post_header_start_pos = file.position() as usize;
Ok(Some((file.into_inner(), post_header_start_pos)))
}
fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {
debug!("read_file: {}", message);
if report_incremental_info {
println!(
"[incremental] ignoring cache artifact `{}`: {}",
file.file_name().unwrap().to_string_lossy(),
message
);
}
}
fn rustc_version() -> String {
if nightly_options::is_nightly_build() {
if let Some(val) = env::var_os("RUSTC_FORCE_INCR_COMP_ARTIFACT_HEADER") {
return val.to_string_lossy().into_owned();
}
}
RUSTC_VERSION
.expect(
"Cannot use rustc without explicit version for \
incremental compilation",
)
.to_string()
}

View file

@ -0,0 +1,946 @@
//! This module manages how the incremental compilation cache is represented in
//! the file system.
//!
//! Incremental compilation caches are managed according to a copy-on-write
//! strategy: Once a complete, consistent cache version is finalized, it is
//! never modified. Instead, when a subsequent compilation session is started,
//! the compiler will allocate a new version of the cache that starts out as
//! a copy of the previous version. Then only this new copy is modified and it
//! will not be visible to other processes until it is finalized. This ensures
//! that multiple compiler processes can be executed concurrently for the same
//! crate without interfering with each other or blocking each other.
//!
//! More concretely this is implemented via the following protocol:
//!
//! 1. For a newly started compilation session, the compiler allocates a
//! new `session` directory within the incremental compilation directory.
//! This session directory will have a unique name that ends with the suffix
//! "-working" and that contains a creation timestamp.
//! 2. Next, the compiler looks for the newest finalized session directory,
//! that is, a session directory from a previous compilation session that
//! has been marked as valid and consistent. A session directory is
//! considered finalized if the "-working" suffix in the directory name has
//! been replaced by the SVH of the crate.
//! 3. Once the compiler has found a valid, finalized session directory, it will
//! hard-link/copy its contents into the new "-working" directory. If all
//! goes well, it will have its own, private copy of the source directory and
//! subsequently not have to worry about synchronizing with other compiler
//! processes.
//! 4. Now the compiler can do its normal compilation process, which involves
//! reading and updating its private session directory.
//! 5. When compilation finishes without errors, the private session directory
//! will be in a state where it can be used as input for other compilation
//! sessions. That is, it will contain a dependency graph and cache artifacts
//! that are consistent with the state of the source code it was compiled
//! from, with no need to change them ever again. At this point, the compiler
//! finalizes and "publishes" its private session directory by renaming it
//! from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
//! 6. At this point the "old" session directory that we copied our data from
//! at the beginning of the session has become obsolete because we have just
//! published a more current version. Thus the compiler will delete it.
//!
//! ## Garbage Collection
//!
//! Naively following the above protocol might lead to old session directories
//! piling up if a compiler instance crashes for some reason before its able to
//! remove its private session directory. In order to avoid wasting disk space,
//! the compiler also does some garbage collection each time it is started in
//! incremental compilation mode. Specifically, it will scan the incremental
//! compilation directory for private session directories that are not in use
//! any more and will delete those. It will also delete any finalized session
//! directories for a given crate except for the most recent one.
//!
//! ## Synchronization
//!
//! There is some synchronization needed in order for the compiler to be able to
//! determine whether a given private session directory is not in used any more.
//! This is done by creating a lock file for each session directory and
//! locking it while the directory is still being used. Since file locks have
//! operating system support, we can rely on the lock being released if the
//! compiler process dies for some unexpected reason. Thus, when garbage
//! collecting private session directories, the collecting process can determine
//! whether the directory is still in use by trying to acquire a lock on the
//! file. If locking the file fails, the original process must still be alive.
//! If locking the file succeeds, we know that the owning process is not alive
//! any more and we can safely delete the directory.
//! There is still a small time window between the original process creating the
//! lock file and actually locking it. In order to minimize the chance that
//! another process tries to acquire the lock in just that instance, only
//! session directories that are older than a few seconds are considered for
//! garbage collection.
//!
//! Another case that has to be considered is what happens if one process
//! deletes a finalized session directory that another process is currently
//! trying to copy from. This case is also handled via the lock file. Before
//! a process starts copying a finalized session directory, it will acquire a
//! shared lock on the directory's lock file. Any garbage collecting process,
//! on the other hand, will acquire an exclusive lock on the lock file.
//! Thus, if a directory is being collected, any reader process will fail
//! acquiring the shared lock and will leave the directory alone. Conversely,
//! if a collecting process can't acquire the exclusive lock because the
//! directory is currently being read from, it will leave collecting that
//! directory to another process at a later point in time.
//! The exact same scheme is also used when reading the metadata hashes file
//! from an extern crate. When a crate is compiled, the hash values of its
//! metadata are stored in a file in its session directory. When the
//! compilation session of another crate imports the first crate's metadata,
//! it also has to read in the accompanying metadata hashes. It thus will access
//! the finalized session directory of all crates it links to and while doing
//! so, it will also place a read lock on that the respective session directory
//! so that it won't be deleted while the metadata hashes are loaded.
//!
//! ## Preconditions
//!
//! This system relies on two features being available in the file system in
//! order to work really well: file locking and hard linking.
//! If hard linking is not available (like on FAT) the data in the cache
//! actually has to be copied at the beginning of each session.
//! If file locking does not work reliably (like on NFS), some of the
//! synchronization will go haywire.
//! In both cases we recommend to locate the incremental compilation directory
//! on a file system that supports these things.
//! It might be a good idea though to try and detect whether we are on an
//! unsupported file system and emit a warning in that case. This is not yet
//! implemented.
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::svh::Svh;
use rustc_data_structures::{base_n, flock};
use rustc_fs_util::{link_or_copy, LinkOrCopy};
use rustc_session::{CrateDisambiguator, Session};
use std::fs as std_fs;
use std::io;
use std::mem;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use rand::{thread_rng, RngCore};
#[cfg(test)]
mod tests;
const LOCK_FILE_EXT: &str = ".lock";
const DEP_GRAPH_FILENAME: &str = "dep-graph.bin";
const WORK_PRODUCTS_FILENAME: &str = "work-products.bin";
const QUERY_CACHE_FILENAME: &str = "query-cache.bin";
// We encode integers using the following base, so they are shorter than decimal
// or hexadecimal numbers (we want short file and directory names). Since these
// numbers will be used in file names, we choose an encoding that is not
// case-sensitive (as opposed to base64, for example).
const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
pub fn dep_graph_path(sess: &Session) -> PathBuf {
in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
}
pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf {
in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME)
}
pub fn work_products_path(sess: &Session) -> PathBuf {
in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
}
pub fn query_cache_path(sess: &Session) -> PathBuf {
in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME)
}
pub fn lock_file_path(session_dir: &Path) -> PathBuf {
let crate_dir = session_dir.parent().unwrap();
let directory_name = session_dir.file_name().unwrap().to_string_lossy();
assert_no_characters_lost(&directory_name);
let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect();
if dash_indices.len() != 3 {
bug!(
"Encountered incremental compilation session directory with \
malformed name: {}",
session_dir.display()
)
}
crate_dir.join(&directory_name[0..dash_indices[2]]).with_extension(&LOCK_FILE_EXT[1..])
}
pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
}
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
incr_comp_session_dir.join(file_name)
}
/// Allocates the private session directory. The boolean in the Ok() result
/// indicates whether we should try loading a dep graph from the successfully
/// initialized directory, or not.
/// The post-condition of this fn is that we have a valid incremental
/// compilation session directory, if the result is `Ok`. A valid session
/// directory is one that contains a locked lock file. It may or may not contain
/// a dep-graph and work products from a previous session.
/// If the call fails, the fn may leave behind an invalid session directory.
/// The garbage collection will take care of it.
pub fn prepare_session_directory(
sess: &Session,
crate_name: &str,
crate_disambiguator: CrateDisambiguator,
) {
if sess.opts.incremental.is_none() {
return;
}
let _timer = sess.timer("incr_comp_prepare_session_directory");
debug!("prepare_session_directory");
// {incr-comp-dir}/{crate-name-and-disambiguator}
let crate_dir = crate_path(sess, crate_name, crate_disambiguator);
debug!("crate-dir: {}", crate_dir.display());
if create_dir(sess, &crate_dir, "crate").is_err() {
return;
}
// Hack: canonicalize the path *after creating the directory*
// because, on windows, long paths can cause problems;
// canonicalization inserts this weird prefix that makes windows
// tolerate long paths.
let crate_dir = match crate_dir.canonicalize() {
Ok(v) => v,
Err(err) => {
sess.err(&format!(
"incremental compilation: error canonicalizing path `{}`: {}",
crate_dir.display(),
err
));
return;
}
};
let mut source_directories_already_tried = FxHashSet::default();
loop {
// Generate a session directory of the form:
//
// {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
let session_dir = generate_session_dir_path(&crate_dir);
debug!("session-dir: {}", session_dir.display());
// Lock the new session directory. If this fails, return an
// error without retrying
let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) {
Ok(e) => e,
Err(_) => return,
};
// Now that we have the lock, we can actually create the session
// directory
if create_dir(sess, &session_dir, "session").is_err() {
return;
}
// Find a suitable source directory to copy from. Ignore those that we
// have already tried before.
let source_directory = find_source_directory(&crate_dir, &source_directories_already_tried);
let source_directory = if let Some(dir) = source_directory {
dir
} else {
// There's nowhere to copy from, we're done
debug!(
"no source directory found. Continuing with empty session \
directory."
);
sess.init_incr_comp_session(session_dir, directory_lock, false);
return;
};
debug!("attempting to copy data from source: {}", source_directory.display());
// Try copying over all files from the source directory
if let Ok(allows_links) = copy_files(sess, &session_dir, &source_directory) {
debug!("successfully copied data from: {}", source_directory.display());
if !allows_links {
sess.warn(&format!(
"Hard linking files in the incremental \
compilation cache failed. Copying files \
instead. Consider moving the cache \
directory to a file system which supports \
hard linking in session dir `{}`",
session_dir.display()
));
}
sess.init_incr_comp_session(session_dir, directory_lock, true);
return;
} else {
debug!("copying failed - trying next directory");
// Something went wrong while trying to copy/link files from the
// source directory. Try again with a different one.
source_directories_already_tried.insert(source_directory);
// Try to remove the session directory we just allocated. We don't
// know if there's any garbage in it from the failed copy action.
if let Err(err) = safe_remove_dir_all(&session_dir) {
sess.warn(&format!(
"Failed to delete partly initialized \
session dir `{}`: {}",
session_dir.display(),
err
));
}
delete_session_dir_lock_file(sess, &lock_file_path);
mem::drop(directory_lock);
}
}
}
/// This function finalizes and thus 'publishes' the session directory by
/// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
/// If there have been compilation errors, however, this function will just
/// delete the presumably invalid session directory.
pub fn finalize_session_directory(sess: &Session, svh: Svh) {
if sess.opts.incremental.is_none() {
return;
}
let _timer = sess.timer("incr_comp_finalize_session_directory");
let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
if sess.has_errors_or_delayed_span_bugs() {
// If there have been any errors during compilation, we don't want to
// publish this session directory. Rather, we'll just delete it.
debug!(
"finalize_session_directory() - invalidating session directory: {}",
incr_comp_session_dir.display()
);
if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
sess.warn(&format!(
"Error deleting incremental compilation \
session directory `{}`: {}",
incr_comp_session_dir.display(),
err
));
}
let lock_file_path = lock_file_path(&*incr_comp_session_dir);
delete_session_dir_lock_file(sess, &lock_file_path);
sess.mark_incr_comp_session_as_invalid();
}
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
assert_no_characters_lost(&old_sub_dir_name);
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
// '-working' part with the SVH of the crate
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
if dash_indices.len() != 3 {
bug!(
"Encountered incremental compilation session directory with \
malformed name: {}",
incr_comp_session_dir.display()
)
}
// State: "s-{timestamp}-{random-number}-"
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
// Append the svh
base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
// Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
debug!("finalize_session_directory() - new path: {}", new_path.display());
match std_fs::rename(&*incr_comp_session_dir, &new_path) {
Ok(_) => {
debug!("finalize_session_directory() - directory renamed successfully");
// This unlocks the directory
sess.finalize_incr_comp_session(new_path);
}
Err(e) => {
// Warn about the error. However, no need to abort compilation now.
sess.warn(&format!(
"Error finalizing incremental compilation \
session directory `{}`: {}",
incr_comp_session_dir.display(),
e
));
debug!("finalize_session_directory() - error, marking as invalid");
// Drop the file lock, so we can garage collect
sess.mark_incr_comp_session_as_invalid();
}
}
let _ = garbage_collect_session_directories(sess);
}
pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
for entry in sess_dir_iterator {
let entry = entry?;
safe_remove_file(&entry.path())?
}
Ok(())
}
fn copy_files(sess: &Session, target_dir: &Path, source_dir: &Path) -> Result<bool, ()> {
// We acquire a shared lock on the lock file of the directory, so that
// nobody deletes it out from under us while we are reading from it.
let lock_file_path = lock_file_path(source_dir);
let _lock = if let Ok(lock) = flock::Lock::new(
&lock_file_path,
false, // don't wait,
false, // don't create
false,
) {
// not exclusive
lock
} else {
// Could not acquire the lock, don't try to copy from here
return Err(());
};
let source_dir_iterator = match source_dir.read_dir() {
Ok(it) => it,
Err(_) => return Err(()),
};
let mut files_linked = 0;
let mut files_copied = 0;
for entry in source_dir_iterator {
match entry {
Ok(entry) => {
let file_name = entry.file_name();
let target_file_path = target_dir.join(file_name);
let source_path = entry.path();
debug!("copying into session dir: {}", source_path.display());
match link_or_copy(source_path, target_file_path) {
Ok(LinkOrCopy::Link) => files_linked += 1,
Ok(LinkOrCopy::Copy) => files_copied += 1,
Err(_) => return Err(()),
}
}
Err(_) => return Err(()),
}
}
if sess.opts.debugging_opts.incremental_info {
println!(
"[incremental] session directory: \
{} files hard-linked",
files_linked
);
println!(
"[incremental] session directory: \
{} files copied",
files_copied
);
}
Ok(files_linked > 0 || files_copied == 0)
}
/// Generates unique directory path of the form:
/// {crate_dir}/s-{timestamp}-{random-number}-working
fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
let timestamp = timestamp_to_string(SystemTime::now());
debug!("generate_session_dir_path: timestamp = {}", timestamp);
let random_number = thread_rng().next_u32();
debug!("generate_session_dir_path: random_number = {}", random_number);
let directory_name = format!(
"s-{}-{}-working",
timestamp,
base_n::encode(random_number as u128, INT_ENCODE_BASE)
);
debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name);
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
directory_path
}
fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(), ()> {
match std_fs::create_dir_all(path) {
Ok(()) => {
debug!("{} directory created successfully", dir_tag);
Ok(())
}
Err(err) => {
sess.err(&format!(
"Could not create incremental compilation {} \
directory `{}`: {}",
dir_tag,
path.display(),
err
));
Err(())
}
}
}
/// Allocate the lock-file and lock it.
fn lock_directory(sess: &Session, session_dir: &Path) -> Result<(flock::Lock, PathBuf), ()> {
let lock_file_path = lock_file_path(session_dir);
debug!("lock_directory() - lock_file: {}", lock_file_path.display());
match flock::Lock::new(
&lock_file_path,
false, // don't wait
true, // create the lock file
true,
) {
// the lock should be exclusive
Ok(lock) => Ok((lock, lock_file_path)),
Err(err) => {
sess.err(&format!(
"incremental compilation: could not create \
session directory lock file: {}",
err
));
Err(())
}
}
}
fn delete_session_dir_lock_file(sess: &Session, lock_file_path: &Path) {
if let Err(err) = safe_remove_file(&lock_file_path) {
sess.warn(&format!(
"Error deleting lock file for incremental \
compilation session directory `{}`: {}",
lock_file_path.display(),
err
));
}
}
/// Finds the most recent published session directory that is not in the
/// ignore-list.
fn find_source_directory(
crate_dir: &Path,
source_directories_already_tried: &FxHashSet<PathBuf>,
) -> Option<PathBuf> {
let iter = crate_dir
.read_dir()
.unwrap() // FIXME
.filter_map(|e| e.ok().map(|e| e.path()));
find_source_directory_in_iter(iter, source_directories_already_tried)
}
fn find_source_directory_in_iter<I>(
iter: I,
source_directories_already_tried: &FxHashSet<PathBuf>,
) -> Option<PathBuf>
where
I: Iterator<Item = PathBuf>,
{
let mut best_candidate = (UNIX_EPOCH, None);
for session_dir in iter {
debug!("find_source_directory_in_iter - inspecting `{}`", session_dir.display());
let directory_name = session_dir.file_name().unwrap().to_string_lossy();
assert_no_characters_lost(&directory_name);
if source_directories_already_tried.contains(&session_dir)
|| !is_session_directory(&directory_name)
|| !is_finalized(&directory_name)
{
debug!("find_source_directory_in_iter - ignoring");
continue;
}
let timestamp = extract_timestamp_from_session_dir(&directory_name).unwrap_or_else(|_| {
bug!("unexpected incr-comp session dir: {}", session_dir.display())
});
if timestamp > best_candidate.0 {
best_candidate = (timestamp, Some(session_dir.clone()));
}
}
best_candidate.1
}
fn is_finalized(directory_name: &str) -> bool {
!directory_name.ends_with("-working")
}
fn is_session_directory(directory_name: &str) -> bool {
directory_name.starts_with("s-") && !directory_name.ends_with(LOCK_FILE_EXT)
}
fn is_session_directory_lock_file(file_name: &str) -> bool {
file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
}
fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime, ()> {
if !is_session_directory(directory_name) {
return Err(());
}
let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect();
if dash_indices.len() != 3 {
return Err(());
}
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
}
fn timestamp_to_string(timestamp: SystemTime) -> String {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros as u128, INT_ENCODE_BASE)
}
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32);
if micros_since_unix_epoch.is_err() {
return Err(());
}
let micros_since_unix_epoch = micros_since_unix_epoch.unwrap();
let duration = Duration::new(
micros_since_unix_epoch / 1_000_000,
1000 * (micros_since_unix_epoch % 1_000_000) as u32,
);
Ok(UNIX_EPOCH + duration)
}
fn crate_path(
sess: &Session,
crate_name: &str,
crate_disambiguator: CrateDisambiguator,
) -> PathBuf {
let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
// The full crate disambiguator is really long. 64 bits of it should be
// sufficient.
let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
let crate_disambiguator = base_n::encode(crate_disambiguator as u128, INT_ENCODE_BASE);
let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
incr_dir.join(crate_name)
}
fn assert_no_characters_lost(s: &str) {
if s.contains('\u{FFFD}') {
bug!("Could not losslessly convert '{}'.", s)
}
}
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
timestamp < SystemTime::now() - Duration::from_secs(10)
}
pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
debug!("garbage_collect_session_directories() - begin");
let session_directory = sess.incr_comp_session_dir();
debug!(
"garbage_collect_session_directories() - session directory: {}",
session_directory.display()
);
let crate_directory = session_directory.parent().unwrap();
debug!(
"garbage_collect_session_directories() - crate directory: {}",
crate_directory.display()
);
// First do a pass over the crate directory, collecting lock files and
// session directories
let mut session_directories = FxHashSet::default();
let mut lock_files = FxHashSet::default();
for dir_entry in crate_directory.read_dir()? {
let dir_entry = match dir_entry {
Ok(dir_entry) => dir_entry,
_ => {
// Ignore any errors
continue;
}
};
let entry_name = dir_entry.file_name();
let entry_name = entry_name.to_string_lossy();
if is_session_directory_lock_file(&entry_name) {
assert_no_characters_lost(&entry_name);
lock_files.insert(entry_name.into_owned());
} else if is_session_directory(&entry_name) {
assert_no_characters_lost(&entry_name);
session_directories.insert(entry_name.into_owned());
} else {
// This is something we don't know, leave it alone
}
}
// Now map from lock files to session directories
let lock_file_to_session_dir: FxHashMap<String, Option<String>> = lock_files
.into_iter()
.map(|lock_file_name| {
assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
let session_dir = {
let dir_prefix = &lock_file_name[0..dir_prefix_end];
session_directories.iter().find(|dir_name| dir_name.starts_with(dir_prefix))
};
(lock_file_name, session_dir.map(String::clone))
})
.collect();
// Delete all lock files, that don't have an associated directory. They must
// be some kind of leftover
for (lock_file_name, directory_name) in &lock_file_to_session_dir {
if directory_name.is_none() {
let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
Ok(timestamp) => timestamp,
Err(()) => {
debug!(
"found lock-file with malformed timestamp: {}",
crate_directory.join(&lock_file_name).display()
);
// Ignore it
continue;
}
};
let lock_file_path = crate_directory.join(&**lock_file_name);
if is_old_enough_to_be_collected(timestamp) {
debug!(
"garbage_collect_session_directories() - deleting \
garbage lock file: {}",
lock_file_path.display()
);
delete_session_dir_lock_file(sess, &lock_file_path);
} else {
debug!(
"garbage_collect_session_directories() - lock file with \
no session dir not old enough to be collected: {}",
lock_file_path.display()
);
}
}
}
// Filter out `None` directories
let lock_file_to_session_dir: FxHashMap<String, String> = lock_file_to_session_dir
.into_iter()
.filter_map(|(lock_file_name, directory_name)| directory_name.map(|n| (lock_file_name, n)))
.collect();
// Delete all session directories that don't have a lock file.
for directory_name in session_directories {
if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) {
let path = crate_directory.join(directory_name);
if let Err(err) = safe_remove_dir_all(&path) {
sess.warn(&format!(
"Failed to garbage collect invalid incremental \
compilation session directory `{}`: {}",
path.display(),
err
));
}
}
}
// Now garbage collect the valid session directories.
let mut deletion_candidates = vec![];
let mut definitely_delete = vec![];
for (lock_file_name, directory_name) in &lock_file_to_session_dir {
debug!("garbage_collect_session_directories() - inspecting: {}", directory_name);
let timestamp = match extract_timestamp_from_session_dir(directory_name) {
Ok(timestamp) => timestamp,
Err(()) => {
debug!(
"found session-dir with malformed timestamp: {}",
crate_directory.join(directory_name).display()
);
// Ignore it
continue;
}
};
if is_finalized(directory_name) {
let lock_file_path = crate_directory.join(lock_file_name);
match flock::Lock::new(
&lock_file_path,
false, // don't wait
false, // don't create the lock-file
true,
) {
// get an exclusive lock
Ok(lock) => {
debug!(
"garbage_collect_session_directories() - \
successfully acquired lock"
);
debug!(
"garbage_collect_session_directories() - adding \
deletion candidate: {}",
directory_name
);
// Note that we are holding on to the lock
deletion_candidates.push((
timestamp,
crate_directory.join(directory_name),
Some(lock),
));
}
Err(_) => {
debug!(
"garbage_collect_session_directories() - \
not collecting, still in use"
);
}
}
} else if is_old_enough_to_be_collected(timestamp) {
// When cleaning out "-working" session directories, i.e.
// session directories that might still be in use by another
// compiler instance, we only look a directories that are
// at least ten seconds old. This is supposed to reduce the
// chance of deleting a directory in the time window where
// the process has allocated the directory but has not yet
// acquired the file-lock on it.
// Try to acquire the directory lock. If we can't, it
// means that the owning process is still alive and we
// leave this directory alone.
let lock_file_path = crate_directory.join(lock_file_name);
match flock::Lock::new(
&lock_file_path,
false, // don't wait
false, // don't create the lock-file
true,
) {
// get an exclusive lock
Ok(lock) => {
debug!(
"garbage_collect_session_directories() - \
successfully acquired lock"
);
// Note that we are holding on to the lock
definitely_delete.push((crate_directory.join(directory_name), Some(lock)));
}
Err(_) => {
debug!(
"garbage_collect_session_directories() - \
not collecting, still in use"
);
}
}
} else {
debug!(
"garbage_collect_session_directories() - not finalized, not \
old enough"
);
}
}
// Delete all but the most recent of the candidates
for (path, lock) in all_except_most_recent(deletion_candidates) {
debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
if let Err(err) = safe_remove_dir_all(&path) {
sess.warn(&format!(
"Failed to garbage collect finalized incremental \
compilation session directory `{}`: {}",
path.display(),
err
));
} else {
delete_session_dir_lock_file(sess, &lock_file_path(&path));
}
// Let's make it explicit that the file lock is released at this point,
// or rather, that we held on to it until here
mem::drop(lock);
}
for (path, lock) in definitely_delete {
debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
if let Err(err) = safe_remove_dir_all(&path) {
sess.warn(&format!(
"Failed to garbage collect incremental \
compilation session directory `{}`: {}",
path.display(),
err
));
} else {
delete_session_dir_lock_file(sess, &lock_file_path(&path));
}
// Let's make it explicit that the file lock is released at this point,
// or rather, that we held on to it until here
mem::drop(lock);
}
Ok(())
}
fn all_except_most_recent(
deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>,
) -> FxHashMap<PathBuf, Option<flock::Lock>> {
let most_recent = deletion_candidates.iter().map(|&(timestamp, ..)| timestamp).max();
if let Some(most_recent) = most_recent {
deletion_candidates
.into_iter()
.filter(|&(timestamp, ..)| timestamp != most_recent)
.map(|(_, path, lock)| (path, lock))
.collect()
} else {
FxHashMap::default()
}
}
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to std::fs::remove_dir_all(). This will convert the path
/// into the '\\?\' format, which supports much longer paths.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
if p.exists() {
let canonicalized = p.canonicalize()?;
std_fs::remove_dir_all(canonicalized)
} else {
Ok(())
}
}
fn safe_remove_file(p: &Path) -> io::Result<()> {
if p.exists() {
let canonicalized = p.canonicalize()?;
match std_fs::remove_file(canonicalized) {
Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
result => result,
}
} else {
Ok(())
}
}

View file

@ -0,0 +1,84 @@
use super::*;
#[test]
fn test_all_except_most_recent() {
assert_eq!(
all_except_most_recent(vec![
(UNIX_EPOCH + Duration::new(4, 0), PathBuf::from("4"), None),
(UNIX_EPOCH + Duration::new(1, 0), PathBuf::from("1"), None),
(UNIX_EPOCH + Duration::new(5, 0), PathBuf::from("5"), None),
(UNIX_EPOCH + Duration::new(3, 0), PathBuf::from("3"), None),
(UNIX_EPOCH + Duration::new(2, 0), PathBuf::from("2"), None),
])
.keys()
.cloned()
.collect::<FxHashSet<PathBuf>>(),
vec![PathBuf::from("1"), PathBuf::from("2"), PathBuf::from("3"), PathBuf::from("4"),]
.into_iter()
.collect::<FxHashSet<PathBuf>>()
);
assert_eq!(
all_except_most_recent(vec![]).keys().cloned().collect::<FxHashSet<PathBuf>>(),
FxHashSet::default()
);
}
#[test]
fn test_timestamp_serialization() {
for i in 0..1_000u64 {
let time = UNIX_EPOCH + Duration::new(i * 1_434_578, (i as u32) * 239_000);
let s = timestamp_to_string(time);
assert_eq!(Ok(time), string_to_timestamp(&s));
}
}
#[test]
fn test_find_source_directory_in_iter() {
let already_visited = FxHashSet::default();
// Find newest
assert_eq!(
find_source_directory_in_iter(
vec![
PathBuf::from("crate-dir/s-3234-0000-svh"),
PathBuf::from("crate-dir/s-2234-0000-svh"),
PathBuf::from("crate-dir/s-1234-0000-svh")
]
.into_iter(),
&already_visited
),
Some(PathBuf::from("crate-dir/s-3234-0000-svh"))
);
// Filter out "-working"
assert_eq!(
find_source_directory_in_iter(
vec![
PathBuf::from("crate-dir/s-3234-0000-working"),
PathBuf::from("crate-dir/s-2234-0000-svh"),
PathBuf::from("crate-dir/s-1234-0000-svh")
]
.into_iter(),
&already_visited
),
Some(PathBuf::from("crate-dir/s-2234-0000-svh"))
);
// Handle empty
assert_eq!(find_source_directory_in_iter(vec![].into_iter(), &already_visited), None);
// Handle only working
assert_eq!(
find_source_directory_in_iter(
vec![
PathBuf::from("crate-dir/s-3234-0000-working"),
PathBuf::from("crate-dir/s-2234-0000-working"),
PathBuf::from("crate-dir/s-1234-0000-working")
]
.into_iter(),
&already_visited
),
None
);
}

View file

@ -0,0 +1,208 @@
//! Code to save/load the dep-graph from files.
use rustc_data_structures::fx::FxHashMap;
use rustc_middle::dep_graph::{PreviousDepGraph, SerializedDepGraph, WorkProduct, WorkProductId};
use rustc_middle::ty::query::OnDiskCache;
use rustc_middle::ty::TyCtxt;
use rustc_serialize::opaque::Decoder;
use rustc_serialize::Decodable as RustcDecodable;
use rustc_session::Session;
use std::path::Path;
use super::data::*;
use super::file_format;
use super::fs::*;
use super::work_product;
pub fn dep_graph_tcx_init(tcx: TyCtxt<'_>) {
if !tcx.dep_graph.is_fully_enabled() {
return;
}
tcx.allocate_metadata_dep_nodes();
}
type WorkProductMap = FxHashMap<WorkProductId, WorkProduct>;
pub enum LoadResult<T> {
Ok { data: T },
DataOutOfDate,
Error { message: String },
}
impl LoadResult<(PreviousDepGraph, WorkProductMap)> {
pub fn open(self, sess: &Session) -> (PreviousDepGraph, WorkProductMap) {
match self {
LoadResult::Error { message } => {
sess.warn(&message);
Default::default()
}
LoadResult::DataOutOfDate => {
if let Err(err) = delete_all_session_dir_contents(sess) {
sess.err(&format!(
"Failed to delete invalidated or incompatible \
incremental compilation session directory contents `{}`: {}.",
dep_graph_path(sess).display(),
err
));
}
Default::default()
}
LoadResult::Ok { data } => data,
}
}
}
fn load_data(report_incremental_info: bool, path: &Path) -> LoadResult<(Vec<u8>, usize)> {
match file_format::read_file(report_incremental_info, path) {
Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
Ok(None) => {
// The file either didn't exist or was produced by an incompatible
// compiler version. Neither is an error.
LoadResult::DataOutOfDate
}
Err(err) => LoadResult::Error {
message: format!("could not load dep-graph from `{}`: {}", path.display(), err),
},
}
}
fn delete_dirty_work_product(sess: &Session, swp: SerializedWorkProduct) {
debug!("delete_dirty_work_product({:?})", swp);
work_product::delete_workproduct_files(sess, &swp.work_product);
}
/// Either a result that has already be computed or a
/// handle that will let us wait until it is computed
/// by a background thread.
pub enum MaybeAsync<T> {
Sync(T),
Async(std::thread::JoinHandle<T>),
}
impl<T> MaybeAsync<T> {
pub fn open(self) -> std::thread::Result<T> {
match self {
MaybeAsync::Sync(result) => Ok(result),
MaybeAsync::Async(handle) => handle.join(),
}
}
}
pub type DepGraphFuture = MaybeAsync<LoadResult<(PreviousDepGraph, WorkProductMap)>>;
/// Launch a thread and load the dependency graph in the background.
pub fn load_dep_graph(sess: &Session) -> DepGraphFuture {
// Since `sess` isn't `Sync`, we perform all accesses to `sess`
// before we fire the background thread.
let prof = sess.prof.clone();
if sess.opts.incremental.is_none() {
// No incremental compilation.
return MaybeAsync::Sync(LoadResult::Ok { data: Default::default() });
}
let _timer = sess.prof.generic_activity("incr_comp_prepare_load_dep_graph");
// Calling `sess.incr_comp_session_dir()` will panic if `sess.opts.incremental.is_none()`.
// Fortunately, we just checked that this isn't the case.
let path = dep_graph_path_from(&sess.incr_comp_session_dir());
let report_incremental_info = sess.opts.debugging_opts.incremental_info;
let expected_hash = sess.opts.dep_tracking_hash();
let mut prev_work_products = FxHashMap::default();
// If we are only building with -Zquery-dep-graph but without an actual
// incr. comp. session directory, we skip this. Otherwise we'd fail
// when trying to load work products.
if sess.incr_comp_session_dir_opt().is_some() {
let work_products_path = work_products_path(sess);
let load_result = load_data(report_incremental_info, &work_products_path);
if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result {
// Decode the list of work_products
let mut work_product_decoder = Decoder::new(&work_products_data[..], start_pos);
let work_products: Vec<SerializedWorkProduct> =
RustcDecodable::decode(&mut work_product_decoder).unwrap_or_else(|e| {
let msg = format!(
"Error decoding `work-products` from incremental \
compilation session directory: {}",
e
);
sess.fatal(&msg[..])
});
for swp in work_products {
let mut all_files_exist = true;
if let Some(ref file_name) = swp.work_product.saved_file {
let path = in_incr_comp_dir_sess(sess, file_name);
if !path.exists() {
all_files_exist = false;
if sess.opts.debugging_opts.incremental_info {
eprintln!(
"incremental: could not find file for work \
product: {}",
path.display()
);
}
}
}
if all_files_exist {
debug!("reconcile_work_products: all files for {:?} exist", swp);
prev_work_products.insert(swp.id, swp.work_product);
} else {
debug!("reconcile_work_products: some file for {:?} does not exist", swp);
delete_dirty_work_product(sess, swp);
}
}
}
}
MaybeAsync::Async(std::thread::spawn(move || {
let _prof_timer = prof.generic_activity("incr_comp_load_dep_graph");
match load_data(report_incremental_info, &path) {
LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
LoadResult::Error { message } => LoadResult::Error { message },
LoadResult::Ok { data: (bytes, start_pos) } => {
let mut decoder = Decoder::new(&bytes, start_pos);
let prev_commandline_args_hash = u64::decode(&mut decoder)
.expect("Error reading commandline arg hash from cached dep-graph");
if prev_commandline_args_hash != expected_hash {
if report_incremental_info {
println!(
"[incremental] completely ignoring cache because of \
differing commandline arguments"
);
}
// We can't reuse the cache, purge it.
debug!("load_dep_graph_new: differing commandline arg hashes");
// No need to do any further work
return LoadResult::DataOutOfDate;
}
let dep_graph = SerializedDepGraph::decode(&mut decoder)
.expect("Error reading cached dep-graph");
LoadResult::Ok { data: (PreviousDepGraph::new(dep_graph), prev_work_products) }
}
}
}))
}
pub fn load_query_result_cache(sess: &Session) -> OnDiskCache<'_> {
if sess.opts.incremental.is_none() {
return OnDiskCache::new_empty(sess.source_map());
}
let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache");
match load_data(sess.opts.debugging_opts.incremental_info, &query_cache_path(sess)) {
LoadResult::Ok { data: (bytes, start_pos) } => OnDiskCache::new(sess, bytes, start_pos),
_ => OnDiskCache::new_empty(sess.source_map()),
}
}

View file

@ -0,0 +1,25 @@
//! When in incremental mode, this pass dumps out the dependency graph
//! into the given directory. At the same time, it also hashes the
//! various HIR nodes.
mod data;
mod dirty_clean;
mod file_format;
mod fs;
mod load;
mod save;
mod work_product;
pub use fs::finalize_session_directory;
pub use fs::garbage_collect_session_directories;
pub use fs::in_incr_comp_dir;
pub use fs::in_incr_comp_dir_sess;
pub use fs::prepare_session_directory;
pub use load::dep_graph_tcx_init;
pub use load::load_query_result_cache;
pub use load::LoadResult;
pub use load::{load_dep_graph, DepGraphFuture};
pub use save::save_dep_graph;
pub use save::save_work_product_index;
pub use work_product::copy_cgu_workproduct_to_incr_comp_cache_dir;
pub use work_product::delete_workproduct_files;

View file

@ -0,0 +1,248 @@
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::join;
use rustc_middle::dep_graph::{DepGraph, DepKind, WorkProduct, WorkProductId};
use rustc_middle::ty::TyCtxt;
use rustc_serialize::opaque::Encoder;
use rustc_serialize::Encodable as RustcEncodable;
use rustc_session::Session;
use std::fs;
use std::path::PathBuf;
use super::data::*;
use super::dirty_clean;
use super::file_format;
use super::fs::*;
use super::work_product;
pub fn save_dep_graph(tcx: TyCtxt<'_>) {
debug!("save_dep_graph()");
tcx.dep_graph.with_ignore(|| {
let sess = tcx.sess;
if sess.opts.incremental.is_none() {
return;
}
// This is going to be deleted in finalize_session_directory, so let's not create it
if sess.has_errors_or_delayed_span_bugs() {
return;
}
let query_cache_path = query_cache_path(sess);
let dep_graph_path = dep_graph_path(sess);
join(
move || {
sess.time("incr_comp_persist_result_cache", || {
save_in(sess, query_cache_path, |e| encode_query_cache(tcx, e));
});
},
|| {
sess.time("incr_comp_persist_dep_graph", || {
save_in(sess, dep_graph_path, |e| {
sess.time("incr_comp_encode_dep_graph", || encode_dep_graph(tcx, e))
});
});
},
);
dirty_clean::check_dirty_clean_annotations(tcx);
})
}
pub fn save_work_product_index(
sess: &Session,
dep_graph: &DepGraph,
new_work_products: FxHashMap<WorkProductId, WorkProduct>,
) {
if sess.opts.incremental.is_none() {
return;
}
// This is going to be deleted in finalize_session_directory, so let's not create it
if sess.has_errors_or_delayed_span_bugs() {
return;
}
debug!("save_work_product_index()");
dep_graph.assert_ignored();
let path = work_products_path(sess);
save_in(sess, path, |e| encode_work_product_index(&new_work_products, e));
// We also need to clean out old work-products, as not all of them are
// deleted during invalidation. Some object files don't change their
// content, they are just not needed anymore.
let previous_work_products = dep_graph.previous_work_products();
for (id, wp) in previous_work_products.iter() {
if !new_work_products.contains_key(id) {
work_product::delete_workproduct_files(sess, wp);
debug_assert!(
wp.saved_file.as_ref().map_or(true, |file_name| {
!in_incr_comp_dir_sess(sess, &file_name).exists()
})
);
}
}
// Check that we did not delete one of the current work-products:
debug_assert!({
new_work_products
.iter()
.flat_map(|(_, wp)| wp.saved_file.iter())
.map(|name| in_incr_comp_dir_sess(sess, name))
.all(|path| path.exists())
});
}
fn save_in<F>(sess: &Session, path_buf: PathBuf, encode: F)
where
F: FnOnce(&mut Encoder),
{
debug!("save: storing data in {}", path_buf.display());
// delete the old dep-graph, if any
// Note: It's important that we actually delete the old file and not just
// truncate and overwrite it, since it might be a shared hard-link, the
// underlying data of which we don't want to modify
if path_buf.exists() {
match fs::remove_file(&path_buf) {
Ok(()) => {
debug!("save: remove old file");
}
Err(err) => {
sess.err(&format!(
"unable to delete old dep-graph at `{}`: {}",
path_buf.display(),
err
));
return;
}
}
}
// generate the data in a memory buffer
let mut encoder = Encoder::new(Vec::new());
file_format::write_file_header(&mut encoder);
encode(&mut encoder);
// write the data out
let data = encoder.into_inner();
match fs::write(&path_buf, data) {
Ok(_) => {
debug!("save: data written to disk successfully");
}
Err(err) => {
sess.err(&format!("failed to write dep-graph to `{}`: {}", path_buf.display(), err));
}
}
}
fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
// First encode the commandline arguments hash
tcx.sess.opts.dep_tracking_hash().encode(encoder).unwrap();
// Encode the graph data.
let serialized_graph =
tcx.sess.time("incr_comp_serialize_dep_graph", || tcx.dep_graph.serialize());
if tcx.sess.opts.debugging_opts.incremental_info {
#[derive(Clone)]
struct Stat {
kind: DepKind,
node_counter: u64,
edge_counter: u64,
}
let total_node_count = serialized_graph.nodes.len();
let total_edge_count = serialized_graph.edge_list_data.len();
let mut counts: FxHashMap<_, Stat> = FxHashMap::default();
for (i, &node) in serialized_graph.nodes.iter_enumerated() {
let stat = counts.entry(node.kind).or_insert(Stat {
kind: node.kind,
node_counter: 0,
edge_counter: 0,
});
stat.node_counter += 1;
let (edge_start, edge_end) = serialized_graph.edge_list_indices[i];
stat.edge_counter += (edge_end - edge_start) as u64;
}
let mut counts: Vec<_> = counts.values().cloned().collect();
counts.sort_by_key(|s| -(s.node_counter as i64));
let percentage_of_all_nodes: Vec<f64> = counts
.iter()
.map(|s| (100.0 * (s.node_counter as f64)) / (total_node_count as f64))
.collect();
let average_edges_per_kind: Vec<f64> =
counts.iter().map(|s| (s.edge_counter as f64) / (s.node_counter as f64)).collect();
println!("[incremental]");
println!("[incremental] DepGraph Statistics");
const SEPARATOR: &str = "[incremental] --------------------------------\
----------------------------------------------\
------------";
println!("{}", SEPARATOR);
println!("[incremental]");
println!("[incremental] Total Node Count: {}", total_node_count);
println!("[incremental] Total Edge Count: {}", total_edge_count);
if let Some((total_edge_reads, total_duplicate_edge_reads)) =
tcx.dep_graph.edge_deduplication_data()
{
println!("[incremental] Total Edge Reads: {}", total_edge_reads);
println!("[incremental] Total Duplicate Edge Reads: {}", total_duplicate_edge_reads);
}
println!("[incremental]");
println!(
"[incremental] {:<36}| {:<17}| {:<12}| {:<17}|",
"Node Kind", "Node Frequency", "Node Count", "Avg. Edge Count"
);
println!(
"[incremental] -------------------------------------\
|------------------\
|-------------\
|------------------|"
);
for (i, stat) in counts.iter().enumerate() {
println!(
"[incremental] {:<36}|{:>16.1}% |{:>12} |{:>17.1} |",
format!("{:?}", stat.kind),
percentage_of_all_nodes[i],
stat.node_counter,
average_edges_per_kind[i]
);
}
println!("{}", SEPARATOR);
println!("[incremental]");
}
tcx.sess.time("incr_comp_encode_serialized_dep_graph", || {
serialized_graph.encode(encoder).unwrap();
});
}
fn encode_work_product_index(
work_products: &FxHashMap<WorkProductId, WorkProduct>,
encoder: &mut Encoder,
) {
let serialized_products: Vec<_> = work_products
.iter()
.map(|(id, work_product)| SerializedWorkProduct {
id: *id,
work_product: work_product.clone(),
})
.collect();
serialized_products.encode(encoder).unwrap();
}
fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
tcx.sess.time("incr_comp_serialize_result_cache", || {
tcx.serialize_query_result_cache(encoder).unwrap();
})
}

View file

@ -0,0 +1,57 @@
//! This module contains files for saving intermediate work-products.
use crate::persist::fs::*;
use rustc_fs_util::link_or_copy;
use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
use rustc_session::Session;
use std::fs as std_fs;
use std::path::PathBuf;
pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
sess: &Session,
cgu_name: &str,
path: &Option<PathBuf>,
) -> Option<(WorkProductId, WorkProduct)> {
debug!("copy_cgu_workproduct_to_incr_comp_cache_dir({:?},{:?})", cgu_name, path);
sess.opts.incremental.as_ref()?;
let saved_file = if let Some(path) = path {
let file_name = format!("{}.o", cgu_name);
let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
match link_or_copy(path, &path_in_incr_dir) {
Ok(_) => Some(file_name),
Err(err) => {
sess.warn(&format!(
"error copying object file `{}` to incremental directory as `{}`: {}",
path.display(),
path_in_incr_dir.display(),
err
));
return None;
}
}
} else {
None
};
let work_product = WorkProduct { cgu_name: cgu_name.to_string(), saved_file };
let work_product_id = WorkProductId::from_cgu_name(cgu_name);
Some((work_product_id, work_product))
}
pub fn delete_workproduct_files(sess: &Session, work_product: &WorkProduct) {
if let Some(ref file_name) = work_product.saved_file {
let path = in_incr_comp_dir_sess(sess, file_name);
match std_fs::remove_file(&path) {
Ok(()) => {}
Err(err) => {
sess.warn(&format!(
"file-system error deleting outdated file `{}`: {}",
path.display(),
err
));
}
}
}
}