Auto merge of #132566 - saethlin:querify-mir-collection, r=cjgillot
Querify MonoItem collection

Factored out of https://github.com/rust-lang/rust/pull/131650. These changes are required for post-mono MIR opts, because the previous implementation would load the MIR for every Instance that we traverse (as well as invoke queries on it). The cost of that would grow massively with post-mono MIR opts because we'll need to load new MIR for every Instance, instead of re-using the `optimized_mir` for every Instance with the same DefId.

So the approach here is to add two new queries, `items_of_instance` and `size_estimate`, which contain the specific information about an Instance's MIR that MirUsedCollector and CGU partitioning need, respectively. Caching these significantly increases the size of the query cache, but that's justified by our improved incrementality (I'm sure walking all the MIR for a huge crate scales quite poorly).

This also changes `MonoItems` into a type that will retain the traversal order (otherwise we perturb a bunch of diagnostics), and will also eliminate duplicate findings. Eliminating duplicates removes about a quarter of the query cache size growth.

The perf improvements in this PR are inflated because rustc-perf uses `-Zincremental-verify-ich`, which makes loading MIR a lot slower because MIR contains a lot of Spans and computing the stable hash of a Span is slow. And the primary goal of this PR is to load less MIR. Some squinting at `collector profile_local perf-record +stage1` runs suggests the magnitude of the improvements in this PR would be decreased by between a third and a half if that flag weren't being used. Though this effect may apply to the regressions too, since most are incr-full and this change also causes such builds to encode more Spans.
This commit is contained in:
commit
ee4a56e353
9 changed files with 165 additions and 67 deletions
|
@ -46,7 +46,7 @@ pub enum InstantiationMode {
|
|||
LocalCopy,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, HashStable)]
|
||||
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, HashStable, TyEncodable, TyDecodable)]
|
||||
pub enum MonoItem<'tcx> {
|
||||
Fn(Instance<'tcx>),
|
||||
Static(DefId),
|
||||
|
@ -66,20 +66,7 @@ impl<'tcx> MonoItem<'tcx> {
|
|||
// change NON_INCR_MIN_CGU_SIZE as well.
|
||||
pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize {
|
||||
match *self {
|
||||
MonoItem::Fn(instance) => {
|
||||
match instance.def {
|
||||
// "Normal" functions size estimate: the number of
|
||||
// statements, plus one for the terminator.
|
||||
InstanceKind::Item(..)
|
||||
| InstanceKind::DropGlue(..)
|
||||
| InstanceKind::AsyncDropGlueCtorShim(..) => {
|
||||
let mir = tcx.instance_mir(instance.def);
|
||||
mir.basic_blocks.iter().map(|bb| bb.statements.len() + 1).sum()
|
||||
}
|
||||
// Other compiler-generated shims size estimate: 1
|
||||
_ => 1,
|
||||
}
|
||||
}
|
||||
MonoItem::Fn(instance) => tcx.size_estimate(instance),
|
||||
// Conservatively estimate the size of a static declaration or
|
||||
// assembly item to be 1.
|
||||
MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
|
||||
|
@ -556,3 +543,21 @@ impl<'tcx> CodegenUnitNameBuilder<'tcx> {
|
|||
Symbol::intern(&cgu_name)
|
||||
}
|
||||
}
|
||||
|
||||
/// See module-level docs of `rustc_monomorphize::collector` on some context for "mentioned" items.
|
||||
// Paired with an `Instance` to form the key of the `items_of_instance` query.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, HashStable)]
|
||||
pub enum CollectionMode {
|
||||
/// Collect items that are used, i.e., actually needed for codegen.
|
||||
///
|
||||
/// Which items are used can depend on optimization levels, as MIR optimizations can remove
|
||||
/// uses.
|
||||
UsedItems,
|
||||
/// Collect items that are mentioned. The goal of this mode is that it is independent of
|
||||
/// optimizations: the set of "mentioned" items is computed before optimizations are run.
|
||||
///
|
||||
/// The exact contents of this set are *not* a stable guarantee. (For instance, it is currently
|
||||
/// computed after drop-elaboration. If we ever do some optimizations even in debug builds, we
|
||||
/// might decide to run them before computing mentioned items.) The key property of this set is
|
||||
/// that it is optimization-independent.
|
||||
MentionedItems,
|
||||
|
||||
}
|
||||
|
|
|
@ -216,6 +216,10 @@ impl<T0, T1> EraseType for (&'_ T0, &'_ [T1]) {
|
|||
type Result = [u8; size_of::<(&'static (), &'static [()])>()];
|
||||
}
|
||||
|
||||
// Erased form of a pair of slice references. The storage is a byte array sized
// like `(&'static [()], &'static [()])`, so it does not depend on `T0` or `T1`.
// NOTE(review): presumably added for the `items_of_instance` query, whose
// result is a pair of slices; confirm against the query definition.
impl<T0, T1> EraseType for (&'_ [T0], &'_ [T1]) {
|
||||
type Result = [u8; size_of::<(&'static [()], &'static [()])>()];
|
||||
}
|
||||
|
||||
impl<T0> EraseType for (&'_ T0, Result<(), ErrorGuaranteed>) {
|
||||
type Result = [u8; size_of::<(&'static (), Result<(), ErrorGuaranteed>)>()];
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ use rustc_span::symbol::{Ident, Symbol};
|
|||
use rustc_span::{DUMMY_SP, Span};
|
||||
|
||||
use crate::infer::canonical::CanonicalQueryInput;
|
||||
use crate::mir::mono::CollectionMode;
|
||||
use crate::ty::fast_reject::SimplifiedType;
|
||||
use crate::ty::layout::{TyAndLayout, ValidityRequirement};
|
||||
use crate::ty::{self, GenericArg, GenericArgsRef, Ty, TyCtxt};
|
||||
|
@ -590,3 +591,11 @@ impl<'tcx> Key for (ValidityRequirement, ty::ParamEnvAnd<'tcx, Ty<'tcx>>) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Implements `Key` for an `(Instance, CollectionMode)` pair; the
// `items_of_instance` query takes this tuple as its key.
impl<'tcx> Key for (ty::Instance<'tcx>, CollectionMode) {
|
||||
// Results are cached in a `DefaultCache` keyed by the whole tuple.
type Cache<V> = DefaultCache<Self, V>;
|
||||
|
||||
// The span comes from the `Instance` component alone; the `CollectionMode`
// component is not consulted.
fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
|
||||
self.0.default_span(tcx)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,6 +40,7 @@ use rustc_session::cstore::{
|
|||
};
|
||||
use rustc_session::lint::LintExpectationId;
|
||||
use rustc_span::def_id::LOCAL_CRATE;
|
||||
use rustc_span::source_map::Spanned;
|
||||
use rustc_span::symbol::Symbol;
|
||||
use rustc_span::{DUMMY_SP, Span};
|
||||
use rustc_target::spec::PanicStrategy;
|
||||
|
@ -59,7 +60,7 @@ use crate::mir::interpret::{
|
|||
EvalStaticInitializerRawResult, EvalToAllocationRawResult, EvalToConstValueResult,
|
||||
EvalToValTreeResult, GlobalId, LitToConstError, LitToConstInput,
|
||||
};
|
||||
use crate::mir::mono::CodegenUnit;
|
||||
use crate::mir::mono::{CodegenUnit, CollectionMode, MonoItem};
|
||||
use crate::query::erase::{Erase, erase, restore};
|
||||
use crate::query::plumbing::{
|
||||
CyclePlaceholder, DynamicQuery, query_ensure, query_ensure_error_guaranteed, query_get_at,
|
||||
|
@ -2339,6 +2340,16 @@ rustc_queries! {
|
|||
arena_cache
|
||||
desc { "functions to skip for move-size check" }
|
||||
}
|
||||
|
||||
// Query keyed by an instance plus a `CollectionMode`, returning two
// arena-allocated slices of spanned `MonoItem`s.
// NOTE(review): presumably the first slice holds the used items and the second
// the mentioned items; confirm against the provider in the collector.
query items_of_instance(key: (ty::Instance<'tcx>, CollectionMode)) -> (&'tcx [Spanned<MonoItem<'tcx>>], &'tcx [Spanned<MonoItem<'tcx>>]) {
|
||||
desc { "collecting items used by `{}`", key.0 }
|
||||
// Unconditionally cached on disk.
cache_on_disk_if { true }
|
||||
}
|
||||
|
||||
// Query returning a `usize` size estimate for a single instance;
// `MonoItem::size_estimate` forwards to it for `MonoItem::Fn`.
// NOTE(review): presumably the provider keeps the estimate that used to be
// computed inline there (statements plus one terminator per basic block for
// ordinary functions, 1 for other shims); confirm against the provider.
query size_estimate(key: ty::Instance<'tcx>) -> usize {
|
||||
desc { "estimating codegen size of `{}`", key }
|
||||
// Unconditionally cached on disk.
cache_on_disk_if { true }
|
||||
}
|
||||
}
|
||||
|
||||
rustc_query_append! { define_callbacks! }
|
||||
|
|
|
@ -12,6 +12,7 @@ use rustc_index::{Idx, IndexVec};
|
|||
use rustc_macros::{Decodable, Encodable};
|
||||
use rustc_middle::dep_graph::{DepNodeIndex, SerializedDepNodeIndex};
|
||||
use rustc_middle::mir::interpret::{AllocDecodingSession, AllocDecodingState};
|
||||
use rustc_middle::mir::mono::MonoItem;
|
||||
use rustc_middle::mir::{self, interpret};
|
||||
use rustc_middle::ty::codec::{RefDecodable, TyDecoder, TyEncoder};
|
||||
use rustc_middle::ty::{self, Ty, TyCtxt};
|
||||
|
@ -22,7 +23,7 @@ use rustc_session::Session;
|
|||
use rustc_span::hygiene::{
|
||||
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
|
||||
};
|
||||
use rustc_span::source_map::SourceMap;
|
||||
use rustc_span::source_map::{SourceMap, Spanned};
|
||||
use rustc_span::{
|
||||
BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span,
|
||||
SpanDecoder, SpanEncoder, StableSourceFileId, Symbol,
|
||||
|
@ -773,6 +774,13 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for &'tcx [rustc_ast::InlineAsm
|
|||
}
|
||||
}
|
||||
|
||||
// Lets `CacheDecoder` decode a `&'tcx [Spanned<MonoItem<'tcx>>]` slice.
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for &'tcx [Spanned<MonoItem<'tcx>>] {
|
||||
#[inline]
|
||||
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
|
||||
// All the work is delegated to the `RefDecodable` impl for the slice type.
RefDecodable::decode(d)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>>
|
||||
for &'tcx crate::traits::specialization_graph::Graph
|
||||
{
|
||||
|
|
|
@ -13,9 +13,11 @@ use std::marker::DiscriminantKind;
|
|||
use rustc_abi::{FieldIdx, VariantIdx};
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_hir::def_id::LocalDefId;
|
||||
use rustc_middle::mir::mono::MonoItem;
|
||||
use rustc_middle::ty::TyCtxt;
|
||||
use rustc_serialize::{Decodable, Encodable};
|
||||
use rustc_span::Span;
|
||||
use rustc_span::source_map::Spanned;
|
||||
pub use rustc_type_ir::{TyDecoder, TyEncoder};
|
||||
|
||||
use crate::arena::ArenaAllocatable;
|
||||
|
@ -397,6 +399,15 @@ impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D>
|
|||
}
|
||||
}
|
||||
|
||||
// Decodes a slice of spanned `MonoItem`s into the arena of the decoder's interner.
impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D> for [Spanned<MonoItem<'tcx>>] {
|
||||
// Reads a `usize` element count, decodes that many elements in order, and
// returns the arena-allocated slice holding them.
fn decode(decoder: &mut D) -> &'tcx Self {
|
||||
decoder
|
||||
.interner()
|
||||
.arena
|
||||
.alloc_from_iter((0..decoder.read_usize()).map(|_| Decodable::decode(decoder)))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tcx, D: TyDecoder<I = TyCtxt<'tcx>>> RefDecodable<'tcx, D>
|
||||
for ty::List<ty::BoundVariableKind>
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue