diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
index 5c9e72ae0f1..fd06c50eb81 100644
--- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
+++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
@@ -95,16 +95,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap {
     GlobalAsm(ItemId),
 }
 
+fn opt_incr_drop_glue_mode<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> InstantiationMode {
+    // Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
+    // dropping.
+    let ty::Adt(adt_def, _) = ty.kind() else {
+        return InstantiationMode::LocalCopy;
+    };
+
+    // Types that don't have a direct Drop impl, but have fields that require dropping.
+    let Some(dtor) = adt_def.destructor(tcx) else {
+        // We use LocalCopy for drops of enums only; this code is inherited from
+        // https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
+        // out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
+        // compile time. At the time of writing, simply removing this entire check does seem to
+        // regress incr-opt compile times. But it sure seems like a more sophisticated check could
+        // do better here.
+        if adt_def.is_enum() {
+            return InstantiationMode::LocalCopy;
+        } else {
+            return InstantiationMode::GloballyShared { may_conflict: true };
+        }
+    };
+
+    // We've gotten to a drop_in_place for a type that directly implements Drop.
+    // The drop glue is a wrapper for the Drop::drop impl, and since this is an optimized build,
+    // we make the glue LocalCopy as well, in an effort to coordinate with the mode that the
+    // actual impl will get.
+    if tcx.cross_crate_inlinable(dtor.did) {
+        InstantiationMode::LocalCopy
+    } else {
+        InstantiationMode::GloballyShared { may_conflict: true }
+    }
+}
+
 impl<'tcx> MonoItem<'tcx> {
     /// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
     pub fn is_user_defined(&self) -> bool {
@@ -124,16 +157,36 @@ impl<'tcx> MonoItem<'tcx> {
             return InstantiationMode::GloballyShared { may_conflict: false };
         }
 
-        // FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
-        // across 4 functions:
-        // * cross_crate_inlinable(def_id)
-        // * InstanceKind::requires_inline
-        // * InstanceKind::generates_cgu_internal_copy
-        // * MonoItem::instantiation_mode
-        // Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
-        // which symbols this crate exports, we are obligated to only generate LocalCopy when
-        // generates_cgu_internal_copy returns true.
-        if !instance.def.generates_cgu_internal_copy(tcx) {
+        // This is technically a heuristic even though it's in the "not a heuristic" part of
+        // instantiation mode selection.
+        // It is surely possible to untangle this; the root problem is that the way we instantiate
+        // InstanceKinds other than Item is very complicated.
+        //
+        // The fallback case is to give everything else GloballyShared at OptLevel::No and
+        // LocalCopy at all other opt levels. This is a good default, except for one specific build
+        // configuration: optimized incremental builds.
+        // In the current compiler architecture there is a fundamental tension between
+        // optimizations (which want big CGUs with as many things LocalCopy as possible) and
+        // incrementality (which wants small CGUs with as many things GloballyShared as possible).
+        // The heuristics implemented here do better than a completely naive approach in the
+        // compiler benchmark suite, but there is no reason to believe they are optimal.
+        if let InstanceKind::DropGlue(_, Some(ty)) = instance.def {
+            if tcx.sess.opts.optimize == OptLevel::No {
+                return InstantiationMode::GloballyShared { may_conflict: false };
+            }
+            if tcx.sess.opts.incremental.is_none() {
+                return InstantiationMode::LocalCopy;
+            }
+            return opt_incr_drop_glue_mode(tcx, ty);
+        }
+
+        // We need to ensure that we do not decide the InstantiationMode of an exported symbol is
+        // LocalCopy. Since exported symbols are computed based on the output of
+        // cross_crate_inlinable, we are beholden to our previous decisions.
+        //
+        // Note that just like above, this check for requires_inline is technically a heuristic
+        // even though it's in the "not a heuristic" part of instantiation mode selection.
+        if !tcx.cross_crate_inlinable(instance.def_id()) && !instance.def.requires_inline(tcx) {
             return InstantiationMode::GloballyShared { may_conflict: false };
         }
diff --git a/compiler/rustc_middle/src/ty/instance.rs b/compiler/rustc_middle/src/ty/instance.rs
index 980d20f9d3b..e3b3eccffb5 100644
--- a/compiler/rustc_middle/src/ty/instance.rs
+++ b/compiler/rustc_middle/src/ty/instance.rs
@@ -302,50 +302,6 @@ impl<'tcx> InstanceKind<'tcx> {
         )
     }
 
-    /// Returns `true` if the machine code for this instance is instantiated in
-    /// each codegen unit that references it.
-    /// Note that this is only a hint! The compiler can globally decide to *not*
-    /// do this in order to speed up compilation. CGU-internal copies only
-    /// exist to enable inlining. If inlining is not performed (e.g. at
-    /// `-Copt-level=0`) then the time for generating them is wasted and it's
-    /// better to create a single copy with external linkage.
-    pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
-        if self.requires_inline(tcx) {
-            return true;
-        }
-        if let ty::InstanceKind::DropGlue(.., Some(ty))
-        | ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
-        {
-            // Drop glue generally wants to be instantiated at every codegen
-            // unit, but without an #[inline] hint. We should make this
-            // available to normal end-users.
-            if tcx.sess.opts.incremental.is_none() {
-                return true;
-            }
-            // When compiling with incremental, we can generate a *lot* of
-            // codegen units. Including drop glue into all of them has a
-            // considerable compile time cost.
-            //
-            // We include enums without destructors to allow, say, optimizing
-            // drops of `Option::None` before LTO. We also respect the intent of
-            // `#[inline]` on `Drop::drop` implementations.
-            return ty.ty_adt_def().is_none_or(|adt_def| {
-                match *self {
-                    ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
-                    ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
-                        adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
-                    }
-                    _ => unreachable!(),
-                }
-                .map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
-            });
-        }
-        if let ty::InstanceKind::ThreadLocalShim(..) = *self {
-            return false;
-        }
-        tcx.cross_crate_inlinable(self.def_id())
-    }
-
     pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
         match *self {
             InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {
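
To summarize the behavior change for readers who don't live in this code: under the new scheme, the instantiation mode of drop glue depends on the opt level, whether the build is incremental, and the shape of the dropped type. The sketch below condenses that decision tree into standalone Rust. It is illustrative only: `OptLevel`, `DroppedTy`, and `drop_glue_mode` are hypothetical stand-ins for the rustc internals used above (`tcx.sess.opts`, the `ty.kind()`/`adt_def` queries, and `MonoItem::instantiation_mode` together with `opt_incr_drop_glue_mode`), not compiler API.

```rust
// Condensed sketch of the drop-glue instantiation-mode decision tree.
// NOTE: all types and functions here are simplified stand-ins, not rustc code.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum OptLevel {
    No,
    Optimized,
}

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum InstantiationMode {
    // One copy per codegen unit that references the item (internal linkage).
    LocalCopy,
    // A single copy with external linkage, shared across codegen units.
    GloballyShared { may_conflict: bool },
}

// What the real code learns about the dropped type from `ty.kind()`,
// `adt_def.destructor(tcx)`, and `tcx.cross_crate_inlinable(dtor.did)`.
struct DroppedTy {
    is_adt: bool,
    is_enum: bool,
    // `Some(inlinable)` if the type has a direct `Drop` impl.
    direct_drop_impl: Option<bool>,
}

fn drop_glue_mode(opt: OptLevel, incremental: bool, ty: &DroppedTy) -> InstantiationMode {
    // Unoptimized builds share everything: without inlining, per-CGU copies are wasted work.
    if opt == OptLevel::No {
        return InstantiationMode::GloballyShared { may_conflict: false };
    }
    // Optimized non-incremental builds duplicate glue into every CGU to enable inlining.
    if !incremental {
        return InstantiationMode::LocalCopy;
    }
    // Optimized incremental builds: the heuristics in `opt_incr_drop_glue_mode`.
    if !ty.is_adt {
        // Non-ADTs (mostly closures) can't have a direct Drop impl.
        return InstantiationMode::LocalCopy;
    }
    match ty.direct_drop_impl {
        // No direct Drop impl: share the glue, except for enums, where e.g.
        // drop_in_place(Option::None) can be optimized out before crate-local ThinLTO.
        None => {
            if ty.is_enum {
                InstantiationMode::LocalCopy
            } else {
                InstantiationMode::GloballyShared { may_conflict: true }
            }
        }
        // Direct Drop impl: give the glue the same mode its Drop::drop impl will get.
        Some(true) => InstantiationMode::LocalCopy,
        Some(false) => InstantiationMode::GloballyShared { may_conflict: true },
    }
}

fn main() {
    // Optimized incremental build, enum with no direct Drop impl (e.g. Option<String>):
    // LocalCopy, per the heuristic inherited from rust-lang/rust#67332.
    let option_like = DroppedTy { is_adt: true, is_enum: true, direct_drop_impl: None };
    assert_eq!(
        drop_glue_mode(OptLevel::Optimized, true, &option_like),
        InstantiationMode::LocalCopy
    );

    // Same build, struct whose Drop impl is not cross-crate-inlinable: one shared copy.
    let guard_like = DroppedTy { is_adt: true, is_enum: false, direct_drop_impl: Some(false) };
    assert_eq!(
        drop_glue_mode(OptLevel::Optimized, true, &guard_like),
        InstantiationMode::GloballyShared { may_conflict: true }
    );
}
```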
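The two asserts mirror the comments in the patch: enum drop glue stays LocalCopy so that calls like `drop_in_place(Option::None)` can be optimized out before crate-local ThinLTO, while glue that merely wraps a non-inlinable `Drop::drop` gets a single shared copy to keep incremental CGUs small. As the patch itself notes, these heuristics beat a naive approach on the compiler benchmark suite but are not claimed to be optimal.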