Remove InstanceKind::generates_cgu_internal_copy
This commit is contained in:
parent
8ad2c9724d
commit
817e2c598d
3 changed files with 68 additions and 64 deletions
|
@ -93,16 +93,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
|
|||
return None;
|
||||
}
|
||||
|
||||
// Functions marked with #[inline] are codegened with "internal"
|
||||
// linkage and are not exported unless marked with an extern
|
||||
// indicator
|
||||
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
|
||||
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
|
||||
{
|
||||
Some(def_id)
|
||||
} else {
|
||||
None
|
||||
if Instance::mono(tcx, def_id.into()).def.requires_inline(tcx) {
|
||||
return None;
|
||||
}
|
||||
|
||||
if tcx.cross_crate_inlinable(def_id) { None } else { Some(def_id) }
|
||||
})
|
||||
.map(|def_id| {
|
||||
// We won't link right if this symbol is stripped during LTO.
|
||||
|
|
|
@ -20,7 +20,7 @@ use tracing::debug;
|
|||
|
||||
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
|
||||
use crate::middle::codegen_fn_attrs::CodegenFnAttrFlags;
|
||||
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
|
||||
use crate::ty::{self, GenericArgs, Instance, InstanceKind, SymbolName, Ty, TyCtxt};
|
||||
|
||||
/// Describes how a monomorphization will be instantiated in object files.
|
||||
#[derive(PartialEq)]
|
||||
|
@ -54,6 +54,39 @@ pub enum MonoItem<'tcx> {
|
|||
GlobalAsm(ItemId),
|
||||
}
|
||||
|
||||
fn opt_incr_drop_glue_mode<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> InstantiationMode {
|
||||
// Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
|
||||
// dropping.
|
||||
let ty::Adt(adt_def, _) = ty.kind() else {
|
||||
return InstantiationMode::LocalCopy;
|
||||
};
|
||||
|
||||
// Types that don't have a direct Drop impl, but have fields that require dropping.
|
||||
let Some(dtor) = adt_def.destructor(tcx) else {
|
||||
// We use LocalCopy for drops of enums only; this code is inherited from
|
||||
// https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
|
||||
// out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
|
||||
// compile time. At the time of writing, simply removing this entire check does seem to
|
||||
// regress incr-opt compile times. But it sure seems like a more sophisticated check could
|
||||
// do better here.
|
||||
if adt_def.is_enum() {
|
||||
return InstantiationMode::LocalCopy;
|
||||
} else {
|
||||
return InstantiationMode::GloballyShared { may_conflict: true };
|
||||
}
|
||||
};
|
||||
|
||||
// We've gotten to a drop_in_place for a type that directly implements Drop.
|
||||
// The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
|
||||
// effort to coordinate with the mode that the actual impl will get, we make the glue also
|
||||
// LocalCopy.
|
||||
if tcx.cross_crate_inlinable(dtor.did) {
|
||||
InstantiationMode::LocalCopy
|
||||
} else {
|
||||
InstantiationMode::GloballyShared { may_conflict: true }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tcx> MonoItem<'tcx> {
|
||||
/// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
|
||||
pub fn is_user_defined(&self) -> bool {
|
||||
|
@ -123,16 +156,36 @@ impl<'tcx> MonoItem<'tcx> {
|
|||
return InstantiationMode::GloballyShared { may_conflict: false };
|
||||
}
|
||||
|
||||
// FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
|
||||
// across 4 functions:
|
||||
// * cross_crate_inlinable(def_id)
|
||||
// * InstanceKind::requires_inline
|
||||
// * InstanceKind::generate_cgu_internal_copy
|
||||
// * MonoItem::instantiation_mode
|
||||
// Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
|
||||
// which symbols this crate exports, we are obligated to only generate LocalCopy when
|
||||
// generates_cgu_internal_copy returns true.
|
||||
if !instance.def.generates_cgu_internal_copy(tcx) {
|
||||
// This is technically a heuristic even though it's in the "not a heuristic" part of
|
||||
// instantiation mode selection.
|
||||
// It is surely possible to untangle this; the root problem is that the way we instantiate
|
||||
// InstanceKind other than Item is very complicated.
|
||||
//
|
||||
// The fallback case is to give everything else GloballyShared at OptLevel::No and
|
||||
// LocalCopy at all other opt levels. This is a good default, except for one specific build
|
||||
// configuration: Optimized incremental builds.
|
||||
// In the current compiler architecture there is a fundamental tension between
|
||||
// optimizations (which want big CGUs with as many things LocalCopy as possible) and
|
||||
// incrementality (which wants small CGUs with as many things GloballyShared as possible).
|
||||
// The heuristics implemented here do better than a completely naive approach in the
|
||||
// compiler benchmark suite, but there is no reason to believe they are optimal.
|
||||
if let InstanceKind::DropGlue(_, Some(ty)) = instance.def {
|
||||
if tcx.sess.opts.optimize == OptLevel::No {
|
||||
return InstantiationMode::GloballyShared { may_conflict: false };
|
||||
}
|
||||
if tcx.sess.opts.incremental.is_none() {
|
||||
return InstantiationMode::LocalCopy;
|
||||
}
|
||||
return opt_incr_drop_glue_mode(tcx, ty);
|
||||
}
|
||||
|
||||
// We need to ensure that we do not decide the InstantiationMode of an exported symbol is
|
||||
// LocalCopy. Since exported symbols are computed based on the output of
|
||||
// cross_crate_inlinable, we are beholden to our previous decisions.
|
||||
//
|
||||
// Note that just like above, this check for requires_inline is technically a heuristic
|
||||
// even though it's in the "not a heuristic" part of instantiation mode selection.
|
||||
if !tcx.cross_crate_inlinable(instance.def_id()) && !instance.def.requires_inline(tcx) {
|
||||
return InstantiationMode::GloballyShared { may_conflict: false };
|
||||
}
|
||||
|
||||
|
|
|
@ -301,50 +301,6 @@ impl<'tcx> InstanceKind<'tcx> {
|
|||
)
|
||||
}
|
||||
|
||||
/// Returns `true` if the machine code for this instance is instantiated in
|
||||
/// each codegen unit that references it.
|
||||
/// Note that this is only a hint! The compiler can globally decide to *not*
|
||||
/// do this in order to speed up compilation. CGU-internal copies are
|
||||
/// only exist to enable inlining. If inlining is not performed (e.g. at
|
||||
/// `-Copt-level=0`) then the time for generating them is wasted and it's
|
||||
/// better to create a single copy with external linkage.
|
||||
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
|
||||
if self.requires_inline(tcx) {
|
||||
return true;
|
||||
}
|
||||
if let ty::InstanceKind::DropGlue(.., Some(ty))
|
||||
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
|
||||
{
|
||||
// Drop glue generally wants to be instantiated at every codegen
|
||||
// unit, but without an #[inline] hint. We should make this
|
||||
// available to normal end-users.
|
||||
if tcx.sess.opts.incremental.is_none() {
|
||||
return true;
|
||||
}
|
||||
// When compiling with incremental, we can generate a *lot* of
|
||||
// codegen units. Including drop glue into all of them has a
|
||||
// considerable compile time cost.
|
||||
//
|
||||
// We include enums without destructors to allow, say, optimizing
|
||||
// drops of `Option::None` before LTO. We also respect the intent of
|
||||
// `#[inline]` on `Drop::drop` implementations.
|
||||
return ty.ty_adt_def().is_none_or(|adt_def| {
|
||||
match *self {
|
||||
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
|
||||
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
|
||||
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
|
||||
});
|
||||
}
|
||||
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
|
||||
return false;
|
||||
}
|
||||
tcx.cross_crate_inlinable(self.def_id())
|
||||
}
|
||||
|
||||
pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
|
||||
match *self {
|
||||
InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue