Auto merge of #123244 - Mark-Simulacrum:share-inline-never-generics, r=saethlin
Enable -Zshare-generics for inline(never) functions This avoids inlining cross-crate generic items when possible that are already marked inline(never), implying that the author is not intending for the function to be inlined by callers. As such, having a local copy may make it easier for LLVM to optimize but mostly just adds to binary bloat and codegen time. In practice our benchmarks indicate this is indeed a win for larger compilations, where the extra cost in dynamic linking to these symbols is diminished compared to the advantages in fewer copies that need optimizing in each binary. It might also make sense it expand this with other heuristics (e.g., `#[cold]`) in the future, but this seems like a good starting point. FWIW, I expect that doing cleanup in where we make the decision what should/shouldn't be shared is also a good idea. Way too much code needed to be tweaked to check this. But I'm hoping to leave that for a follow-up PR rather than blocking this on it.
This commit is contained in:
commit
d53f0b1d8e
26 changed files with 127 additions and 52 deletions
|
@ -4137,6 +4137,7 @@ name = "rustc_monomorphize"
|
|||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"rustc_abi",
|
||||
"rustc_attr",
|
||||
"rustc_data_structures",
|
||||
"rustc_errors",
|
||||
"rustc_fluent_macro",
|
||||
|
|
|
@ -104,7 +104,10 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
|
|||
|
||||
let is_hidden = if is_generic {
|
||||
// This is a monomorphization of a generic function.
|
||||
if !cx.tcx.sess.opts.share_generics() {
|
||||
if !(cx.tcx.sess.opts.share_generics()
|
||||
|| tcx.codegen_fn_attrs(instance_def_id).inline
|
||||
== rustc_attr::InlineAttr::Never)
|
||||
{
|
||||
// When not sharing generics, all instances are in the same
|
||||
// crate and have hidden visibility.
|
||||
true
|
||||
|
|
|
@ -282,7 +282,7 @@ fn exported_symbols_provider_local(
|
|||
}));
|
||||
}
|
||||
|
||||
if tcx.sess.opts.share_generics() && tcx.local_crate_exports_generics() {
|
||||
if tcx.local_crate_exports_generics() {
|
||||
use rustc_middle::mir::mono::{Linkage, MonoItem, Visibility};
|
||||
use rustc_middle::ty::InstanceKind;
|
||||
|
||||
|
@ -310,6 +310,16 @@ fn exported_symbols_provider_local(
|
|||
continue;
|
||||
}
|
||||
|
||||
if !tcx.sess.opts.share_generics() {
|
||||
if tcx.codegen_fn_attrs(mono_item.def_id()).inline == rustc_attr::InlineAttr::Never
|
||||
{
|
||||
// this is OK, we explicitly allow sharing inline(never) across crates even
|
||||
// without share-generics.
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
match *mono_item {
|
||||
MonoItem::Fn(Instance { def: InstanceKind::Item(def), args }) => {
|
||||
if args.non_erasable_generics().next().is_some() {
|
||||
|
|
|
@ -111,6 +111,13 @@ impl<'tcx> MonoItem<'tcx> {
|
|||
return InstantiationMode::GloballyShared { may_conflict: false };
|
||||
}
|
||||
|
||||
if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline
|
||||
&& self.is_generic_fn()
|
||||
{
|
||||
// Upgrade inline(never) to a globally shared instance.
|
||||
return InstantiationMode::GloballyShared { may_conflict: true };
|
||||
}
|
||||
|
||||
// At this point we don't have explicit linkage and we're an
|
||||
// inlined function. If we're inlining into all CGUs then we'll
|
||||
// be creating a local copy per CGU.
|
||||
|
|
|
@ -1955,8 +1955,6 @@ impl<'tcx> TyCtxt<'tcx> {
|
|||
|
||||
#[inline]
|
||||
pub fn local_crate_exports_generics(self) -> bool {
|
||||
debug_assert!(self.sess.opts.share_generics());
|
||||
|
||||
self.crate_types().iter().any(|crate_type| {
|
||||
match crate_type {
|
||||
CrateType::Executable
|
||||
|
|
|
@ -190,19 +190,23 @@ impl<'tcx> Instance<'tcx> {
|
|||
/// This method already takes into account the global `-Zshare-generics`
|
||||
/// setting, always returning `None` if `share-generics` is off.
|
||||
pub fn upstream_monomorphization(&self, tcx: TyCtxt<'tcx>) -> Option<CrateNum> {
|
||||
// If we are not in share generics mode, we don't link to upstream
|
||||
// monomorphizations but always instantiate our own internal versions
|
||||
// instead.
|
||||
if !tcx.sess.opts.share_generics() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// If this is an item that is defined in the local crate, no upstream
|
||||
// crate can know about it/provide a monomorphization.
|
||||
if self.def_id().is_local() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// If we are not in share generics mode, we don't link to upstream
|
||||
// monomorphizations but always instantiate our own internal versions
|
||||
// instead.
|
||||
if !tcx.sess.opts.share_generics()
|
||||
// However, if the def_id is marked inline(never), then it's fine to just reuse the
|
||||
// upstream monomorphization.
|
||||
&& tcx.codegen_fn_attrs(self.def_id()).inline != rustc_attr::InlineAttr::Never
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// If this a non-generic instance, it cannot be a shared monomorphization.
|
||||
self.args.non_erasable_generics().next()?;
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ edition = "2021"
|
|||
[dependencies]
|
||||
# tidy-alphabetical-start
|
||||
rustc_abi = { path = "../rustc_abi" }
|
||||
rustc_attr = { path = "../rustc_attr" }
|
||||
rustc_data_structures = { path = "../rustc_data_structures" }
|
||||
rustc_errors = { path = "../rustc_errors" }
|
||||
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
|
||||
|
|
|
@ -208,8 +208,8 @@ where
|
|||
// available to downstream crates. This depends on whether we are in
|
||||
// share-generics mode and whether the current crate can even have
|
||||
// downstream crates.
|
||||
let export_generics =
|
||||
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
|
||||
let can_export_generics = cx.tcx.local_crate_exports_generics();
|
||||
let always_export_generics = can_export_generics && cx.tcx.sess.opts.share_generics();
|
||||
|
||||
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
|
||||
let cgu_name_cache = &mut UnordMap::default();
|
||||
|
@ -249,7 +249,8 @@ where
|
|||
cx.tcx,
|
||||
&mono_item,
|
||||
&mut can_be_internalized,
|
||||
export_generics,
|
||||
can_export_generics,
|
||||
always_export_generics,
|
||||
);
|
||||
if visibility == Visibility::Hidden && can_be_internalized {
|
||||
internalization_candidates.insert(mono_item);
|
||||
|
@ -739,12 +740,19 @@ fn mono_item_linkage_and_visibility<'tcx>(
|
|||
tcx: TyCtxt<'tcx>,
|
||||
mono_item: &MonoItem<'tcx>,
|
||||
can_be_internalized: &mut bool,
|
||||
export_generics: bool,
|
||||
can_export_generics: bool,
|
||||
always_export_generics: bool,
|
||||
) -> (Linkage, Visibility) {
|
||||
if let Some(explicit_linkage) = mono_item.explicit_linkage(tcx) {
|
||||
return (explicit_linkage, Visibility::Default);
|
||||
}
|
||||
let vis = mono_item_visibility(tcx, mono_item, can_be_internalized, export_generics);
|
||||
let vis = mono_item_visibility(
|
||||
tcx,
|
||||
mono_item,
|
||||
can_be_internalized,
|
||||
can_export_generics,
|
||||
always_export_generics,
|
||||
);
|
||||
(Linkage::External, vis)
|
||||
}
|
||||
|
||||
|
@ -767,7 +775,8 @@ fn mono_item_visibility<'tcx>(
|
|||
tcx: TyCtxt<'tcx>,
|
||||
mono_item: &MonoItem<'tcx>,
|
||||
can_be_internalized: &mut bool,
|
||||
export_generics: bool,
|
||||
can_export_generics: bool,
|
||||
always_export_generics: bool,
|
||||
) -> Visibility {
|
||||
let instance = match mono_item {
|
||||
// This is pretty complicated; see below.
|
||||
|
@ -826,7 +835,11 @@ fn mono_item_visibility<'tcx>(
|
|||
|
||||
// Upstream `DefId` instances get different handling than local ones.
|
||||
let Some(def_id) = def_id.as_local() else {
|
||||
return if export_generics && is_generic {
|
||||
return if is_generic
|
||||
&& (always_export_generics
|
||||
|| (can_export_generics
|
||||
&& tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never))
|
||||
{
|
||||
// If it is an upstream monomorphization and we export generics, we must make
|
||||
// it available to downstream crates.
|
||||
*can_be_internalized = false;
|
||||
|
@ -837,7 +850,10 @@ fn mono_item_visibility<'tcx>(
|
|||
};
|
||||
|
||||
if is_generic {
|
||||
if export_generics {
|
||||
if always_export_generics
|
||||
|| (can_export_generics
|
||||
&& tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never)
|
||||
{
|
||||
if tcx.is_unreachable_local_definition(def_id) {
|
||||
// This instance cannot be used from another crate.
|
||||
Visibility::Hidden
|
||||
|
|
|
@ -757,7 +757,9 @@ impl<A: Allocator> RawVecInner<A> {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
// not marked inline(never) since we want optimizers to be able to observe the specifics of this
|
||||
// function, see tests/codegen/vec-reserve-extend.rs.
|
||||
#[cold]
|
||||
fn finish_grow<A>(
|
||||
new_layout: Layout,
|
||||
current_memory: Option<(NonNull<u8>, Layout)>,
|
||||
|
|
|
@ -362,6 +362,7 @@
|
|||
#![feature(strict_provenance_atomic_ptr)]
|
||||
#![feature(sync_unsafe_cell)]
|
||||
#![feature(ub_checks)]
|
||||
#![feature(used_with_arg)]
|
||||
// tidy-alphabetical-end
|
||||
//
|
||||
// Library features (alloc):
|
||||
|
|
|
@ -27,6 +27,22 @@ use crate::sys::backtrace;
|
|||
use crate::sys::stdio::panic_output;
|
||||
use crate::{fmt, intrinsics, process, thread};
|
||||
|
||||
// This forces codegen of the function called by panic!() inside the std crate, rather than in
|
||||
// downstream crates. Primarily this is useful for rustc's codegen tests, which rely on noticing
|
||||
// complete removal of panic from generated IR. Since begin_panic is inline(never), it's only
|
||||
// codegen'd once per crate-graph so this pushes that to std rather than our codegen test crates.
|
||||
//
|
||||
// (See https://github.com/rust-lang/rust/pull/123244 for more info on why).
|
||||
//
|
||||
// If this is causing problems we can also modify those codegen tests to use a crate type like
|
||||
// cdylib which doesn't export "Rust" symbols to downstream linkage units.
|
||||
#[unstable(feature = "libstd_sys_internals", reason = "used by the panic! macro", issue = "none")]
|
||||
#[doc(hidden)]
|
||||
#[allow(dead_code)]
|
||||
#[used(compiler)]
|
||||
pub static EMPTY_PANIC: fn(&'static str) -> ! =
|
||||
begin_panic::<&'static str> as fn(&'static str) -> !;
|
||||
|
||||
// Binary interface to the panic runtime that the standard library depends on.
|
||||
//
|
||||
// The standard library is tagged with `#![needs_panic_runtime]` (introduced in
|
||||
|
|
|
@ -11,10 +11,21 @@ pub fn foo<T>(x: T) -> (T, u32, i8) {
|
|||
#[inline(never)]
|
||||
fn bar<T>(x: T) -> (T, Struct) {
|
||||
let _ = not_exported_and_not_generic(0);
|
||||
exported_and_generic::<u32>(0);
|
||||
(x, Struct(1))
|
||||
}
|
||||
|
||||
pub static F: fn(u32) -> u32 = exported_and_generic::<u32>;
|
||||
|
||||
// These should not contribute to the codegen items of other crates.
|
||||
|
||||
// This is generic, but it's only instantiated with a u32 argument and that instantiation is present
|
||||
// in the local crate (see F above).
|
||||
#[inline(never)]
|
||||
pub fn exported_and_generic<T>(x: T) -> T {
|
||||
x
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub fn exported_but_not_generic(x: i32) -> i64 {
|
||||
x as i64
|
||||
|
|
|
@ -86,7 +86,7 @@ pub extern "C" fn test() {
|
|||
|
||||
// A call through the Fn trait must use address space 1.
|
||||
//
|
||||
// CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait()
|
||||
// CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait({{.*}})
|
||||
call_through_fn_trait(&mut update_bar_value);
|
||||
|
||||
// A call through a global variable must use address space 1.
|
||||
|
|
|
@ -2,7 +2,10 @@
|
|||
|
||||
// A drop([...].clone()) sequence on an Rc should be a no-op
|
||||
// In particular, no call to __rust_dealloc should be emitted
|
||||
#![crate_type = "lib"]
|
||||
//
|
||||
// We use a cdylib since it's a leaf unit for Rust purposes, so doesn't codegen -Zshare-generics
|
||||
// code.
|
||||
#![crate_type = "cdylib"]
|
||||
use std::rc::Rc;
|
||||
|
||||
pub fn foo(t: &Rc<Vec<usize>>) {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#![feature(naked_functions, asm_const, linkage)]
|
||||
#![feature(naked_functions, linkage)]
|
||||
#![crate_type = "dylib"]
|
||||
|
||||
use std::arch::naked_asm;
|
||||
|
@ -38,7 +38,7 @@ pub extern "C" fn public_vanilla() -> u32 {
|
|||
|
||||
#[naked]
|
||||
#[no_mangle]
|
||||
pub extern "C" fn public_naked() -> u32 {
|
||||
pub extern "C" fn public_naked_nongeneric() -> u32 {
|
||||
unsafe { naked_asm!("mov rax, 42", "ret") }
|
||||
}
|
||||
|
||||
|
|
|
@ -17,10 +17,12 @@ fn main() {
|
|||
not_exported(&rdylib, "private_naked");
|
||||
|
||||
global_function(&rdylib, "public_vanilla");
|
||||
global_function(&rdylib, "public_naked");
|
||||
global_function(&rdylib, "public_naked_nongeneric");
|
||||
|
||||
not_exported(&rdylib, "public_vanilla_generic");
|
||||
not_exported(&rdylib, "public_naked_generic");
|
||||
// #[naked] functions are implicitly #[inline(never)], so they get shared regardless of
|
||||
// -Zshare-generics.
|
||||
global_function(&rdylib, "public_naked_generic");
|
||||
|
||||
global_function(&rdylib, "vanilla_external_linkage");
|
||||
global_function(&rdylib, "naked_external_linkage");
|
||||
|
|
|
@ -6,6 +6,12 @@
|
|||
//@ check-run-results
|
||||
//@ exec-env:RUST_BACKTRACE=1
|
||||
|
||||
// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
|
||||
// symbols.
|
||||
//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
|
||||
// And this is for differences between std with and without debuginfo.
|
||||
//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
|
||||
|
||||
//@ ignore-msvc see #62897 and `backtrace-debuginfo.rs` test
|
||||
//@ ignore-android FIXME #17520
|
||||
//@ ignore-openbsd no support for libbacktrace without filename
|
||||
|
@ -14,11 +20,6 @@
|
|||
//@ ignore-sgx no subprocess support
|
||||
//@ ignore-fuchsia Backtraces not symbolized
|
||||
|
||||
// NOTE(eddyb) output differs between symbol mangling schemes
|
||||
//@ revisions: legacy v0
|
||||
//@ [legacy] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
|
||||
//@ [v0] compile-flags: -Csymbol-mangling-version=v0
|
||||
|
||||
fn main() {
|
||||
panic!()
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5:
|
||||
thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:24:5:
|
||||
explicit panic
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic
|
|
@ -1,6 +0,0 @@
|
|||
thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5:
|
||||
explicit panic
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic::<&str>
|
||||
1: issue_47429_short_backtraces::main
|
||||
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
|
|
@ -6,6 +6,12 @@
|
|||
//@ check-run-results
|
||||
//@ exec-env:RUST_BACKTRACE=0
|
||||
|
||||
// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
|
||||
// symbols.
|
||||
//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
|
||||
// And this is for differences between std with and without debuginfo.
|
||||
//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
|
||||
|
||||
//@ ignore-msvc see #62897 and `backtrace-debuginfo.rs` test
|
||||
//@ ignore-android FIXME #17520
|
||||
//@ ignore-openbsd no support for libbacktrace without filename
|
||||
|
@ -14,11 +20,6 @@
|
|||
//@ ignore-sgx no subprocess support
|
||||
//@ ignore-fuchsia Backtrace not symbolized
|
||||
|
||||
// NOTE(eddyb) output differs between symbol mangling schemes
|
||||
//@ revisions: legacy v0
|
||||
//@ [legacy] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
|
||||
//@ [v0] compile-flags: -Csymbol-mangling-version=v0
|
||||
|
||||
#![feature(panic_backtrace_config)]
|
||||
|
||||
fn main() {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
thread 'main' panicked at $DIR/runtime-switch.rs:26:5:
|
||||
thread 'main' panicked at $DIR/runtime-switch.rs:27:5:
|
||||
explicit panic
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic
|
|
@ -1,6 +0,0 @@
|
|||
thread 'main' panicked at $DIR/runtime-switch.rs:26:5:
|
||||
explicit panic
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic::<&str>
|
||||
1: runtime_switch::main
|
||||
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
|
|
@ -9,6 +9,11 @@
|
|||
//@ ignore-sgx Backtraces not symbolized
|
||||
//@ ignore-fuchsia Backtraces not symbolized
|
||||
//@ ignore-msvc the `__rust_{begin,end}_short_backtrace` symbols aren't reliable.
|
||||
// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
|
||||
// symbols.
|
||||
//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
|
||||
// And this is for differences between std with and without debuginfo.
|
||||
//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
|
||||
|
||||
/// This test case make sure that we can have multiple pairs of `__rust_{begin,end}_short_backtrace`
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:56:5:
|
||||
thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:61:5:
|
||||
debug!!!
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic
|
||||
|
|
|
@ -10,6 +10,11 @@
|
|||
//@ ignore-fuchsia Backtraces not symbolized
|
||||
//@ ignore-msvc the `__rust_{begin,end}_short_backtrace` symbols aren't reliable.
|
||||
|
||||
// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
|
||||
// symbols.
|
||||
//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
|
||||
// And this is for differences between std with and without debuginfo.
|
||||
//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
|
||||
|
||||
#[inline(never)]
|
||||
fn __rust_begin_short_backtrace<T, F: FnOnce() -> T>(f: F) -> T {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:52:5:
|
||||
thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:57:5:
|
||||
debug!!!
|
||||
stack backtrace:
|
||||
0: std::panicking::begin_panic
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue