rustc: Prepare to enable ThinLTO by default
This commit prepares to enable ThinLTO and multiple codegen units in release mode by default. We've still got a debuginfo bug or two to sort out before actually turning them on by default.
This commit is contained in:
parent
7df4683cc0
commit
855f6d1483
8 changed files with 124 additions and 32 deletions
|
@ -383,8 +383,13 @@ top_level_options!(
|
|||
// try to not rely on this too much.
|
||||
actually_rustdoc: bool [TRACKED],
|
||||
|
||||
// Number of object files/codegen units to produce on the backend
|
||||
// Specifications of codegen units / ThinLTO which are forced as a
|
||||
// result of parsing command line options. These are not necessarily
|
||||
// what rustc was invoked with, but massaged a bit to agree with
|
||||
// commands like `--emit llvm-ir` which they're often incompatible with
|
||||
// if we otherwise use the defaults of rustc.
|
||||
cli_forced_codegen_units: Option<usize> [UNTRACKED],
|
||||
cli_forced_thinlto: Option<bool> [UNTRACKED],
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
|
|||
debug_assertions: true,
|
||||
actually_rustdoc: false,
|
||||
cli_forced_codegen_units: None,
|
||||
cli_forced_thinlto: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1165,7 +1171,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
|
|||
"run the non-lexical lifetimes MIR pass"),
|
||||
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
|
||||
"generate a graphical HTML report of time spent in trans and LLVM"),
|
||||
thinlto: bool = (false, parse_bool, [TRACKED],
|
||||
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||
"enable ThinLTO when possible"),
|
||||
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||
"control whether #[inline] functions are in all cgus"),
|
||||
|
@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
|||
|
||||
let mut cg = build_codegen_options(matches, error_format);
|
||||
let mut codegen_units = cg.codegen_units;
|
||||
let mut thinlto = None;
|
||||
|
||||
// Issue #30063: if user requests llvm-related output to one
|
||||
// particular path, disable codegen-units.
|
||||
|
@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
|||
}
|
||||
early_warn(error_format, "resetting to default -C codegen-units=1");
|
||||
codegen_units = Some(1);
|
||||
thinlto = Some(false);
|
||||
}
|
||||
}
|
||||
_ => codegen_units = Some(1),
|
||||
_ => {
|
||||
codegen_units = Some(1);
|
||||
thinlto = Some(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
|||
debug_assertions,
|
||||
actually_rustdoc: false,
|
||||
cli_forced_codegen_units: codegen_units,
|
||||
cli_forced_thinlto: thinlto,
|
||||
},
|
||||
cfg)
|
||||
}
|
||||
|
|
|
@ -656,30 +656,91 @@ impl Session {
|
|||
return n as usize
|
||||
}
|
||||
|
||||
// Why is 16 codegen units the default all the time?
|
||||
//
|
||||
// The main reason for enabling multiple codegen units by default is to
|
||||
// leverage the ability for the trans backend to do translation and
|
||||
// codegen in parallel. This allows us, especially for large crates, to
|
||||
// make good use of all available resources on the machine once we've
|
||||
// hit that stage of compilation. Large crates especially then often
|
||||
// take a long time in trans/codegen and this helps us amortize that
|
||||
// cost.
|
||||
//
|
||||
// Note that a high number here doesn't mean that we'll be spawning a
|
||||
// large number of threads in parallel. The backend of rustc contains
|
||||
// global rate limiting through the `jobserver` crate so we'll never
|
||||
// overload the system with too much work, but rather we'll only be
|
||||
// optimizing when we're otherwise cooperating with other instances of
|
||||
// rustc.
|
||||
//
|
||||
// Rather a high number here means that we should be able to keep a lot
|
||||
// of idle cpus busy. By ensuring that no codegen unit takes *too* long
|
||||
// to build we'll be guaranteed that all cpus will finish pretty closely
|
||||
// to one another and we should make relatively optimal use of system
|
||||
// resources.
|
||||
//
|
||||
// Note that the main cost of codegen units is that it prevents LLVM
|
||||
// from inlining across codegen units. Users in general don't have a lot
|
||||
// of control over how codegen units are split up so it's our job in the
|
||||
// compiler to ensure that undue performance isn't lost when using
|
||||
// codegen units (aka we can't require everyone to slap `#[inline]` on
|
||||
// everything).
|
||||
//
|
||||
// If we're compiling at `-O0` then the number doesn't really matter too
|
||||
// much because performance doesn't matter and inlining is ok to lose.
|
||||
// In debug mode we just want to try to guarantee that no cpu is stuck
|
||||
// doing work that could otherwise be farmed to others.
|
||||
//
|
||||
// In release mode, however (O1 and above) performance does indeed
|
||||
// matter! To recover the loss in performance due to inlining we'll be
|
||||
// enabling ThinLTO by default (the function for which is just below).
|
||||
// This will ensure that we recover any inlining wins we otherwise lost
|
||||
// through codegen unit partitioning.
|
||||
//
|
||||
// ---
|
||||
//
|
||||
// Ok that's a lot of words but the basic tl;dr is that we want a high
|
||||
// number here -- but not too high. Additionally we're "safe" to have it
|
||||
// always at the same number at all optimization levels.
|
||||
//
|
||||
// As a result 16 was chosen here! Mostly because it was a power of 2
|
||||
// and most benchmarks agreed it was roughly a local optimum. Not very
|
||||
// scientific.
|
||||
match self.opts.optimize {
|
||||
// If we're compiling at `-O0` then default to 16 codegen units.
|
||||
// The number here shouldn't matter too too much as debug mode
|
||||
// builds don't rely on performance at all, meaning that lost
|
||||
// opportunities for inlining through multiple codegen units is
|
||||
// a non-issue.
|
||||
//
|
||||
// Note that the high number here doesn't mean that we'll be
|
||||
// spawning a large number of threads in parallel. The backend
|
||||
// of rustc contains global rate limiting through the
|
||||
// `jobserver` crate so we'll never overload the system with too
|
||||
// much work, but rather we'll only be optimizing when we're
|
||||
// otherwise cooperating with other instances of rustc.
|
||||
//
|
||||
// Rather the high number here means that we should be able to
|
||||
// keep a lot of idle cpus busy. By ensuring that no codegen
|
||||
// unit takes *too* long to build we'll be guaranteed that all
|
||||
// cpus will finish pretty closely to one another and we should
|
||||
// make relatively optimal use of system resources
|
||||
config::OptLevel::No => 16,
|
||||
_ => 1, // FIXME(#46346) this should be 16
|
||||
}
|
||||
}
|
||||
|
||||
// All other optimization levels default use one codegen unit,
|
||||
// the historical default in Rust for a Long Time.
|
||||
_ => 1,
|
||||
/// Returns whether ThinLTO is enabled for this compilation
|
||||
pub fn thinlto(&self) -> bool {
|
||||
// If processing command line options determined that we're incompatible
|
||||
// with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
|
||||
if let Some(enabled) = self.opts.cli_forced_thinlto {
|
||||
return enabled
|
||||
}
|
||||
|
||||
// If explicitly specified, use that with the next highest priority
|
||||
if let Some(enabled) = self.opts.debugging_opts.thinlto {
|
||||
return enabled
|
||||
}
|
||||
|
||||
// If there's only one codegen unit and LTO isn't enabled then there's
|
||||
// no need for ThinLTO so just return false.
|
||||
if self.codegen_units() == 1 && !self.lto() {
|
||||
return false
|
||||
}
|
||||
|
||||
// Right now ThinLTO isn't compatible with incremental compilation.
|
||||
if self.opts.incremental.is_some() {
|
||||
return false
|
||||
}
|
||||
|
||||
// Now we're in "defaults" territory. By default we enable ThinLTO for
|
||||
// optimized compiles (anything greater than O0).
|
||||
match self.opts.optimize {
|
||||
config::OptLevel::No => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
|
|||
// for doesn't require full LTO. Some targets require one LLVM module
|
||||
// (they effectively don't have a linker) so it's up to us to use LTO to
|
||||
// link everything together.
|
||||
thinlto: sess.opts.debugging_opts.thinlto &&
|
||||
!sess.target.target.options.requires_lto,
|
||||
thinlto: sess.thinlto() &&
|
||||
!sess.target.target.options.requires_lto &&
|
||||
unsafe { llvm::LLVMRustThinLTOAvailable() },
|
||||
|
||||
no_landing_pads: sess.no_landing_pads(),
|
||||
save_temps: sess.opts.cg.save_temps,
|
||||
|
|
|
@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
|||
|
||||
check_for_rustc_errors_attr(tcx);
|
||||
|
||||
if tcx.sess.opts.debugging_opts.thinlto {
|
||||
if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
|
||||
if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
|
||||
tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
|
||||
}
|
||||
|
|
|
@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
|
|||
// Note that this demangler isn't quite as fancy as it could be. We have lots
|
||||
// of other information in our symbols like hashes, version, type information,
|
||||
// etc. Additionally, this doesn't handle glue symbols at all.
|
||||
pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
|
||||
// First validate the symbol. If it doesn't look like anything we're
|
||||
pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
|
||||
// During ThinLTO LLVM may import and rename internal symbols, so strip out
|
||||
// those endings first as they're one of the last manglings applied to
|
||||
// symbol names.
|
||||
let llvm = ".llvm.";
|
||||
if let Some(i) = s.find(llvm) {
|
||||
let candidate = &s[i + llvm.len()..];
|
||||
let all_hex = candidate.chars().all(|c| {
|
||||
match c {
|
||||
'A' ... 'F' | '0' ... '9' => true,
|
||||
_ => false,
|
||||
}
|
||||
});
|
||||
|
||||
if all_hex {
|
||||
s = &s[..i];
|
||||
}
|
||||
}
|
||||
|
||||
// Validate the symbol. If it doesn't look like anything we're
|
||||
// expecting, we just print it literally. Note that we must handle non-rust
|
||||
// symbols because we could have any function in the backtrace.
|
||||
let mut valid = true;
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// compile-flags: -Z no-landing-pads
|
||||
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||
// error-pattern:converging_fn called
|
||||
use std::io::{self, Write};
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// compile-flags: -Z no-landing-pads
|
||||
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||
// error-pattern:diverging_fn called
|
||||
use std::io::{self, Write};
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// compile-flags: -Z no-landing-pads
|
||||
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||
// ignore-emscripten no threads support
|
||||
|
||||
use std::thread;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue