rustc: Prepare to enable ThinLTO by default
This commit prepares to enable ThinLTO and multiple codegen units in release mode by default. We've still got a debuginfo bug or two to sort out before actually turning them on by default.
This commit is contained in:
parent
7df4683cc0
commit
855f6d1483
8 changed files with 124 additions and 32 deletions
|
@ -383,8 +383,13 @@ top_level_options!(
|
||||||
// try to not rely on this too much.
|
// try to not rely on this too much.
|
||||||
actually_rustdoc: bool [TRACKED],
|
actually_rustdoc: bool [TRACKED],
|
||||||
|
|
||||||
// Number of object files/codegen units to produce on the backend
|
// Specifications of codegen units / ThinLTO which are forced as a
|
||||||
|
// result of parsing command line options. These are not necessarily
|
||||||
|
// what rustc was invoked with, but massaged a bit to agree with
|
||||||
|
// commands like `--emit llvm-ir` which they're often incompatible with
|
||||||
|
// if we otherwise use the defaults of rustc.
|
||||||
cli_forced_codegen_units: Option<usize> [UNTRACKED],
|
cli_forced_codegen_units: Option<usize> [UNTRACKED],
|
||||||
|
cli_forced_thinlto: Option<bool> [UNTRACKED],
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
|
||||||
debug_assertions: true,
|
debug_assertions: true,
|
||||||
actually_rustdoc: false,
|
actually_rustdoc: false,
|
||||||
cli_forced_codegen_units: None,
|
cli_forced_codegen_units: None,
|
||||||
|
cli_forced_thinlto: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1165,7 +1171,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
|
||||||
"run the non-lexical lifetimes MIR pass"),
|
"run the non-lexical lifetimes MIR pass"),
|
||||||
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
|
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
|
||||||
"generate a graphical HTML report of time spent in trans and LLVM"),
|
"generate a graphical HTML report of time spent in trans and LLVM"),
|
||||||
thinlto: bool = (false, parse_bool, [TRACKED],
|
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||||
"enable ThinLTO when possible"),
|
"enable ThinLTO when possible"),
|
||||||
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||||
"control whether #[inline] functions are in all cgus"),
|
"control whether #[inline] functions are in all cgus"),
|
||||||
|
@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
||||||
|
|
||||||
let mut cg = build_codegen_options(matches, error_format);
|
let mut cg = build_codegen_options(matches, error_format);
|
||||||
let mut codegen_units = cg.codegen_units;
|
let mut codegen_units = cg.codegen_units;
|
||||||
|
let mut thinlto = None;
|
||||||
|
|
||||||
// Issue #30063: if user requests llvm-related output to one
|
// Issue #30063: if user requests llvm-related output to one
|
||||||
// particular path, disable codegen-units.
|
// particular path, disable codegen-units.
|
||||||
|
@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
||||||
}
|
}
|
||||||
early_warn(error_format, "resetting to default -C codegen-units=1");
|
early_warn(error_format, "resetting to default -C codegen-units=1");
|
||||||
codegen_units = Some(1);
|
codegen_units = Some(1);
|
||||||
|
thinlto = Some(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => codegen_units = Some(1),
|
_ => {
|
||||||
|
codegen_units = Some(1);
|
||||||
|
thinlto = Some(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
||||||
debug_assertions,
|
debug_assertions,
|
||||||
actually_rustdoc: false,
|
actually_rustdoc: false,
|
||||||
cli_forced_codegen_units: codegen_units,
|
cli_forced_codegen_units: codegen_units,
|
||||||
|
cli_forced_thinlto: thinlto,
|
||||||
},
|
},
|
||||||
cfg)
|
cfg)
|
||||||
}
|
}
|
||||||
|
|
|
@ -656,30 +656,91 @@ impl Session {
|
||||||
return n as usize
|
return n as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Why is 16 codegen units the default all the time?
|
||||||
|
//
|
||||||
|
// The main reason for enabling multiple codegen units by default is to
|
||||||
|
// leverage the ability for the trans backend to do translation and
|
||||||
|
// codegen in parallel. This allows us, especially for large crates, to
|
||||||
|
// make good use of all available resources on the machine once we've
|
||||||
|
// hit that stage of compilation. Large crates especially then often
|
||||||
|
// take a long time in trans/codegen and this helps us amortize that
|
||||||
|
// cost.
|
||||||
|
//
|
||||||
|
// Note that a high number here doesn't mean that we'll be spawning a
|
||||||
|
// large number of threads in parallel. The backend of rustc contains
|
||||||
|
// global rate limiting through the `jobserver` crate so we'll never
|
||||||
|
// overload the system with too much work, but rather we'll only be
|
||||||
|
// optimizing when we're otherwise cooperating with other instances of
|
||||||
|
// rustc.
|
||||||
|
//
|
||||||
|
// Rather a high number here means that we should be able to keep a lot
|
||||||
|
// of idle cpus busy. By ensuring that no codegen unit takes *too* long
|
||||||
|
// to build we'll be guaranteed that all cpus will finish pretty closely
|
||||||
|
// to one another and we should make relatively optimal use of system
|
||||||
|
// resources.
|
||||||
|
//
|
||||||
|
// Note that the main cost of codegen units is that it prevents LLVM
|
||||||
|
// from inlining across codegen units. Users in general don't have a lot
|
||||||
|
// of control over how codegen units are split up so it's our job in the
|
||||||
|
// compiler to ensure that undue performance isn't lost when using
|
||||||
|
// codegen units (aka we can't require everyone to slap `#[inline]` on
|
||||||
|
// everything).
|
||||||
|
//
|
||||||
|
// If we're compiling at `-O0` then the number doesn't really matter too
|
||||||
|
// much because performance doesn't matter and inlining is ok to lose.
|
||||||
|
// In debug mode we just want to try to guarantee that no cpu is stuck
|
||||||
|
// doing work that could otherwise be farmed to others.
|
||||||
|
//
|
||||||
|
// In release mode, however (O1 and above) performance does indeed
|
||||||
|
// matter! To recover the performance lost to missed inlining we'll be
|
||||||
|
// enabling ThinLTO by default (the function for which is just below).
|
||||||
|
// This will ensure that we recover any inlining wins we otherwise lost
|
||||||
|
// through codegen unit partitioning.
|
||||||
|
//
|
||||||
|
// ---
|
||||||
|
//
|
||||||
|
// Ok that's a lot of words but the basic tl;dr is that we want a high
|
||||||
|
// number here -- but not too high. Additionally we're "safe" to have it
|
||||||
|
// always at the same number at all optimization levels.
|
||||||
|
//
|
||||||
|
// As a result 16 was chosen here! Mostly because it was a power of 2
|
||||||
|
// and most benchmarks agreed it was roughly a local optimum. Not very
|
||||||
|
// scientific.
|
||||||
match self.opts.optimize {
|
match self.opts.optimize {
|
||||||
// If we're compiling at `-O0` then default to 16 codegen units.
|
|
||||||
// The number here shouldn't matter too too much as debug mode
|
|
||||||
// builds don't rely on performance at all, meaning that lost
|
|
||||||
// opportunities for inlining through multiple codegen units is
|
|
||||||
// a non-issue.
|
|
||||||
//
|
|
||||||
// Note that the high number here doesn't mean that we'll be
|
|
||||||
// spawning a large number of threads in parallel. The backend
|
|
||||||
// of rustc contains global rate limiting through the
|
|
||||||
// `jobserver` crate so we'll never overload the system with too
|
|
||||||
// much work, but rather we'll only be optimizing when we're
|
|
||||||
// otherwise cooperating with other instances of rustc.
|
|
||||||
//
|
|
||||||
// Rather the high number here means that we should be able to
|
|
||||||
// keep a lot of idle cpus busy. By ensuring that no codegen
|
|
||||||
// unit takes *too* long to build we'll be guaranteed that all
|
|
||||||
// cpus will finish pretty closely to one another and we should
|
|
||||||
// make relatively optimal use of system resources
|
|
||||||
config::OptLevel::No => 16,
|
config::OptLevel::No => 16,
|
||||||
|
_ => 1, // FIXME(#46346) this should be 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// All other optimization levels default use one codegen unit,
|
/// Returns whether ThinLTO is enabled for this compilation
|
||||||
// the historical default in Rust for a Long Time.
|
pub fn thinlto(&self) -> bool {
|
||||||
_ => 1,
|
// If processing command line options determined that we're incompatible
|
||||||
|
// with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
|
||||||
|
if let Some(enabled) = self.opts.cli_forced_thinlto {
|
||||||
|
return enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
// If explicitly specified, use that with the next highest priority
|
||||||
|
if let Some(enabled) = self.opts.debugging_opts.thinlto {
|
||||||
|
return enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there's only one codegen unit and LTO isn't enabled then there's
|
||||||
|
// no need for ThinLTO so just return false.
|
||||||
|
if self.codegen_units() == 1 && !self.lto() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Right now ThinLTO isn't compatible with incremental compilation.
|
||||||
|
if self.opts.incremental.is_some() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we're in "defaults" territory. By default we enable ThinLTO for
|
||||||
|
// optimized compiles (anything greater than O0).
|
||||||
|
match self.opts.optimize {
|
||||||
|
config::OptLevel::No => false,
|
||||||
|
_ => true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
|
||||||
// for doesn't require full LTO. Some targets require one LLVM module
|
// for doesn't require full LTO. Some targets require one LLVM module
|
||||||
// (they effectively don't have a linker) so it's up to us to use LTO to
|
// (they effectively don't have a linker) so it's up to us to use LTO to
|
||||||
// link everything together.
|
// link everything together.
|
||||||
thinlto: sess.opts.debugging_opts.thinlto &&
|
thinlto: sess.thinlto() &&
|
||||||
!sess.target.target.options.requires_lto,
|
!sess.target.target.options.requires_lto &&
|
||||||
|
unsafe { llvm::LLVMRustThinLTOAvailable() },
|
||||||
|
|
||||||
no_landing_pads: sess.no_landing_pads(),
|
no_landing_pads: sess.no_landing_pads(),
|
||||||
save_temps: sess.opts.cg.save_temps,
|
save_temps: sess.opts.cg.save_temps,
|
||||||
|
|
|
@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
||||||
|
|
||||||
check_for_rustc_errors_attr(tcx);
|
check_for_rustc_errors_attr(tcx);
|
||||||
|
|
||||||
if tcx.sess.opts.debugging_opts.thinlto {
|
if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
|
||||||
if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
|
if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
|
||||||
tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
|
tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
|
||||||
}
|
}
|
||||||
|
|
|
@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
|
||||||
// Note that this demangler isn't quite as fancy as it could be. We have lots
|
// Note that this demangler isn't quite as fancy as it could be. We have lots
|
||||||
// of other information in our symbols like hashes, version, type information,
|
// of other information in our symbols like hashes, version, type information,
|
||||||
// etc. Additionally, this doesn't handle glue symbols at all.
|
// etc. Additionally, this doesn't handle glue symbols at all.
|
||||||
pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
|
pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
|
||||||
// First validate the symbol. If it doesn't look like anything we're
|
// During ThinLTO LLVM may import and rename internal symbols, so strip out
|
||||||
|
// those endings first as they're one of the last manglings applied to
|
||||||
|
// symbol names.
|
||||||
|
let llvm = ".llvm.";
|
||||||
|
if let Some(i) = s.find(llvm) {
|
||||||
|
let candidate = &s[i + llvm.len()..];
|
||||||
|
let all_hex = candidate.chars().all(|c| {
|
||||||
|
match c {
|
||||||
|
'A' ... 'F' | '0' ... '9' => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if all_hex {
|
||||||
|
s = &s[..i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate the symbol. If it doesn't look like anything we're
|
||||||
// expecting, we just print it literally. Note that we must handle non-rust
|
// expecting, we just print it literally. Note that we must handle non-rust
|
||||||
// symbols because we could have any function in the backtrace.
|
// symbols because we could have any function in the backtrace.
|
||||||
let mut valid = true;
|
let mut valid = true;
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
// compile-flags: -Z no-landing-pads
|
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||||
// error-pattern:converging_fn called
|
// error-pattern:converging_fn called
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
// compile-flags: -Z no-landing-pads
|
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||||
// error-pattern:diverging_fn called
|
// error-pattern:diverging_fn called
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
// compile-flags: -Z no-landing-pads
|
// compile-flags: -Z no-landing-pads -C codegen-units=1
|
||||||
// ignore-emscripten no threads support
|
// ignore-emscripten no threads support
|
||||||
|
|
||||||
use std::thread;
|
use std::thread;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue