rustc: Prepare to enable ThinLTO by default

This commit prepares to enable ThinLTO and multiple codegen units in release mode by default. We've still got a debuginfo bug or two to sort out before actually turning it on by default.
2017-11-25 11:13:58 -08:00 · 2017-11-25 11:13:58 -08:00 · 855f6d1483
commit 855f6d1483
parent 7df4683cc0
8 changed files with 124 additions and 32 deletions
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@ -383,8 +383,13 @@ top_level_options!(
        // try to not rely on this too much.
        actually_rustdoc: bool [TRACKED],
-        // Number of object files/codegen units to produce on the backend
+        // Specifications of codegen units / ThinLTO which are forced as a
        // result of parsing command line options. These are not necessarily
        // what rustc was invoked with, but are massaged a bit to agree with
        // options like `--emit llvm-ir`, with which rustc's defaults for
        // these settings are often incompatible.
        cli_forced_codegen_units: Option<usize> [UNTRACKED],
        cli_forced_thinlto: Option<bool> [UNTRACKED],
    }
 );
@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
        debug_assertions: true,
        actually_rustdoc: false,
        cli_forced_codegen_units: None,
        cli_forced_thinlto: None,
    }
 }
@ -1165,7 +1171,7 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
                 "run the non-lexical lifetimes MIR pass"),
    trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
        "generate a graphical HTML report of time spent in trans and LLVM"),
-    thinlto: bool = (false, parse_bool, [TRACKED],
+    thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
        "enable ThinLTO when possible"),
    inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
        "control whether #[inline] functions are in all cgus"),
@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
    let mut cg = build_codegen_options(matches, error_format);
    let mut codegen_units = cg.codegen_units;
    let mut thinlto = None;
    // Issue #30063: if user requests llvm-related output to one
    // particular path, disable codegen-units.
@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
                    }
                    early_warn(error_format, "resetting to default -C codegen-units=1");
                    codegen_units = Some(1);
                    thinlto = Some(false);
                }
            }
-            _ => codegen_units = Some(1),
+            _ => {
                codegen_units = Some(1);
                thinlto = Some(false);
            }
        }
    }
@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
        debug_assertions,
        actually_rustdoc: false,
        cli_forced_codegen_units: codegen_units,
        cli_forced_thinlto: thinlto,
    },
    cfg)
 }
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@ -656,30 +656,91 @@ impl Session {
            return n as usize
        }
        // Why is 16 codegen units the default all the time?
        //
        // The main reason for enabling multiple codegen units by default is to
        // leverage the ability for the trans backend to do translation and
        // codegen in parallel. This allows us, especially for large crates, to
        // make good use of all available resources on the machine once we've
        // hit that stage of compilation. Large crates especially then often
        // take a long time in trans/codegen and this helps us amortize that
        // cost.
        //
        // Note that a high number here doesn't mean that we'll be spawning a
        // large number of threads in parallel. The backend of rustc contains
        // global rate limiting through the `jobserver` crate so we'll never
        // overload the system with too much work, but rather we'll only be
        // optimizing when we're otherwise cooperating with other instances of
        // rustc.
        //
        // Rather, a high number here means that we should be able to keep a lot
        // of idle cpus busy. By ensuring that no codegen unit takes *too* long
        // to build we'll be guaranteed that all cpus will finish pretty close
        // to one another and we should make relatively optimal use of system
        // resources.
        //
        // Note that the main cost of codegen units is that they prevent LLVM
        // from inlining across codegen units. Users in general don't have a lot
        // of control over how codegen units are split up so it's our job in the
        // compiler to ensure that performance isn't unduly lost when using
        // codegen units (aka we can't require everyone to slap `#[inline]` on
        // everything).
        //
        // If we're compiling at `-O0` then the number doesn't really matter too
        // much because performance doesn't matter and inlining is ok to lose.
        // In debug mode we just want to try to guarantee that no cpu is stuck
        // doing work that could otherwise be farmed to others.
        //
        // In release mode, however (O1 and above) performance does indeed
        // matter! To recover the loss in performance due to inlining we'll be
        // enabling ThinLTO by default (the function for which is just below).
        // This will ensure that we recover any inlining wins we otherwise lost
        // through codegen unit partitioning.
        //
        // ---
        //
        // Ok that's a lot of words but the basic tl;dr is that we want a high
        // number here -- but not too high. Additionally we're "safe" to have it
        // always at the same number at all optimization levels.
        //
        // As a result 16 was chosen here! Mostly because it was a power of 2
        // and most benchmarks agreed it was roughly a local optimum. Not very
        // scientific.
        match self.opts.optimize {
            // If we're compiling at `-O0` then default to 16 codegen units.
            // The number here shouldn't matter too much as debug mode
            // builds don't rely on performance at all, meaning that lost
            // opportunities for inlining through multiple codegen units are
            // a non-issue.
            //
            // Note that the high number here doesn't mean that we'll be
            // spawning a large number of threads in parallel. The backend
            // of rustc contains global rate limiting through the
            // `jobserver` crate so we'll never overload the system with too
            // much work, but rather we'll only be optimizing when we're
            // otherwise cooperating with other instances of rustc.
            //
            // Rather the high number here means that we should be able to
            // keep a lot of idle cpus busy. By ensuring that no codegen
            // unit takes *too* long to build we'll be guaranteed that all
            // cpus will finish pretty closely to one another and we should
            // make relatively optimal use of system resources
            config::OptLevel::No => 16,
            _ => 1, // FIXME(#46346) this should be 16
        }
    }
-            // All other optimization levels default use one codegen unit,
+    /// Returns whether ThinLTO is enabled for this compilation
-            // the historical default in Rust for a Long Time.
+    pub fn thinlto(&self) -> bool {
-            _ => 1,
+        // If processing command line options determined that we're incompatible
        // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
        if let Some(enabled) = self.opts.cli_forced_thinlto {
            return enabled
        }
        // If explicitly specified, use that with the next highest priority
        if let Some(enabled) = self.opts.debugging_opts.thinlto {
            return enabled
        }
        // If there's only one codegen unit and LTO isn't enabled then there's
        // no need for ThinLTO so just return false.
        if self.codegen_units() == 1 && !self.lto() {
            return false
        }
        // Right now ThinLTO isn't compatible with incremental compilation.
        if self.opts.incremental.is_some() {
            return false
        }
        // Now we're in "defaults" territory. By default we enable ThinLTO for
        // optimized compiles (anything greater than O0).
        match self.opts.optimize {
            config::OptLevel::No => false,
            _ => true,
        }
    }
 }
--- a/src/librustc_trans/back/write.rs
+++ b/src/librustc_trans/back/write.rs
@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
        // for doesn't require full LTO. Some targets require one LLVM module
        // (they effectively don't have a linker) so it's up to us to use LTO to
        // link everything together.
-        thinlto: sess.opts.debugging_opts.thinlto &&
+        thinlto: sess.thinlto() &&
-            !sess.target.target.options.requires_lto,
+            !sess.target.target.options.requires_lto &&
            unsafe { llvm::LLVMRustThinLTOAvailable() },
        no_landing_pads: sess.no_landing_pads(),
        save_temps: sess.opts.cg.save_temps,
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
    check_for_rustc_errors_attr(tcx);
-    if tcx.sess.opts.debugging_opts.thinlto {
+    if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
        if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
            tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
        }
--- a/src/libstd/sys_common/backtrace.rs
+++ b/src/libstd/sys_common/backtrace.rs
@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
 // Note that this demangler isn't quite as fancy as it could be. We have lots
 // of other information in our symbols like hashes, version, type information,
 // etc. Additionally, this doesn't handle glue symbols at all.
-pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
+pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
-    // First validate the symbol. If it doesn't look like anything we're
+    // During ThinLTO LLVM may import and rename internal symbols, so strip out
    // those endings first as they're one of the last manglings applied to
    // symbol names.
    let llvm = ".llvm.";
    if let Some(i) = s.find(llvm) {
        let candidate = &s[i + llvm.len()..];
        let all_hex = candidate.chars().all(|c| {
            match c {
                'A' ... 'F' | '0' ... '9' => true,
                _ => false,
            }
        });
        if all_hex {
            s = &s[..i];
        }
    }
    // Validate the symbol. If it doesn't look like anything we're
    // expecting, we just print it literally. Note that we must handle non-rust
    // symbols because we could have any function in the backtrace.
    let mut valid = true;
--- a/src/test/run-fail/mir_trans_no_landing_pads.rs
+++ b/src/test/run-fail/mir_trans_no_landing_pads.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
 // error-pattern:converging_fn called
 use std::io::{self, Write};
--- a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
+++ b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
 // error-pattern:diverging_fn called
 use std::io::{self, Write};
--- a/src/test/run-pass/no-landing-pads.rs
+++ b/src/test/run-pass/no-landing-pads.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
 // ignore-emscripten no threads support
 use std::thread;