1
Fork 0

Auto merge of #113879 - nnethercote:codegen_ssa-cleanups, r=bjorn3

`codegen_ssa` cleanups

Some clarifications I made when reading this code closely.

r? `@tmiasko`
This commit is contained in:
bors 2023-07-31 08:18:19 +00:00
commit 5082281609
9 changed files with 200 additions and 199 deletions

View file

@ -71,7 +71,7 @@ use gccjit::{Context, OptimizationLevel, CType};
use rustc_ast::expand::allocator::AllocatorKind; use rustc_ast::expand::allocator::AllocatorKind;
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen}; use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::base::codegen_crate; use rustc_codegen_ssa::base::codegen_crate;
use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryFn}; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn};
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule}; use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use rustc_codegen_ssa::target_features::supported_target_features; use rustc_codegen_ssa::target_features::supported_target_features;
use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods}; use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
@ -217,14 +217,14 @@ impl WriteBackendMethods for GccCodegenBackend {
type ThinData = (); type ThinData = ();
type ThinBuffer = ThinBuffer; type ThinBuffer = ThinBuffer;
fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLTOInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> { fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLtoInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
// TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins. // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins.
// NOTE: implemented elsewhere. // NOTE: implemented elsewhere.
// TODO(antoyo): what is implemented elsewhere ^ ? // TODO(antoyo): what is implemented elsewhere ^ ?
let module = let module =
match modules.remove(0) { match modules.remove(0) {
FatLTOInput::InMemory(module) => module, FatLtoInput::InMemory(module) => module,
FatLTOInput::Serialized { .. } => { FatLtoInput::Serialized { .. } => {
unimplemented!(); unimplemented!();
} }
}; };

View file

@ -7,7 +7,7 @@ use crate::{LlvmCodegenBackend, ModuleLlvm};
use object::read::archive::ArchiveFile; use object::read::archive::ArchiveFile;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared}; use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::symbol_export; use rustc_codegen_ssa::back::symbol_export;
use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, TargetMachineFactoryConfig}; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, TargetMachineFactoryConfig};
use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}; use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
@ -166,7 +166,7 @@ fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFro
/// for further optimization. /// for further optimization.
pub(crate) fn run_fat( pub(crate) fn run_fat(
cgcx: &CodegenContext<LlvmCodegenBackend>, cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<FatLTOInput<LlvmCodegenBackend>>, modules: Vec<FatLtoInput<LlvmCodegenBackend>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> { ) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
let diag_handler = cgcx.create_diag_handler(); let diag_handler = cgcx.create_diag_handler();
@ -220,7 +220,7 @@ pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBu
fn fat_lto( fn fat_lto(
cgcx: &CodegenContext<LlvmCodegenBackend>, cgcx: &CodegenContext<LlvmCodegenBackend>,
diag_handler: &Handler, diag_handler: &Handler,
modules: Vec<FatLTOInput<LlvmCodegenBackend>>, modules: Vec<FatLtoInput<LlvmCodegenBackend>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
symbols_below_threshold: &[*const libc::c_char], symbols_below_threshold: &[*const libc::c_char],
@ -245,8 +245,8 @@ fn fat_lto(
})); }));
for module in modules { for module in modules {
match module { match module {
FatLTOInput::InMemory(m) => in_memory.push(m), FatLtoInput::InMemory(m) => in_memory.push(m),
FatLTOInput::Serialized { name, buffer } => { FatLtoInput::Serialized { name, buffer } => {
info!("pushing serialized module {:?}", name); info!("pushing serialized module {:?}", name);
let buffer = SerializedModule::Local(buffer); let buffer = SerializedModule::Local(buffer);
serialized_modules.push((buffer, CString::new(name).unwrap())); serialized_modules.push((buffer, CString::new(name).unwrap()));

View file

@ -28,7 +28,7 @@ pub use llvm_util::target_features;
use rustc_ast::expand::allocator::AllocatorKind; use rustc_ast::expand::allocator::AllocatorKind;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule}; use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use rustc_codegen_ssa::back::write::{ use rustc_codegen_ssa::back::write::{
CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn, CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
}; };
use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::ModuleCodegen; use rustc_codegen_ssa::ModuleCodegen;
@ -141,18 +141,6 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
back::write::target_machine_factory(sess, optlvl, target_features) back::write::target_machine_factory(sess, optlvl, target_features)
} }
fn spawn_thread<F, T>(time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(move || {
let _profiler = TimeTraceProfiler::new(time_trace);
f()
})
}
fn spawn_named_thread<F, T>( fn spawn_named_thread<F, T>(
time_trace: bool, time_trace: bool,
name: String, name: String,
@ -212,7 +200,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
} }
fn run_fat_lto( fn run_fat_lto(
cgcx: &CodegenContext<Self>, cgcx: &CodegenContext<Self>,
modules: Vec<FatLTOInput<Self>>, modules: Vec<FatLtoInput<Self>>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<Self>, FatalError> { ) -> Result<LtoModuleCodegen<Self>, FatalError> {
back::lto::run_fat(cgcx, modules, cached_modules) back::lto::run_fat(cgcx, modules, cached_modules)

View file

@ -349,8 +349,6 @@ pub struct CodegenContext<B: WriteBackendMethods> {
/// Directory into which the LLVM optimization remarks should be written. /// Directory into which the LLVM optimization remarks should be written.
/// If `None`, they will be written to stderr. /// If `None`, they will be written to stderr.
pub remark_dir: Option<PathBuf>, pub remark_dir: Option<PathBuf>,
/// Worker thread number
pub worker: usize,
/// The incremental compilation session directory, or None if we are not /// The incremental compilation session directory, or None if we are not
/// compiling incrementally /// compiling incrementally
pub incr_comp_session_dir: Option<PathBuf>, pub incr_comp_session_dir: Option<PathBuf>,
@ -376,38 +374,39 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
fn generate_lto_work<B: ExtraBackendMethods>( fn generate_lto_work<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>, cgcx: &CodegenContext<B>,
needs_fat_lto: Vec<FatLTOInput<B>>, needs_fat_lto: Vec<FatLtoInput<B>>,
needs_thin_lto: Vec<(String, B::ThinBuffer)>, needs_thin_lto: Vec<(String, B::ThinBuffer)>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>, import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
) -> Vec<(WorkItem<B>, u64)> { ) -> Vec<(WorkItem<B>, u64)> {
let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work"); let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() { if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty()); assert!(needs_thin_lto.is_empty());
let lto_module = let module =
B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise()); B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
(vec![lto_module], vec![]) // We are adding a single work item, so the cost doesn't matter.
vec![(WorkItem::LTO(module), 0)]
} else { } else {
assert!(needs_fat_lto.is_empty()); assert!(needs_fat_lto.is_empty());
B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise()) let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
}; .unwrap_or_else(|e| e.raise());
lto_modules
lto_modules .into_iter()
.into_iter() .map(|module| {
.map(|module| { let cost = module.cost();
let cost = module.cost(); (WorkItem::LTO(module), cost)
(WorkItem::LTO(module), cost) })
}) .chain(copy_jobs.into_iter().map(|wp| {
.chain(copy_jobs.into_iter().map(|wp| { (
( WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen { name: wp.cgu_name.clone(),
name: wp.cgu_name.clone(), source: wp,
source: wp, }),
}), 0, // copying is very cheap
0, )
) }))
})) .collect()
.collect() }
} }
pub struct CompiledModules { pub struct CompiledModules {
@ -742,22 +741,32 @@ impl<B: WriteBackendMethods> WorkItem<B> {
} }
match self { match self {
WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name), WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name), WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()), WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
} }
} }
} }
/// A result produced by the backend. /// A result produced by the backend.
pub(crate) enum WorkItemResult<B: WriteBackendMethods> { pub(crate) enum WorkItemResult<B: WriteBackendMethods> {
Compiled(CompiledModule), /// The backend has finished compiling a CGU, nothing more required.
Finished(CompiledModule),
/// The backend has finished compiling a CGU, which now needs linking
/// because `-Zcombine-cgu` was specified.
NeedsLink(ModuleCodegen<B::Module>), NeedsLink(ModuleCodegen<B::Module>),
NeedsFatLTO(FatLTOInput<B>),
NeedsThinLTO(String, B::ThinBuffer), /// The backend has finished compiling a CGU, which now needs to go through
/// fat LTO.
NeedsFatLto(FatLtoInput<B>),
/// The backend has finished compiling a CGU, which now needs to go through
/// thin LTO.
NeedsThinLto(String, B::ThinBuffer),
} }
pub enum FatLTOInput<B: WriteBackendMethods> { pub enum FatLtoInput<B: WriteBackendMethods> {
Serialized { name: String, buffer: B::ModuleBuffer }, Serialized { name: String, buffer: B::ModuleBuffer },
InMemory(ModuleCodegen<B::Module>), InMemory(ModuleCodegen<B::Module>),
} }
@ -846,7 +855,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e); panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
}); });
} }
Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer)) Ok(WorkItemResult::NeedsThinLto(name, thin_buffer))
} }
ComputedLtoType::Fat => match bitcode { ComputedLtoType::Fat => match bitcode {
Some(path) => { Some(path) => {
@ -854,9 +863,9 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
fs::write(&path, buffer.data()).unwrap_or_else(|e| { fs::write(&path, buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e); panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
}); });
Ok(WorkItemResult::NeedsFatLTO(FatLTOInput::Serialized { name, buffer })) Ok(WorkItemResult::NeedsFatLto(FatLtoInput::Serialized { name, buffer }))
} }
None => Ok(WorkItemResult::NeedsFatLTO(FatLTOInput::InMemory(module))), None => Ok(WorkItemResult::NeedsFatLto(FatLtoInput::InMemory(module))),
}, },
} }
} }
@ -906,7 +915,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
load_from_incr_comp_dir(dwarf_obj_out, &saved_dwarf_object_file) load_from_incr_comp_dir(dwarf_obj_out, &saved_dwarf_object_file)
}); });
WorkItemResult::Compiled(CompiledModule { WorkItemResult::Finished(CompiledModule {
name: module.name, name: module.name,
kind: ModuleKind::Regular, kind: ModuleKind::Regular,
object, object,
@ -936,7 +945,7 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
|| module.kind == ModuleKind::Allocator || module.kind == ModuleKind::Allocator
{ {
let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config)? }; let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config)? };
Ok(WorkItemResult::Compiled(module)) Ok(WorkItemResult::Finished(module))
} else { } else {
Ok(WorkItemResult::NeedsLink(module)) Ok(WorkItemResult::NeedsLink(module))
} }
@ -987,10 +996,15 @@ struct Diagnostic {
} }
#[derive(PartialEq, Clone, Copy, Debug)] #[derive(PartialEq, Clone, Copy, Debug)]
enum MainThreadWorkerState { enum MainThreadState {
/// Doing nothing.
Idle, Idle,
/// Doing codegen, i.e. MIR-to-LLVM-IR conversion.
Codegenning, Codegenning,
LLVMing,
/// Idle, but lending the compiler process's Token to an LLVM thread so it can do useful work.
Lending,
} }
fn start_executing_work<B: ExtraBackendMethods>( fn start_executing_work<B: ExtraBackendMethods>(
@ -1089,7 +1103,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
exported_symbols, exported_symbols,
remark: sess.opts.cg.remark.clone(), remark: sess.opts.cg.remark.clone(),
remark_dir, remark_dir,
worker: 0,
incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()), incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
cgu_reuse_tracker: sess.cgu_reuse_tracker.clone(), cgu_reuse_tracker: sess.cgu_reuse_tracker.clone(),
coordinator_send, coordinator_send,
@ -1242,7 +1255,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
// Each LLVM module is automatically sent back to the coordinator for LTO if // Each LLVM module is automatically sent back to the coordinator for LTO if
// necessary. There are already optimizations in place to avoid sending work // necessary. There are already optimizations in place to avoid sending work
// back to the coordinator if LTO isn't requested. // back to the coordinator if LTO isn't requested.
return B::spawn_thread(cgcx.time_trace, move || { return B::spawn_named_thread(cgcx.time_trace, "coordinator".to_string(), move || {
let mut worker_id_counter = 0; let mut worker_id_counter = 0;
let mut free_worker_ids = Vec::new(); let mut free_worker_ids = Vec::new();
let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| { let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
@ -1285,10 +1298,19 @@ fn start_executing_work<B: ExtraBackendMethods>(
// the implicit Token the compiler process owns no matter what. // the implicit Token the compiler process owns no matter what.
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut main_thread_worker_state = MainThreadWorkerState::Idle; let mut main_thread_state = MainThreadState::Idle;
let mut running = 0;
// How many LLVM worker threads are running while holding a Token. This
// *excludes* any that the main thread is lending a Token to.
let mut running_with_own_token = 0;
// How many LLVM worker threads are running in total. This *includes*
// any that the main thread is lending a Token to.
let running_with_any_token = |main_thread_state, running_with_own_token| {
running_with_own_token
+ if main_thread_state == MainThreadState::Lending { 1 } else { 0 }
};
let prof = &cgcx.prof;
let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None; let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
// Run the message loop while there's still anything that needs message // Run the message loop while there's still anything that needs message
@ -1296,66 +1318,62 @@ fn start_executing_work<B: ExtraBackendMethods>(
// wait for all existing work to finish, so many of the conditions here // wait for all existing work to finish, so many of the conditions here
// only apply if codegen hasn't been aborted as they represent pending // only apply if codegen hasn't been aborted as they represent pending
// work to be done. // work to be done.
while codegen_state == Ongoing loop {
|| running > 0
|| main_thread_worker_state == MainThreadWorkerState::LLVMing
|| (codegen_state == Completed
&& !(work_items.is_empty()
&& needs_fat_lto.is_empty()
&& needs_thin_lto.is_empty()
&& lto_import_only_modules.is_empty()
&& main_thread_worker_state == MainThreadWorkerState::Idle))
{
// While there are still CGUs to be codegened, the coordinator has // While there are still CGUs to be codegened, the coordinator has
// to decide how to utilize the compiler process's implicit Token: // to decide how to utilize the compiler process's implicit Token:
// For codegenning more CGUs or for running them through LLVM. // For codegenning more CGUs or for running them through LLVM.
if codegen_state == Ongoing { if codegen_state == Ongoing {
if main_thread_worker_state == MainThreadWorkerState::Idle { if main_thread_state == MainThreadState::Idle {
// Compute the number of workers that will be running once we've taken as many // Compute the number of workers that will be running once we've taken as many
// items from the work queue as we can, plus one for the main thread. It's not // items from the work queue as we can, plus one for the main thread. It's not
// critically important that we use this instead of just `running`, but it // critically important that we use this instead of just
// prevents the `queue_full_enough` heuristic from fluctuating just because a // `running_with_own_token`, but it prevents the `queue_full_enough` heuristic
// worker finished up and we decreased the `running` count, even though we're // from fluctuating just because a worker finished up and we decreased the
// just going to increase it right after this when we put a new worker to work. // `running_with_own_token` count, even though we're just going to increase it
let extra_tokens = tokens.len().checked_sub(running).unwrap(); // right after this when we put a new worker to work.
let extra_tokens = tokens.len().checked_sub(running_with_own_token).unwrap();
let additional_running = std::cmp::min(extra_tokens, work_items.len()); let additional_running = std::cmp::min(extra_tokens, work_items.len());
let anticipated_running = running + additional_running + 1; let anticipated_running = running_with_own_token + additional_running + 1;
if !queue_full_enough(work_items.len(), anticipated_running) { if !queue_full_enough(work_items.len(), anticipated_running) {
// The queue is not full enough, process more codegen units: // The queue is not full enough, process more codegen units:
if codegen_worker_send.send(CguMessage).is_err() { if codegen_worker_send.send(CguMessage).is_err() {
panic!("Could not send CguMessage to main thread") panic!("Could not send CguMessage to main thread")
} }
main_thread_worker_state = MainThreadWorkerState::Codegenning; main_thread_state = MainThreadState::Codegenning;
} else { } else {
// The queue is full enough to not let the worker // The queue is full enough to not let the worker
// threads starve. Use the implicit Token to do some // threads starve. Use the implicit Token to do some
// LLVM work too. // LLVM work too.
let (item, _) = let (item, _) =
work_items.pop().expect("queue empty - queue_full_enough() broken?"); work_items.pop().expect("queue empty - queue_full_enough() broken?");
let cgcx = CodegenContext { main_thread_state = MainThreadState::Lending;
worker: get_worker_id(&mut free_worker_ids), spawn_work(
..cgcx.clone() &cgcx,
};
maybe_start_llvm_timer(
prof,
cgcx.config(item.module_kind()),
&mut llvm_start_time, &mut llvm_start_time,
get_worker_id(&mut free_worker_ids),
item,
); );
main_thread_worker_state = MainThreadWorkerState::LLVMing;
spawn_work(cgcx, item);
} }
} }
} else if codegen_state == Completed { } else if codegen_state == Completed {
// If we've finished everything related to normal codegen if running_with_any_token(main_thread_state, running_with_own_token) == 0
// then it must be the case that we've got some LTO work to do. && work_items.is_empty()
// Perform the serial work here of figuring out what we're
// going to LTO and then push a bunch of work items onto our
// queue to do LTO
if work_items.is_empty()
&& running == 0
&& main_thread_worker_state == MainThreadWorkerState::Idle
{ {
// All codegen work is done. Do we have LTO work to do?
if needs_fat_lto.is_empty()
&& needs_thin_lto.is_empty()
&& lto_import_only_modules.is_empty()
{
// Nothing more to do!
break;
}
// We have LTO work to do. Perform the serial work here of
// figuring out what we're going to LTO and then push a
// bunch of work items onto our queue to do LTO. This all
// happens on the coordinator thread but it's very quick so
// we don't worry about tokens.
assert!(!started_lto); assert!(!started_lto);
started_lto = true; started_lto = true;
@ -1379,20 +1397,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
// In this branch, we know that everything has been codegened, // In this branch, we know that everything has been codegened,
// so it's just a matter of determining whether the implicit // so it's just a matter of determining whether the implicit
// Token is free to use for LLVM work. // Token is free to use for LLVM work.
match main_thread_worker_state { match main_thread_state {
MainThreadWorkerState::Idle => { MainThreadState::Idle => {
if let Some((item, _)) = work_items.pop() { if let Some((item, _)) = work_items.pop() {
let cgcx = CodegenContext { main_thread_state = MainThreadState::Lending;
worker: get_worker_id(&mut free_worker_ids), spawn_work(
..cgcx.clone() &cgcx,
};
maybe_start_llvm_timer(
prof,
cgcx.config(item.module_kind()),
&mut llvm_start_time, &mut llvm_start_time,
get_worker_id(&mut free_worker_ids),
item,
); );
main_thread_worker_state = MainThreadWorkerState::LLVMing;
spawn_work(cgcx, item);
} else { } else {
// There is no unstarted work, so let the main thread // There is no unstarted work, so let the main thread
// take over for a running worker. Otherwise the // take over for a running worker. Otherwise the
@ -1400,16 +1414,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
// We reduce the `running` counter by one. The // We reduce the `running` counter by one. The
// `tokens.truncate()` below will take care of // `tokens.truncate()` below will take care of
// giving the Token back. // giving the Token back.
debug_assert!(running > 0); debug_assert!(running_with_own_token > 0);
running -= 1; running_with_own_token -= 1;
main_thread_worker_state = MainThreadWorkerState::LLVMing; main_thread_state = MainThreadState::Lending;
} }
} }
MainThreadWorkerState::Codegenning => bug!( MainThreadState::Codegenning => bug!(
"codegen worker should not be codegenning after \ "codegen worker should not be codegenning after \
codegen was already completed" codegen was already completed"
), ),
MainThreadWorkerState::LLVMing => { MainThreadState::Lending => {
// Already making good use of that token // Already making good use of that token
} }
} }
@ -1417,35 +1431,39 @@ fn start_executing_work<B: ExtraBackendMethods>(
// Don't queue up any more work if codegen was aborted, we're // Don't queue up any more work if codegen was aborted, we're
// just waiting for our existing children to finish. // just waiting for our existing children to finish.
assert!(codegen_state == Aborted); assert!(codegen_state == Aborted);
if running_with_any_token(main_thread_state, running_with_own_token) == 0 {
break;
}
} }
// Spin up what work we can, only doing this while we've got available // Spin up what work we can, only doing this while we've got available
// parallelism slots and work left to spawn. // parallelism slots and work left to spawn.
while codegen_state != Aborted && !work_items.is_empty() && running < tokens.len() { if codegen_state != Aborted {
let (item, _) = work_items.pop().unwrap(); while !work_items.is_empty() && running_with_own_token < tokens.len() {
let (item, _) = work_items.pop().unwrap();
maybe_start_llvm_timer(prof, cgcx.config(item.module_kind()), &mut llvm_start_time); spawn_work(
&cgcx,
let cgcx = &mut llvm_start_time,
CodegenContext { worker: get_worker_id(&mut free_worker_ids), ..cgcx.clone() }; get_worker_id(&mut free_worker_ids),
item,
spawn_work(cgcx, item); );
running += 1; running_with_own_token += 1;
}
} }
// Relinquish accidentally acquired extra tokens // Relinquish accidentally acquired extra tokens.
tokens.truncate(running); tokens.truncate(running_with_own_token);
// If a thread exits successfully then we drop a token associated // If a thread exits successfully then we drop a token associated
// with that worker and update our `running` count. We may later // with that worker and update our `running_with_own_token` count.
// re-acquire a token to continue running more work. We may also not // We may later re-acquire a token to continue running more work.
// actually drop a token here if the worker was running with an // We may also not actually drop a token here if the worker was
// "ephemeral token" // running with an "ephemeral token".
let mut free_worker = |worker_id| { let mut free_worker = |worker_id| {
if main_thread_worker_state == MainThreadWorkerState::LLVMing { if main_thread_state == MainThreadState::Lending {
main_thread_worker_state = MainThreadWorkerState::Idle; main_thread_state = MainThreadState::Idle;
} else { } else {
running -= 1; running_with_own_token -= 1;
} }
free_worker_ids.push(worker_id); free_worker_ids.push(worker_id);
@ -1461,13 +1479,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
Ok(token) => { Ok(token) => {
tokens.push(token); tokens.push(token);
if main_thread_worker_state == MainThreadWorkerState::LLVMing { if main_thread_state == MainThreadState::Lending {
// If the main thread token is used for LLVM work // If the main thread token is used for LLVM work
// at the moment, we turn that thread into a regular // at the moment, we turn that thread into a regular
// LLVM worker thread, so the main thread is free // LLVM worker thread, so the main thread is free
// to react to codegen demand. // to react to codegen demand.
main_thread_worker_state = MainThreadWorkerState::Idle; main_thread_state = MainThreadState::Idle;
running += 1; running_with_own_token += 1;
} }
} }
Err(e) => { Err(e) => {
@ -1496,16 +1514,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
if !cgcx.opts.unstable_opts.no_parallel_llvm { if !cgcx.opts.unstable_opts.no_parallel_llvm {
helper.request_token(); helper.request_token();
} }
assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); assert_eq!(main_thread_state, MainThreadState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle; main_thread_state = MainThreadState::Idle;
} }
Message::CodegenComplete => { Message::CodegenComplete => {
if codegen_state != Aborted { if codegen_state != Aborted {
codegen_state = Completed; codegen_state = Completed;
} }
assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); assert_eq!(main_thread_state, MainThreadState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle; main_thread_state = MainThreadState::Idle;
} }
// If codegen is aborted that means translation was aborted due // If codegen is aborted that means translation was aborted due
@ -1513,7 +1531,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
// to exit as soon as possible, but we want to make sure all // to exit as soon as possible, but we want to make sure all
// existing work has finished. Flag codegen as being done, and // existing work has finished. Flag codegen as being done, and
// then conditions above will ensure no more work is spawned but // then conditions above will ensure no more work is spawned but
// we'll keep executing this loop until `running` hits 0. // we'll keep executing this loop until `running_with_own_token`
// hits 0.
Message::CodegenAborted => { Message::CodegenAborted => {
codegen_state = Aborted; codegen_state = Aborted;
} }
@ -1522,9 +1541,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
free_worker(worker_id); free_worker(worker_id);
match result { match result {
Ok(WorkItemResult::Compiled(compiled_module)) => { Ok(WorkItemResult::Finished(compiled_module)) => {
match compiled_module.kind { match compiled_module.kind {
ModuleKind::Regular => { ModuleKind::Regular => {
assert!(needs_link.is_empty());
compiled_modules.push(compiled_module); compiled_modules.push(compiled_module);
} }
ModuleKind::Allocator => { ModuleKind::Allocator => {
@ -1535,14 +1555,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
} }
} }
Ok(WorkItemResult::NeedsLink(module)) => { Ok(WorkItemResult::NeedsLink(module)) => {
assert!(compiled_modules.is_empty());
needs_link.push(module); needs_link.push(module);
} }
Ok(WorkItemResult::NeedsFatLTO(fat_lto_input)) => { Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
assert!(!started_lto); assert!(!started_lto);
assert!(needs_thin_lto.is_empty());
needs_fat_lto.push(fat_lto_input); needs_fat_lto.push(fat_lto_input);
} }
Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => { Ok(WorkItemResult::NeedsThinLto(name, thin_buffer)) => {
assert!(!started_lto); assert!(!started_lto);
assert!(needs_fat_lto.is_empty());
needs_thin_lto.push((name, thin_buffer)); needs_thin_lto.push((name, thin_buffer));
} }
Err(Some(WorkerFatalError)) => { Err(Some(WorkerFatalError)) => {
@ -1560,9 +1583,9 @@ fn start_executing_work<B: ExtraBackendMethods>(
Message::AddImportOnlyModule { module_data, work_product } => { Message::AddImportOnlyModule { module_data, work_product } => {
assert!(!started_lto); assert!(!started_lto);
assert_eq!(codegen_state, Ongoing); assert_eq!(codegen_state, Ongoing);
assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); assert_eq!(main_thread_state, MainThreadState::Codegenning);
lto_import_only_modules.push((module_data, work_product)); lto_import_only_modules.push((module_data, work_product));
main_thread_worker_state = MainThreadWorkerState::Idle; main_thread_state = MainThreadState::Idle;
} }
} }
} }
@ -1595,7 +1618,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
modules: compiled_modules, modules: compiled_modules,
allocator_module: compiled_allocator_module, allocator_module: compiled_allocator_module,
}) })
}); })
.expect("failed to spawn coordinator thread");
// A heuristic that determines if we have enough LLVM WorkItems in the // A heuristic that determines if we have enough LLVM WorkItems in the
// queue so that the main thread can do LLVM work instead of codegen // queue so that the main thread can do LLVM work instead of codegen
@ -1653,23 +1677,24 @@ fn start_executing_work<B: ExtraBackendMethods>(
let quarter_of_workers = workers_running - 3 * workers_running / 4; let quarter_of_workers = workers_running - 3 * workers_running / 4;
items_in_queue > 0 && items_in_queue >= quarter_of_workers items_in_queue > 0 && items_in_queue >= quarter_of_workers
} }
fn maybe_start_llvm_timer<'a>(
prof: &'a SelfProfilerRef,
config: &ModuleConfig,
llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
) {
if config.time_module && llvm_start_time.is_none() {
*llvm_start_time = Some(prof.verbose_generic_activity("LLVM_passes"));
}
}
} }
/// `FatalError` is explicitly not `Send`. /// `FatalError` is explicitly not `Send`.
#[must_use] #[must_use]
pub struct WorkerFatalError; pub struct WorkerFatalError;
fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) { fn spawn_work<'a, B: ExtraBackendMethods>(
cgcx: &'a CodegenContext<B>,
llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
worker_id: usize,
work: WorkItem<B>,
) {
if cgcx.config(work.module_kind()).time_module && llvm_start_time.is_none() {
*llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
}
let cgcx = cgcx.clone();
B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || { B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
// Set up a destructor which will fire off a message that we're done as // Set up a destructor which will fire off a message that we're done as
// we exit. // we exit.
@ -1692,11 +1717,8 @@ fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>
} }
} }
let mut bomb = Bomb::<B> { let mut bomb =
coordinator_send: cgcx.coordinator_send.clone(), Bomb::<B> { coordinator_send: cgcx.coordinator_send.clone(), result: None, worker_id };
result: None,
worker_id: cgcx.worker,
};
// Execute the work itself, and if it finishes successfully then flag // Execute the work itself, and if it finishes successfully then flag
// ourselves as a success as well. // ourselves as a success as well.
@ -1728,7 +1750,7 @@ fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>
}) })
}; };
}) })
.expect("failed to spawn thread"); .expect("failed to spawn work thread");
} }
enum SharedEmitterMessage { enum SharedEmitterMessage {
@ -1962,19 +1984,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
) )
} }
pub fn submit_pre_codegened_module_to_llvm(
&self,
tcx: TyCtxt<'_>,
module: ModuleCodegen<B::Module>,
) {
self.wait_for_signal_to_codegen_item();
self.check_for_errors(tcx.sess);
// These are generally cheap and won't throw off scheduling.
let cost = 0;
submit_codegened_module_to_llvm(&self.backend, &self.coordinator.sender, module, cost);
}
pub fn codegen_finished(&self, tcx: TyCtxt<'_>) { pub fn codegen_finished(&self, tcx: TyCtxt<'_>) {
self.wait_for_signal_to_codegen_item(); self.wait_for_signal_to_codegen_item();
self.check_for_errors(tcx.sess); self.check_for_errors(tcx.sess);
@ -2040,7 +2049,7 @@ pub fn submit_pre_lto_module_to_llvm<B: ExtraBackendMethods>(
}))); })));
} }
pub fn pre_lto_bitcode_filename(module_name: &str) -> String { fn pre_lto_bitcode_filename(module_name: &str) -> String {
format!("{module_name}.{PRE_LTO_BC_EXT}") format!("{module_name}.{PRE_LTO_BC_EXT}")
} }

View file

@ -664,9 +664,16 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
) )
}); });
ongoing_codegen.submit_pre_codegened_module_to_llvm( ongoing_codegen.wait_for_signal_to_codegen_item();
tcx, ongoing_codegen.check_for_errors(tcx.sess);
// These modules are generally cheap and won't throw off scheduling.
let cost = 0;
submit_codegened_module_to_llvm(
&backend,
&ongoing_codegen.coordinator.sender,
ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator }, ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator },
cost,
); );
} }
@ -761,7 +768,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
module, module,
cost, cost,
); );
false
} }
CguReuse::PreLto => { CguReuse::PreLto => {
submit_pre_lto_module_to_llvm( submit_pre_lto_module_to_llvm(
@ -773,7 +779,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
source: cgu.previous_work_product(tcx), source: cgu.previous_work_product(tcx),
}, },
); );
true
} }
CguReuse::PostLto => { CguReuse::PostLto => {
submit_post_lto_module_to_llvm( submit_post_lto_module_to_llvm(
@ -784,9 +789,8 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
source: cgu.previous_work_product(tcx), source: cgu.previous_work_product(tcx),
}, },
); );
true
} }
}; }
} }
ongoing_codegen.codegen_finished(tcx); ongoing_codegen.codegen_finished(tcx);

View file

@ -142,15 +142,6 @@ pub trait ExtraBackendMethods:
target_features: &[String], target_features: &[String],
) -> TargetMachineFactoryFn<Self>; ) -> TargetMachineFactoryFn<Self>;
fn spawn_thread<F, T>(_time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(f)
}
fn spawn_named_thread<F, T>( fn spawn_named_thread<F, T>(
_time_trace: bool, _time_trace: bool,
name: String, name: String,

View file

@ -1,5 +1,5 @@
use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule}; use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use crate::back::write::{CodegenContext, FatLTOInput, ModuleConfig}; use crate::back::write::{CodegenContext, FatLtoInput, ModuleConfig};
use crate::{CompiledModule, ModuleCodegen}; use crate::{CompiledModule, ModuleCodegen};
use rustc_errors::{FatalError, Handler}; use rustc_errors::{FatalError, Handler};
@ -23,7 +23,7 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
/// for further optimization. /// for further optimization.
fn run_fat_lto( fn run_fat_lto(
cgcx: &CodegenContext<Self>, cgcx: &CodegenContext<Self>,
modules: Vec<FatLTOInput<Self>>, modules: Vec<FatLtoInput<Self>>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<Self>, FatalError>; ) -> Result<LtoModuleCodegen<Self>, FatalError>;
/// Performs thin LTO by performing necessary global analysis and returning two /// Performs thin LTO by performing necessary global analysis and returning two

View file

@ -1881,6 +1881,7 @@ written to standard error output)"),
// If you add a new option, please update: // If you add a new option, please update:
// - compiler/rustc_interface/src/tests.rs // - compiler/rustc_interface/src/tests.rs
// - src/doc/unstable-book/src/compiler-flags
} }
#[derive(Clone, Hash, PartialEq, Eq, Debug)] #[derive(Clone, Hash, PartialEq, Eq, Debug)]

View file

@ -0,0 +1,8 @@
# `no-parallel-llvm`
---------------------
This flag disables parallelization of codegen and linking, while otherwise preserving
behavior with regard to codegen units and LTO.
This flag is not useful for regular users, but it can be useful for debugging the backend. Codegen issues commonly manifest only under specific circumstances, e.g. if multiple codegen units are used and ThinLTO is enabled. Running these normally multi-threaded configurations serially makes LLVM's debugging facilities easier to use, because output from different threads is no longer interleaved.