Auto merge of #133250 - DianQK:embed-bitcode-pgo, r=nikic

The embedded bitcode should always be prepared for LTO/ThinLTO

Fixes #115344. Fixes #117220.

There are currently two methods for generating bitcode that used for LTO. One method involves using `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is through `-C embed-bitcode=yes`.

When using with `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain bitcode, then the bitcode is used for LTO. We run the Call Graph Profile Pass twice on the same module.

This PR is doing something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`.

r? nikic
This commit is contained in:
bors 2025-03-01 08:22:18 +00:00
commit 0c72c0d11a
20 changed files with 294 additions and 101 deletions

View file

@ -2,6 +2,7 @@ use std::collections::BTreeMap;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::path::Path;
use std::ptr::NonNull;
use std::sync::Arc;
use std::{io, iter, slice};
@ -305,11 +306,8 @@ fn fat_lto(
assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
let (buffer, name) = serialized_modules.remove(0);
info!("no in-memory regular modules to choose from, parsing {:?}", name);
ModuleCodegen {
module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?,
name: name.into_string().unwrap(),
kind: ModuleKind::Regular,
}
let llvm_module = ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?;
ModuleCodegen::new_regular(name.into_string().unwrap(), llvm_module)
}
};
{
@ -655,14 +653,14 @@ pub(crate) fn run_pass_manager(
}
unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
}
if cfg!(llvm_enzyme) && enable_ad {
let opt_stage = llvm::OptStage::FatLTO;
let stage = write::AutodiffStage::PostAD;
unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
}
// This is the final IR, so people should be able to inspect the optimized autodiff output.
@ -729,6 +727,11 @@ impl ThinBuffer {
ThinBuffer(buffer)
}
}
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
let mut ptr = NonNull::new(ptr).unwrap();
ThinBuffer(unsafe { ptr.as_mut() })
}
}
impl ThinBufferMethods for ThinBuffer {
@ -772,11 +775,11 @@ pub(crate) unsafe fn optimize_thin_module(
// crates but for locally codegened modules we may be able to reuse
// that LLVM Context and Module.
let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
let mut module = ModuleCodegen {
module_llvm,
name: thin_module.name().to_string(),
kind: ModuleKind::Regular,
};
let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
// Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
if cgcx.config(ModuleKind::Regular).embed_bitcode() {
module.thin_lto_buffer = Some(thin_module.data().to_vec());
}
{
let target = &*module.module_llvm.tm;
let llmod = module.module_llvm.llmod();

View file

@ -1,6 +1,7 @@
use std::ffi::{CStr, CString};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::ptr::null_mut;
use std::sync::Arc;
use std::{fs, slice, str};
@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
TargetMachineFactoryFn,
};
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>,
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
config: &ModuleConfig,
opt_level: config::OptLevel,
opt_stage: llvm::OptStage,
@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
vectorize_loop = config.vectorize_loop;
}
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
if thin_lto_buffer.is_some() {
assert!(
matches!(
opt_stage,
llvm::OptStage::PreLinkNoLTO
| llvm::OptStage::PreLinkFatLTO
| llvm::OptStage::PreLinkThinLTO
),
"the bitcode for LTO can only be obtained at the pre-link stage"
);
}
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
config.no_prepopulate_passes,
config.verify_llvm_ir,
config.lint_llvm_ir,
using_thin_buffers,
thin_lto_buffer,
config.emit_thin_lto,
config.emit_thin_lto_summary,
config.merge_functions,
unroll_loops,
vectorize_slp,
@ -675,7 +689,7 @@ pub(crate) unsafe fn llvm_optimize(
pub(crate) unsafe fn optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>,
module: &mut ModuleCodegen<ModuleLlvm>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
@ -705,9 +719,53 @@ pub(crate) unsafe fn optimize(
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
return unsafe {
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
// The embedded bitcode is used to run LTO/ThinLTO.
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
// this point.
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
|| config.emit_thin_lto_summary
{
Some(null_mut())
} else {
None
};
unsafe {
llvm_optimize(
cgcx,
dcx,
module,
thin_lto_buffer.as_mut(),
config,
opt_level,
opt_stage,
autodiff_stage,
)
}?;
if let Some(thin_lto_buffer) = thin_lto_buffer {
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin_lto_buffer.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
}
}
Ok(())
}
@ -760,59 +818,41 @@ pub(crate) unsafe fn codegen(
// otherwise requested.
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
if config.bitcode_needed() {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
let data = thin.data();
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
let thin = {
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_make_bitcode",
&*module.name,
);
ThinBuffer::new(llmod, config.emit_thin_lto, false)
};
let data = thin.data();
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if let Err(err) = fs::write(&bc_out, data) {
dcx.emit_err(WriteBytecode { path: &bc_out, err });
}
}
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
if config.embed_bitcode() && module.kind == ModuleKind::Regular {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
let thin_bc =
module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
unsafe {
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
}
}
}

View file

@ -13,10 +13,10 @@
use std::time::Instant;
use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_middle::dep_graph;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
@ -133,11 +133,7 @@ pub(crate) fn compile_codegen_unit(
}
}
ModuleCodegen {
name: cgu_name.to_string(),
module_llvm: llvm_module,
kind: ModuleKind::Regular,
}
ModuleCodegen::new_regular(cgu_name.to_string(), llvm_module)
}
(module, cost)

View file

@ -194,7 +194,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
unsafe fn optimize(
cgcx: &CodegenContext<Self>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<Self::Module>,
module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
unsafe { back::write::optimize(cgcx, dcx, module, config) }

View file

@ -2425,7 +2425,9 @@ unsafe extern "C" {
NoPrepopulatePasses: bool,
VerifyIR: bool,
LintIR: bool,
UseThinLTOBuffers: bool,
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
EmitThinLTO: bool,
EmitThinLTOSummary: bool,
MergeFunctions: bool,
UnrollLoops: bool,
SLPVectorize: bool,