The embedded bitcode should always be prepared for LTO/ThinLTO
This commit is contained in:
parent
1805b33483
commit
1a99ca8da9
13 changed files with 265 additions and 69 deletions
|
@ -2,6 +2,7 @@ use std::collections::BTreeMap;
|
|||
use std::ffi::{CStr, CString};
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::ptr::NonNull;
|
||||
use std::sync::Arc;
|
||||
use std::{io, iter, slice};
|
||||
|
||||
|
@ -655,14 +656,14 @@ pub(crate) fn run_pass_manager(
|
|||
}
|
||||
|
||||
unsafe {
|
||||
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
|
||||
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
|
||||
}
|
||||
|
||||
if cfg!(llvm_enzyme) && enable_ad {
|
||||
let opt_stage = llvm::OptStage::FatLTO;
|
||||
let stage = write::AutodiffStage::PostAD;
|
||||
unsafe {
|
||||
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
|
||||
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
|
||||
}
|
||||
|
||||
// This is the final IR, so people should be able to inspect the optimized autodiff output.
|
||||
|
@ -729,6 +730,11 @@ impl ThinBuffer {
|
|||
ThinBuffer(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a raw `llvm::ThinLTOBuffer` pointer in a [`ThinBuffer`].
///
/// # Panics
///
/// Panics if `ptr` is null (the `NonNull::new(ptr).unwrap()` below).
///
/// # Safety
///
/// `ptr` is dereferenced to form a mutable reference, so it must point to a
/// valid `llvm::ThinLTOBuffer` that is not aliased elsewhere.
/// NOTE(review): presumably the returned `ThinBuffer` takes over ownership of
/// the buffer and it must stay alive for as long as the `ThinBuffer` does —
/// confirm against `ThinBuffer`'s definition and `Drop` impl.
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
|
||||
// Reject null up front: `NonNull::new` yields `None` for a null pointer and `unwrap` panics.
let mut ptr = NonNull::new(ptr).unwrap();
|
||||
ThinBuffer(unsafe { ptr.as_mut() })
|
||||
}
|
||||
}
|
||||
|
||||
impl ThinBufferMethods for ThinBuffer {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use std::ffi::{CStr, CString};
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::ptr::null_mut;
|
||||
use std::sync::Arc;
|
||||
use std::{fs, slice, str};
|
||||
|
||||
|
@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
|
|||
TargetMachineFactoryFn,
|
||||
};
|
||||
use rustc_codegen_ssa::traits::*;
|
||||
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
|
||||
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
|
||||
use rustc_data_structures::profiling::SelfProfilerRef;
|
||||
use rustc_data_structures::small_c_str::SmallCStr;
|
||||
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
|
||||
|
@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
|
|||
cgcx: &CodegenContext<LlvmCodegenBackend>,
|
||||
dcx: DiagCtxtHandle<'_>,
|
||||
module: &ModuleCodegen<ModuleLlvm>,
|
||||
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
|
||||
config: &ModuleConfig,
|
||||
opt_level: config::OptLevel,
|
||||
opt_stage: llvm::OptStage,
|
||||
|
@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
|
|||
vectorize_loop = config.vectorize_loop;
|
||||
}
|
||||
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
|
||||
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
|
||||
if thin_lto_buffer.is_some() {
|
||||
assert!(
|
||||
matches!(
|
||||
opt_stage,
|
||||
llvm::OptStage::PreLinkNoLTO
|
||||
| llvm::OptStage::PreLinkFatLTO
|
||||
| llvm::OptStage::PreLinkThinLTO
|
||||
),
|
||||
"the bitcode for LTO can only be obtained at the pre-link stage"
|
||||
);
|
||||
}
|
||||
let pgo_gen_path = get_pgo_gen_path(config);
|
||||
let pgo_use_path = get_pgo_use_path(config);
|
||||
let pgo_sample_use_path = get_pgo_sample_use_path(config);
|
||||
|
@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
|
|||
config.no_prepopulate_passes,
|
||||
config.verify_llvm_ir,
|
||||
config.lint_llvm_ir,
|
||||
using_thin_buffers,
|
||||
thin_lto_buffer,
|
||||
config.emit_thin_lto,
|
||||
config.emit_thin_lto_summary,
|
||||
config.merge_functions,
|
||||
unroll_loops,
|
||||
vectorize_slp,
|
||||
|
@ -705,9 +719,56 @@ pub(crate) unsafe fn optimize(
|
|||
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
|
||||
let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
|
||||
let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
|
||||
return unsafe {
|
||||
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
|
||||
// The embedded bitcode is used to run LTO/ThinLTO.
|
||||
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
|
||||
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
|
||||
// this point.
|
||||
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
|
||||
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
|
||||
|| config.emit_thin_lto_summary
|
||||
{
|
||||
Some(null_mut())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
unsafe {
|
||||
llvm_optimize(
|
||||
cgcx,
|
||||
dcx,
|
||||
module,
|
||||
thin_lto_buffer.as_mut(),
|
||||
config,
|
||||
opt_level,
|
||||
opt_stage,
|
||||
autodiff_stage,
|
||||
)
|
||||
}?;
|
||||
if let Some(thin_lto_buffer) = thin_lto_buffer {
|
||||
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
|
||||
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
|
||||
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
|
||||
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
|
||||
}
|
||||
let bc_summary_out =
|
||||
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
|
||||
if config.emit_thin_lto_summary
|
||||
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
|
||||
{
|
||||
let summary_data = thin_lto_buffer.thin_link_data();
|
||||
cgcx.prof.artifact_size(
|
||||
"llvm_bitcode_summary",
|
||||
thin_link_bitcode_filename.to_string_lossy(),
|
||||
summary_data.len() as u64,
|
||||
);
|
||||
let _timer = cgcx.prof.generic_activity_with_arg(
|
||||
"LLVM_module_codegen_emit_bitcode_summary",
|
||||
&*module.name,
|
||||
);
|
||||
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
|
||||
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -760,59 +821,47 @@ pub(crate) unsafe fn codegen(
|
|||
// otherwise requested.
|
||||
|
||||
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
|
||||
let bc_summary_out =
|
||||
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
|
||||
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
|
||||
|
||||
if config.bitcode_needed() {
|
||||
let _timer = cgcx
|
||||
.prof
|
||||
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
|
||||
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
|
||||
let data = thin.data();
|
||||
|
||||
if let Some(bitcode_filename) = bc_out.file_name() {
|
||||
cgcx.prof.artifact_size(
|
||||
"llvm_bitcode",
|
||||
bitcode_filename.to_string_lossy(),
|
||||
data.len() as u64,
|
||||
);
|
||||
}
|
||||
|
||||
if config.emit_thin_lto_summary
|
||||
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
|
||||
{
|
||||
let summary_data = thin.thin_link_data();
|
||||
cgcx.prof.artifact_size(
|
||||
"llvm_bitcode_summary",
|
||||
thin_link_bitcode_filename.to_string_lossy(),
|
||||
summary_data.len() as u64,
|
||||
);
|
||||
|
||||
let _timer = cgcx.prof.generic_activity_with_arg(
|
||||
"LLVM_module_codegen_emit_bitcode_summary",
|
||||
&*module.name,
|
||||
);
|
||||
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
|
||||
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
|
||||
}
|
||||
}
|
||||
|
||||
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
|
||||
let thin = {
|
||||
let _timer = cgcx.prof.generic_activity_with_arg(
|
||||
"LLVM_module_codegen_make_bitcode",
|
||||
&*module.name,
|
||||
);
|
||||
ThinBuffer::new(llmod, config.emit_thin_lto, false)
|
||||
};
|
||||
let data = thin.data();
|
||||
let _timer = cgcx
|
||||
.prof
|
||||
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
|
||||
if let Some(bitcode_filename) = bc_out.file_name() {
|
||||
cgcx.prof.artifact_size(
|
||||
"llvm_bitcode",
|
||||
bitcode_filename.to_string_lossy(),
|
||||
data.len() as u64,
|
||||
);
|
||||
}
|
||||
if let Err(err) = fs::write(&bc_out, data) {
|
||||
dcx.emit_err(WriteBytecode { path: &bc_out, err });
|
||||
}
|
||||
}
|
||||
|
||||
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
|
||||
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
|
||||
&& module.kind == ModuleKind::Regular
|
||||
{
|
||||
let _timer = cgcx
|
||||
.prof
|
||||
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
|
||||
let thin_bc_out =
|
||||
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
|
||||
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
|
||||
let data = fs::read(&thin_bc_out).unwrap();
|
||||
debug!("removing embed bitcode file {:?}", thin_bc_out);
|
||||
ensure_removed(dcx, &thin_bc_out);
|
||||
unsafe {
|
||||
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
|
||||
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2421,7 +2421,9 @@ unsafe extern "C" {
|
|||
NoPrepopulatePasses: bool,
|
||||
VerifyIR: bool,
|
||||
LintIR: bool,
|
||||
UseThinLTOBuffers: bool,
|
||||
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
|
||||
EmitThinLTO: bool,
|
||||
EmitThinLTOSummary: bool,
|
||||
MergeFunctions: bool,
|
||||
UnrollLoops: bool,
|
||||
SLPVectorize: bool,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue