1
Fork 0

move second opt run to lto phase and cleanup code

This commit is contained in:
Manuel Drehwald 2025-02-10 01:35:22 -05:00
parent 21d096184e
commit 1221cff551
7 changed files with 75 additions and 54 deletions

View file

@ -606,10 +606,31 @@ pub(crate) fn run_pass_manager(
// If this rustc version was build with enzyme/autodiff enabled, and if users applied the
// `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
let first_run = true;
debug!("running llvm pm opt pipeline");
unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
write::llvm_optimize(
cgcx,
dcx,
module,
config,
opt_level,
opt_stage,
write::AutodiffStage::DuringAD,
)?;
}
// FIXME(ZuseZ4): Make this more granular
if cfg!(llvm_enzyme) && !thin {
unsafe {
write::llvm_optimize(
cgcx,
dcx,
module,
config,
opt_level,
llvm::OptStage::FatLTO,
write::AutodiffStage::PostAD,
)?;
}
}
debug!("lto done");
Ok(())

View file

@ -530,6 +530,16 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> {
config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned())
}
// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling)
// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes.
// PostAD will run all opts, including size increasing opts.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum AutodiffStage {
PreAD,
DuringAD,
PostAD,
}
pub(crate) unsafe fn llvm_optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
@ -537,7 +547,7 @@ pub(crate) unsafe fn llvm_optimize(
config: &ModuleConfig,
opt_level: config::OptLevel,
opt_stage: llvm::OptStage,
skip_size_increasing_opts: bool,
autodiff_stage: AutodiffStage,
) -> Result<(), FatalError> {
// Enzyme:
// The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
@ -550,13 +560,16 @@ pub(crate) unsafe fn llvm_optimize(
let unroll_loops;
let vectorize_slp;
let vectorize_loop;
let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD;
let run_enzyme = cfg!(llvm_enzyme);
// When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
// optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
// optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
// We therefore have two calls to llvm_optimize, if autodiff is used.
//
// FIXME(ZuseZ4): Before shipping on nightly,
// we should make this more granular, or at least check that the user has at least one autodiff
// call in their code, to justify altering the compilation pipeline.
if skip_size_increasing_opts && run_enzyme {
if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD {
unroll_loops = false;
vectorize_slp = false;
vectorize_loop = false;
@ -566,7 +579,7 @@ pub(crate) unsafe fn llvm_optimize(
vectorize_slp = config.vectorize_slp;
vectorize_loop = config.vectorize_loop;
}
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
@ -686,18 +699,14 @@ pub(crate) unsafe fn optimize(
_ => llvm::OptStage::PreLinkNoLTO,
};
// If we know that we will later run AD, then we disable vectorization and loop unrolling
let skip_size_increasing_opts = cfg!(llvm_enzyme);
// If we know that we will later run AD, then we disable vectorization and loop unrolling.
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
// FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff
// usages, not just if we build rustc with autodiff support.
let autodiff_stage =
if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
return unsafe {
llvm_optimize(
cgcx,
dcx,
module,
config,
opt_level,
opt_stage,
skip_size_increasing_opts,
)
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
};
}
Ok(())

View file

@ -4,10 +4,9 @@ use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivit
use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::back::write::ModuleConfig;
use rustc_errors::FatalError;
use rustc_session::config::Lto;
use tracing::{debug, trace};
use crate::back::write::{llvm_err, llvm_optimize};
use crate::back::write::llvm_err;
use crate::builder::SBuilder;
use crate::context::SimpleCx;
use crate::declare::declare_simple_fn;
@ -153,7 +152,7 @@ fn generate_enzyme_call<'ll>(
_ => {}
}
trace!("matching autodiff arguments");
debug!("matching autodiff arguments");
// We now handle the issue that Rust level arguments not always match the llvm-ir level
// arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
// llvm-ir level. The number of activities matches the number of Rust level arguments, so we
@ -222,7 +221,10 @@ fn generate_enzyme_call<'ll>(
// A duplicated pointer will have the following two outer_fn arguments:
// (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
// (..., metadata! enzyme_dup, ptr, ptr, ...).
if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly) {
if matches!(
diff_activity,
DiffActivity::Duplicated | DiffActivity::DuplicatedOnly
) {
assert!(
llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer
);
@ -282,7 +284,7 @@ pub(crate) fn differentiate<'ll>(
module: &'ll ModuleCodegen<ModuleLlvm>,
cgcx: &CodegenContext<LlvmCodegenBackend>,
diff_items: Vec<AutoDiffItem>,
config: &ModuleConfig,
_config: &ModuleConfig,
) -> Result<(), FatalError> {
for item in &diff_items {
trace!("{}", item);
@ -317,29 +319,6 @@ pub(crate) fn differentiate<'ll>(
// FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
if let Some(opt_level) = config.opt_level {
let opt_stage = match cgcx.lto {
Lto::Fat => llvm::OptStage::PreLinkFatLTO,
Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
_ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
_ => llvm::OptStage::PreLinkNoLTO,
};
// This is our second opt call, so now we run all opts,
// to make sure we get the best performance.
let skip_size_increasing_opts = false;
trace!("running Module Optimization after differentiation");
unsafe {
llvm_optimize(
cgcx,
diag_handler.handle(),
module,
config,
opt_level,
opt_stage,
skip_size_increasing_opts,
)?
};
}
trace!("done with differentiate()");
Ok(())