
Auto merge of #133250 - DianQK:embed-bitcode-pgo, r=nikic

The embedded bitcode should always be prepared for LTO/ThinLTO

Fixes #115344. Fixes #117220.

There are currently two methods for generating the bitcode used for LTO. One is `-C linker-plugin-lto`, which emits object files that are actually bitcode; this is the typical setting used by cargo. The other is `-C embed-bitcode=yes`.

When using `-C embed-bitcode=yes -C lto=no`, we run the complete non-LTO LLVM pipeline to obtain the bitcode, and that bitcode is later used for LTO. As a result, we run the Call Graph Profile Pass twice on the same module.
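
For concreteness, a minimal sketch of that second setup (the crate names are hypothetical; `rustc()` is the `run_make_support` test helper that the new run-make test below also uses):

```rust
// Sketch only: file names are made up, but the flags match the description above.
use run_make_support::rustc;

fn main() {
    // The complete non-LTO pipeline runs here, and a full copy of the
    // resulting bitcode is embedded in the object files.
    rustc().input("lib.rs").arg("-Cembed-bitcode=yes").arg("-Clto=no").opt().run();

    // A downstream ThinLTO build then re-optimizes that already-optimized
    // embedded bitcode, which is how the Call Graph Profile Pass ends up
    // running twice on the same module.
    rustc().input("main.rs").arg("-Clto=thin").opt().run();
}
```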

This PR does something similar to LLVM's `buildFatLTODefaultPipeline`: it obtains the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`.
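
Conceptually (a sketch with hypothetical stage names, not rustc's actual API), the pre-link pipeline is split so the snapshot for embedding is taken while the module is still LTO-ready:

```rust
// Hypothetical stand-ins for the LLVM pipeline stages involved.
fn run_thin_lto_prelink_pipeline() { /* buildThinLTOPreLinkDefaultPipeline */ }
fn capture_bitcode_for_embedding() { /* serialize the module to a buffer */ }
fn run_module_optimization_pipeline() { /* buildModuleOptimizationPipeline */ }

// A pre-link, non-LTO compile that must embed bitcode now does:
fn optimize_module(embed_bitcode_for_lto: bool) {
    run_thin_lto_prelink_pipeline();
    if embed_bitcode_for_lto {
        // Captured before the post-link-style optimizations, so a later
        // LTO/ThinLTO step will not repeat passes it has already run.
        capture_bitcode_for_embedding();
    }
    run_module_optimization_pipeline();
}

fn main() {
    optimize_module(true);
}
```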

r? nikic
bors 2025-03-01 08:22:18 +00:00
commit 0c72c0d11a
20 changed files with 294 additions and 101 deletions

View file

@@ -632,17 +632,16 @@ pub unsafe fn optimize_thin_module(
            Arc::new(SyncContext::new(context))
        }
    };
-    let module = ModuleCodegen {
-        module_llvm: GccContext {
+    let module = ModuleCodegen::new_regular(
+        thin_module.name().to_string(),
+        GccContext {
            context,
            should_combine_object_files,
            // TODO(antoyo): use the correct relocation model here.
            relocation_model: RelocModel::Pic,
            temp_dir: None,
        },
-        name: thin_module.name().to_string(),
-        kind: ModuleKind::Regular,
-    };
+    );
/*{
let target = &*module.module_llvm.tm;
let llmod = module.module_llvm.llmod();

View file

@@ -4,10 +4,10 @@ use std::sync::Arc;
use std::time::Instant;

use gccjit::{CType, Context, FunctionType, GlobalKind};
+use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::DebugInfoCodegenMethods;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_middle::dep_graph;
use rustc_middle::mir::mono::Linkage;
#[cfg(feature = "master")]
@@ -237,16 +237,15 @@ pub fn compile_codegen_unit(
            }
        }

-        ModuleCodegen {
-            name: cgu_name.to_string(),
-            module_llvm: GccContext {
+        ModuleCodegen::new_regular(
+            cgu_name.to_string(),
+            GccContext {
                context: Arc::new(SyncContext::new(context)),
                relocation_model: tcx.sess.relocation_model(),
                should_combine_object_files: false,
                temp_dir: None,
            },
-            kind: ModuleKind::Regular,
-        }
+        )
}
(module, cost)

View file

@@ -393,7 +393,7 @@ impl WriteBackendMethods for GccCodegenBackend {
    unsafe fn optimize(
        _cgcx: &CodegenContext<Self>,
        _dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
module.module_llvm.context.set_optimization_level(to_gcc_opt_level(config.opt_level));

View file

@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::path::Path;
+use std::ptr::NonNull;
use std::sync::Arc;
use std::{io, iter, slice};
@@ -305,11 +306,8 @@ fn fat_lto(
            assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
            let (buffer, name) = serialized_modules.remove(0);
            info!("no in-memory regular modules to choose from, parsing {:?}", name);
-            ModuleCodegen {
-                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?,
-                name: name.into_string().unwrap(),
-                kind: ModuleKind::Regular,
-            }
+            let llvm_module = ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?;
+            ModuleCodegen::new_regular(name.into_string().unwrap(), llvm_module)
}
};
{
@@ -655,14 +653,14 @@ pub(crate) fn run_pass_manager(
    }
    unsafe {
-        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+        write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
    }
    if cfg!(llvm_enzyme) && enable_ad {
        let opt_stage = llvm::OptStage::FatLTO;
        let stage = write::AutodiffStage::PostAD;
        unsafe {
-            write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+            write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
}
// This is the final IR, so people should be able to inspect the optimized autodiff output.
@@ -729,6 +727,11 @@ impl ThinBuffer {
            ThinBuffer(buffer)
        }
    }
+
+    pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
+        let mut ptr = NonNull::new(ptr).unwrap();
+        ThinBuffer(unsafe { ptr.as_mut() })
+    }
}
impl ThinBufferMethods for ThinBuffer {
@@ -772,11 +775,11 @@ pub(crate) unsafe fn optimize_thin_module(
    // crates but for locally codegened modules we may be able to reuse
    // that LLVM Context and Module.
    let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
-    let mut module = ModuleCodegen {
-        module_llvm,
-        name: thin_module.name().to_string(),
-        kind: ModuleKind::Regular,
-    };
+    let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
+    // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
+    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+        module.thin_lto_buffer = Some(thin_module.data().to_vec());
+    }
{
let target = &*module.module_llvm.tm;
let llmod = module.module_llvm.llmod();

View file

@@ -1,6 +1,7 @@
use std::ffi::{CStr, CString};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
+use std::ptr::null_mut;
use std::sync::Arc;
use std::{fs, slice, str};
@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
    TargetMachineFactoryFn,
};
use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
+use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    dcx: DiagCtxtHandle<'_>,
    module: &ModuleCodegen<ModuleLlvm>,
+    thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
config: &ModuleConfig,
opt_level: config::OptLevel,
opt_stage: llvm::OptStage,
@@ -584,7 +586,17 @@
        vectorize_loop = config.vectorize_loop;
    }
    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
-    let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
+    if thin_lto_buffer.is_some() {
+        assert!(
+            matches!(
+                opt_stage,
+                llvm::OptStage::PreLinkNoLTO
+                    | llvm::OptStage::PreLinkFatLTO
+                    | llvm::OptStage::PreLinkThinLTO
+            ),
+            "the bitcode for LTO can only be obtained at the pre-link stage"
+        );
+    }
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -644,7 +656,9 @@
            config.no_prepopulate_passes,
            config.verify_llvm_ir,
            config.lint_llvm_ir,
-            using_thin_buffers,
+            thin_lto_buffer,
+            config.emit_thin_lto,
+            config.emit_thin_lto_summary,
config.merge_functions,
unroll_loops,
vectorize_slp,
@@ -675,7 +689,7 @@
pub(crate) unsafe fn optimize(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    dcx: DiagCtxtHandle<'_>,
-    module: &ModuleCodegen<ModuleLlvm>,
+    module: &mut ModuleCodegen<ModuleLlvm>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
@@ -705,9 +719,53 @@
        // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
        let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
        let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
-        return unsafe {
-            llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
-        };
+        // The embedded bitcode is used to run LTO/ThinLTO.
+        // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
+        // It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
+        // this point.
+        let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
+            && config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
+            || config.emit_thin_lto_summary
+        {
+            Some(null_mut())
+        } else {
+            None
+        };
+        unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                thin_lto_buffer.as_mut(),
+                config,
+                opt_level,
+                opt_stage,
+                autodiff_stage,
+            )
+        }?;
+        if let Some(thin_lto_buffer) = thin_lto_buffer {
+            let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
+            module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
+            let bc_summary_out =
+                cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
+            if config.emit_thin_lto_summary
+                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
+            {
+                let summary_data = thin_lto_buffer.thin_link_data();
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode_summary",
+                    thin_link_bitcode_filename.to_string_lossy(),
+                    summary_data.len() as u64,
+                );
+                let _timer = cgcx.prof.generic_activity_with_arg(
+                    "LLVM_module_codegen_emit_bitcode_summary",
+                    &*module.name,
+                );
+                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
+                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
+                }
+            }
+        }
}
Ok(())
}
@@ -760,17 +818,21 @@ pub(crate) unsafe fn codegen(
        // otherwise requested.
        let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
-        let bc_summary_out =
-            cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
        let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
-        if config.bitcode_needed() {
+        if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
+            let thin = {
+                let _timer = cgcx.prof.generic_activity_with_arg(
+                    "LLVM_module_codegen_make_bitcode",
+                    &*module.name,
+                );
+                ThinBuffer::new(llmod, config.emit_thin_lto, false)
+            };
+            let data = thin.data();
            let _timer = cgcx
                .prof
-                .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
-            let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
-            let data = thin.data();
+                .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
@@ -778,41 +840,19 @@ pub(crate) unsafe fn codegen(
                    data.len() as u64,
                );
            }
-            if config.emit_thin_lto_summary
-                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
-            {
-                let summary_data = thin.thin_link_data();
-                cgcx.prof.artifact_size(
-                    "llvm_bitcode_summary",
-                    thin_link_bitcode_filename.to_string_lossy(),
-                    summary_data.len() as u64,
-                );
-                let _timer = cgcx.prof.generic_activity_with_arg(
-                    "LLVM_module_codegen_emit_bitcode_summary",
-                    &*module.name,
-                );
-                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
-                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
-                }
-            }
-            if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
-                let _timer = cgcx
-                    .prof
-                    .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
            if let Err(err) = fs::write(&bc_out, data) {
                dcx.emit_err(WriteBytecode { path: &bc_out, err });
            }
        }
-        if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
+        if config.embed_bitcode() && module.kind == ModuleKind::Regular {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
+            let thin_bc =
+                module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
            unsafe {
-                embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
+                embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
            }
        }
}

View file

@@ -13,10 +13,10 @@
use std::time::Instant;

+use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_middle::dep_graph;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
@@ -133,11 +133,7 @@ pub(crate) fn compile_codegen_unit(
            }
        }

-        ModuleCodegen {
-            name: cgu_name.to_string(),
-            module_llvm: llvm_module,
-            kind: ModuleKind::Regular,
-        }
+        ModuleCodegen::new_regular(cgu_name.to_string(), llvm_module)
}
(module, cost)

View file

@@ -194,7 +194,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
    unsafe fn optimize(
        cgcx: &CodegenContext<Self>,
        dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
unsafe { back::write::optimize(cgcx, dcx, module, config) }

View file

@@ -2425,7 +2425,9 @@ unsafe extern "C" {
        NoPrepopulatePasses: bool,
        VerifyIR: bool,
        LintIR: bool,
-        UseThinLTOBuffers: bool,
+        ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
+        EmitThinLTO: bool,
+        EmitThinLTOSummary: bool,
MergeFunctions: bool,
UnrollLoops: bool,
SLPVectorize: bool,

View file

@@ -278,6 +278,10 @@ impl ModuleConfig {
            || self.emit_obj == EmitObj::Bitcode
            || self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
    }
+
+    pub fn embed_bitcode(&self) -> bool {
+        self.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
+    }
}
/// Configuration passed to the function returned by the `target_machine_factory`.
@@ -877,14 +881,14 @@ pub(crate) fn compute_per_cgu_lto_type(
fn execute_optimize_work_item<B: ExtraBackendMethods>(
    cgcx: &CodegenContext<B>,
-    module: ModuleCodegen<B::Module>,
+    mut module: ModuleCodegen<B::Module>,
    module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
    let dcx = cgcx.create_dcx();
    let dcx = dcx.handle();
    unsafe {
-        B::optimize(cgcx, dcx, &module, module_config)?;
+        B::optimize(cgcx, dcx, &mut module, module_config)?;
}
// After we've done the initial round of optimizations we need to

View file

@@ -687,7 +687,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
        submit_codegened_module_to_llvm(
            &backend,
            &ongoing_codegen.coordinator.sender,
-            ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator },
+            ModuleCodegen::new_allocator(llmod_id, module_llvm),
cost,
);
}

View file

@@ -75,9 +75,29 @@ pub struct ModuleCodegen<M> {
    pub name: String,
    pub module_llvm: M,
    pub kind: ModuleKind,
+    /// Saving the ThinLTO buffer for embedding in the object file.
+    pub thin_lto_buffer: Option<Vec<u8>>,
}

impl<M> ModuleCodegen<M> {
+    pub fn new_regular(name: impl Into<String>, module: M) -> Self {
+        Self {
+            name: name.into(),
+            module_llvm: module,
+            kind: ModuleKind::Regular,
+            thin_lto_buffer: None,
+        }
+    }
+
+    pub fn new_allocator(name: impl Into<String>, module: M) -> Self {
+        Self {
+            name: name.into(),
+            module_llvm: module,
+            kind: ModuleKind::Allocator,
+            thin_lto_buffer: None,
+        }
+    }
+
pub fn into_compiled_module(
self,
emit_obj: bool,

View file

@@ -40,7 +40,7 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
    unsafe fn optimize(
        cgcx: &CodegenContext<Self>,
        dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError>;
fn optimize_fat(

View file

@@ -7,6 +7,7 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -37,6 +38,7 @@
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
GEN_SUBTARGETS
#undef SUBTARGET

+// This struct and various functions are sort of a hack right now, but the
+// problem is that we've got in-memory LLVM modules after we generate and
+// optimize all codegen-units for one compilation in rustc. To be compatible
+// with the LTO support above we need to serialize the modules plus their
+// ThinLTO summary into memory.
+//
+// This structure is basically an owned version of a serialize module, with
+// a ThinLTO summary attached.
+struct LLVMRustThinLTOBuffer {
+  std::string data;
+  std::string thin_link_data;
+};
+
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
const char *Feature) {
TargetMachine *Target = unwrap(TM);
@@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
    LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
    LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
    bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
-    bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
+    bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
+    bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
bool EmitLifetimeMarkers, bool RunEnzyme,
LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
@@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
  }
  ModulePassManager MPM;
-  bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
+  bool NeedThinLTOBufferPasses = EmitThinLTO;
+  auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
+  raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
+  raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
if (!NoPrepopulatePasses) {
// The pre-link pipelines don't support O0 and require using
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -976,7 +995,25 @@
  switch (OptStage) {
  case LLVMRustOptStage::PreLinkNoLTO:
-    MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
+    if (ThinLTOBufferRef) {
+      // This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
+      // bitcode for embedding is obtained after performing
+      // `ThinLTOPreLinkDefaultPipeline`.
+      MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
+      if (EmitThinLTO) {
+        MPM.addPass(ThinLTOBitcodeWriterPass(
+            ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
+      } else {
+        MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
+      }
+      *ThinLTOBufferRef = ThinLTOBuffer.release();
+      MPM.addPass(PB.buildModuleOptimizationPipeline(
+          OptLevel, ThinOrFullLTOPhase::None));
+      MPM.addPass(
+          createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
+    } else {
+      MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
+    }
break;
case LLVMRustOptStage::PreLinkThinLTO:
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -1022,6 +1059,16 @@
    MPM.addPass(CanonicalizeAliasesPass());
    MPM.addPass(NameAnonGlobalPass());
  }
+  // For `-Copt-level=0`, ThinLTO, or LTO.
+  if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
+    if (EmitThinLTO) {
+      MPM.addPass(ThinLTOBitcodeWriterPass(
+          ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
+    } else {
+      MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
+    }
+    *ThinLTOBufferRef = ThinLTOBuffer.release();
+  }
// now load "-enzyme" pass:
#ifdef ENZYME
@@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
  return true;
}

-// This struct and various functions are sort of a hack right now, but the
-// problem is that we've got in-memory LLVM modules after we generate and
-// optimize all codegen-units for one compilation in rustc. To be compatible
-// with the LTO support above we need to serialize the modules plus their
-// ThinLTO summary into memory.
-//
-// This structure is basically an owned version of a serialize module, with
-// a ThinLTO summary attached.
-struct LLVMRustThinLTOBuffer {
-  std::string data;
-  std::string thin_link_data;
-};
extern "C" LLVMRustThinLTOBuffer *
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

View file

@@ -539,7 +539,10 @@ impl FromStr for SplitDwarfKind {
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord, HashStable_Generic)]
#[derive(Encodable, Decodable)]
pub enum OutputType {
+    /// This is the optimized bitcode, which could be either pre-LTO or non-LTO bitcode,
+    /// depending on the specific request type.
    Bitcode,
+    /// This is the summary or index data part of the ThinLTO bitcode.
ThinLinkBitcode,
Assembly,
LlvmAssembly,

View file

@@ -9,14 +9,14 @@ struct S(i32);
struct SmallStruct(f32, Option<S>, &'static [f32]);

-// CHECK: @0 = private unnamed_addr constant
+// CHECK: [[const:@.*]] = private unnamed_addr constant
// CHECK-SAME: , align 8

#[no_mangle]
pub fn overaligned_constant() {
    // CHECK-LABEL: @overaligned_constant
    // CHECK: [[full:%_.*]] = alloca [32 x i8], align 8
-    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 @0, i64 32, i1 false)
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 [[const]], i64 32, i1 false)
let mut s = S(1);
s.0 = 3;

View file

@@ -11,15 +11,15 @@ pub struct PartiallyUninit {
    y: MaybeUninit<[u8; 10]>,
}

-// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef
+// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant <{ [10 x i8] }> undef

-// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
+// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4

// This shouldn't contain undef, since it contains more chunks
// than the default value of uninit_const_chunk_threshold.
-// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4
+// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4

-// CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef
+// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant <{ [16384 x i8] }> undef
// CHECK-LABEL: @fully_uninit
#[no_mangle]

View file

@@ -0,0 +1,16 @@
#![crate_name = "interesting"]
#![crate_type = "rlib"]
extern crate opaque;
#[no_mangle]
#[inline(never)]
pub fn function_called_once() {
opaque::foo();
}
// CHECK-LABEL: @function_called_once
// CHECK-SAME: !prof [[function_called_once_id:![0-9]+]] {
// CHECK: "CG Profile"
// CHECK-NOT: "CG Profile"
// CHECK-DAG: [[function_called_once_id]] = !{!"function_entry_count", i64 1}

View file

@@ -0,0 +1,5 @@
extern crate interesting;
fn main() {
interesting::function_called_once();
}

View file

@@ -0,0 +1,5 @@
#![crate_name = "opaque"]
#![crate_type = "rlib"]
#[inline(never)]
pub fn foo() {}

View file

@@ -0,0 +1,67 @@
// This test case verifies that we successfully complete an LTO build with PGO
// using the embedded bitcode.
// It also ensures that the generated IR correctly includes the call results.
//@ needs-profiler-runtime
//@ ignore-cross-compile
use std::path::Path;
use run_make_support::{
has_extension, has_prefix, llvm_filecheck, llvm_profdata, rfs, run, rustc, shallow_find_files,
};
fn run_test(cg_units: usize) {
let path_prof_data_dir = Path::new("prof_data_dir");
if path_prof_data_dir.exists() {
rfs::remove_dir_all(path_prof_data_dir);
}
rfs::create_dir_all(&path_prof_data_dir);
let path_merged_profdata = path_prof_data_dir.join("merged.profdata");
rustc().input("opaque.rs").codegen_units(1).run();
rustc()
.input("interesting.rs")
.profile_generate(&path_prof_data_dir)
.opt()
.crate_type("lib,cdylib")
.codegen_units(cg_units)
.run();
rustc()
.input("main.rs")
.arg("-Clto=thin")
.opt()
.codegen_units(cg_units)
.profile_generate(&path_prof_data_dir)
.opt()
.run();
run("main");
llvm_profdata().merge().output(&path_merged_profdata).input(path_prof_data_dir).run();
rustc()
.input("interesting.rs")
.profile_use(&path_merged_profdata)
.opt()
.crate_type("lib,cdylib")
.codegen_units(cg_units)
.emit("link")
.run();
rustc()
.input("main.rs")
.arg("-Clto=thin")
.opt()
.codegen_units(cg_units)
.profile_use(&path_merged_profdata)
.emit("llvm-ir,link")
.opt()
.run();
let files = shallow_find_files(".", |path| {
has_prefix(path, "main.interesting.interesting") && has_extension(path, "ll")
});
assert_eq!(files.len(), 1);
let llvm_ir = &files[0];
llvm_filecheck().patterns("interesting.rs").stdin_buf(rfs::read(llvm_ir)).run();
}
fn main() {
run_test(1);
run_test(16);
}