1
Fork 0

The embedded bitcode should always be prepared for LTO/ThinLTO

This commit is contained in:
DianQK 2024-11-17 14:21:23 +08:00
parent 1805b33483
commit 1a99ca8da9
No known key found for this signature in database
13 changed files with 265 additions and 69 deletions

View file

@ -210,7 +210,7 @@ fn produce_final_output_artifacts(
// to get rid of it. // to get rid of it.
for output_type in crate_output.outputs.keys() { for output_type in crate_output.outputs.keys() {
match *output_type { match *output_type {
OutputType::Bitcode | OutputType::ThinLinkBitcode => { OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
// Cranelift doesn't have bitcode // Cranelift doesn't have bitcode
// user_wants_bitcode = true; // user_wants_bitcode = true;
// // Copy to .bc, but always keep the .0.bc. There is a later // // Copy to .bc, but always keep the .0.bc. There is a later

View file

@ -2,6 +2,7 @@ use std::collections::BTreeMap;
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
use std::ptr::NonNull;
use std::sync::Arc; use std::sync::Arc;
use std::{io, iter, slice}; use std::{io, iter, slice};
@ -655,14 +656,14 @@ pub(crate) fn run_pass_manager(
} }
unsafe { unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?; write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
} }
if cfg!(llvm_enzyme) && enable_ad { if cfg!(llvm_enzyme) && enable_ad {
let opt_stage = llvm::OptStage::FatLTO; let opt_stage = llvm::OptStage::FatLTO;
let stage = write::AutodiffStage::PostAD; let stage = write::AutodiffStage::PostAD;
unsafe { unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?; write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
} }
// This is the final IR, so people should be able to inspect the optimized autodiff output. // This is the final IR, so people should be able to inspect the optimized autodiff output.
@ -729,6 +730,11 @@ impl ThinBuffer {
ThinBuffer(buffer) ThinBuffer(buffer)
} }
} }
/// Wraps a raw `ThinLTOBuffer` pointer received over FFI in a [`ThinBuffer`].
///
/// # Safety
///
/// `ptr` must point to a live, properly initialized `ThinLTOBuffer` that nothing else
/// accesses for as long as the returned `ThinBuffer` exists: a unique reference to the
/// pointee is created here.
/// NOTE(review): presumably the returned value also takes over responsibility for
/// releasing the buffer — confirm against `ThinBuffer`'s `Drop` impl.
///
/// # Panics
///
/// Panics if `ptr` is null.
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
    let mut ptr =
        NonNull::new(ptr).expect("`ThinBuffer::from_raw_ptr` called with a null pointer");
    // SAFETY: `ptr` is non-null (checked above); validity and uniqueness of the pointee
    // are guaranteed by the caller per this function's safety contract.
    ThinBuffer(unsafe { ptr.as_mut() })
}
} }
impl ThinBufferMethods for ThinBuffer { impl ThinBufferMethods for ThinBuffer {

View file

@ -1,6 +1,7 @@
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::io::{self, Write}; use std::io::{self, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::ptr::null_mut;
use std::sync::Arc; use std::sync::Arc;
use std::{fs, slice, str}; use std::{fs, slice, str};
@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
TargetMachineFactoryFn, TargetMachineFactoryFn,
}; };
use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::small_c_str::SmallCStr; use rustc_data_structures::small_c_str::SmallCStr;
use rustc_errors::{DiagCtxtHandle, FatalError, Level}; use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>, cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>, dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>, module: &ModuleCodegen<ModuleLlvm>,
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
config: &ModuleConfig, config: &ModuleConfig,
opt_level: config::OptLevel, opt_level: config::OptLevel,
opt_stage: llvm::OptStage, opt_stage: llvm::OptStage,
@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
vectorize_loop = config.vectorize_loop; vectorize_loop = config.vectorize_loop;
} }
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme); trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); if thin_lto_buffer.is_some() {
assert!(
matches!(
opt_stage,
llvm::OptStage::PreLinkNoLTO
| llvm::OptStage::PreLinkFatLTO
| llvm::OptStage::PreLinkThinLTO
),
"the bitcode for LTO can only be obtained at the pre-link stage"
);
}
let pgo_gen_path = get_pgo_gen_path(config); let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config); let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config); let pgo_sample_use_path = get_pgo_sample_use_path(config);
@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
config.no_prepopulate_passes, config.no_prepopulate_passes,
config.verify_llvm_ir, config.verify_llvm_ir,
config.lint_llvm_ir, config.lint_llvm_ir,
using_thin_buffers, thin_lto_buffer,
config.emit_thin_lto,
config.emit_thin_lto_summary,
config.merge_functions, config.merge_functions,
unroll_loops, unroll_loops,
vectorize_slp, vectorize_slp,
@ -705,9 +719,56 @@ pub(crate) unsafe fn optimize(
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD). // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable); let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD }; let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
return unsafe { // The embedded bitcode is used to run LTO/ThinLTO.
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage) // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
// this point.
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
|| config.emit_thin_lto_summary
{
Some(null_mut())
} else {
None
}; };
unsafe {
llvm_optimize(
cgcx,
dcx,
module,
thin_lto_buffer.as_mut(),
config,
opt_level,
opt_stage,
autodiff_stage,
)
}?;
if let Some(thin_lto_buffer) = thin_lto_buffer {
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
}
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin_lto_buffer.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
}
} }
Ok(()) Ok(())
} }
@ -760,59 +821,47 @@ pub(crate) unsafe fn codegen(
// otherwise requested. // otherwise requested.
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name); let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
if config.bitcode_needed() { if config.bitcode_needed() {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
let data = thin.data();
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
if config.emit_bc || config.emit_obj == EmitObj::Bitcode { if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
let thin = {
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_make_bitcode",
&*module.name,
);
ThinBuffer::new(llmod, config.emit_thin_lto, false)
};
let data = thin.data();
let _timer = cgcx let _timer = cgcx
.prof .prof
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name); .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if let Err(err) = fs::write(&bc_out, data) { if let Err(err) = fs::write(&bc_out, data) {
dcx.emit_err(WriteBytecode { path: &bc_out, err }); dcx.emit_err(WriteBytecode { path: &bc_out, err });
} }
} }
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) { if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
&& module.kind == ModuleKind::Regular
{
let _timer = cgcx let _timer = cgcx
.prof .prof
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name); .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
let thin_bc_out =
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
let data = fs::read(&thin_bc_out).unwrap();
debug!("removing embed bitcode file {:?}", thin_bc_out);
ensure_removed(dcx, &thin_bc_out);
unsafe { unsafe {
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data); embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
} }
} }
} }

View file

@ -2421,7 +2421,9 @@ unsafe extern "C" {
NoPrepopulatePasses: bool, NoPrepopulatePasses: bool,
VerifyIR: bool, VerifyIR: bool,
LintIR: bool, LintIR: bool,
UseThinLTOBuffers: bool, ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
EmitThinLTO: bool,
EmitThinLTOSummary: bool,
MergeFunctions: bool, MergeFunctions: bool,
UnrollLoops: bool, UnrollLoops: bool,
SLPVectorize: bool, SLPVectorize: bool,

View file

@ -626,6 +626,9 @@ fn produce_final_output_artifacts(
// them for making an rlib. // them for making an rlib.
copy_if_one_unit(OutputType::Bitcode, true); copy_if_one_unit(OutputType::Bitcode, true);
} }
OutputType::ThinBitcode => {
copy_if_one_unit(OutputType::ThinBitcode, true);
}
OutputType::ThinLinkBitcode => { OutputType::ThinLinkBitcode => {
copy_if_one_unit(OutputType::ThinLinkBitcode, false); copy_if_one_unit(OutputType::ThinLinkBitcode, false);
} }

View file

@ -7,6 +7,7 @@
#include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/CommandFlags.h" #include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/AutoUpgrade.h"
@ -37,6 +38,7 @@
#include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h"
@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
GEN_SUBTARGETS GEN_SUBTARGETS
#undef SUBTARGET #undef SUBTARGET
// This struct and various functions are sort of a hack right now, but the
// problem is that we've got in-memory LLVM modules after we generate and
// optimize all codegen-units for one compilation in rustc. To be compatible
// with the LTO support in this file we need to serialize the modules plus
// their ThinLTO summary into memory.
//
// This structure is basically an owned version of a serialized module, with
// a ThinLTO summary attached.
struct LLVMRustThinLTOBuffer {
// Serialized module bitcode. `LLVMRustOptimize` fills this through a
// `raw_string_ostream` handed to either `ThinLTOBitcodeWriterPass` or
// `BitcodeWriterPass`.
std::string data;
// Thin-link (summary) data. `LLVMRustOptimize` only passes a stream for this
// field to `ThinLTOBitcodeWriterPass` when a ThinLTO summary was requested.
std::string thin_link_data;
};
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM, extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
const char *Feature) { const char *Feature) {
TargetMachine *Target = unwrap(TM); TargetMachine *Target = unwrap(TM);
@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef, LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage, LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR, bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops, bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls, bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
bool EmitLifetimeMarkers, bool RunEnzyme, bool EmitLifetimeMarkers, bool RunEnzyme,
LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath, LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
} }
ModulePassManager MPM; ModulePassManager MPM;
bool NeedThinLTOBufferPasses = UseThinLTOBuffers; bool NeedThinLTOBufferPasses = EmitThinLTO;
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
if (!NoPrepopulatePasses) { if (!NoPrepopulatePasses) {
// The pre-link pipelines don't support O0 and require using // The pre-link pipelines don't support O0 and require using
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do // buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@ -976,7 +995,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
switch (OptStage) { switch (OptStage) {
case LLVMRustOptStage::PreLinkNoLTO: case LLVMRustOptStage::PreLinkNoLTO:
MPM = PB.buildPerModuleDefaultPipeline(OptLevel); if (ThinLTOBufferRef) {
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
// bitcode for embedding is obtained after performing
// `ThinLTOPreLinkDefaultPipeline`.
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
if (EmitThinLTO) {
MPM.addPass(ThinLTOBitcodeWriterPass(
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
} else {
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
}
*ThinLTOBufferRef = ThinLTOBuffer.release();
MPM.addPass(PB.buildModuleOptimizationPipeline(
OptLevel, ThinOrFullLTOPhase::None));
MPM.addPass(
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
} else {
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
}
break; break;
case LLVMRustOptStage::PreLinkThinLTO: case LLVMRustOptStage::PreLinkThinLTO:
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel); MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@ -1022,6 +1059,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass()); MPM.addPass(NameAnonGlobalPass());
} }
// For `-Copt-level=0`, ThinLTO, or LTO.
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
if (EmitThinLTO) {
MPM.addPass(ThinLTOBitcodeWriterPass(
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
} else {
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
}
*ThinLTOBufferRef = ThinLTOBuffer.release();
}
// now load "-enzyme" pass: // now load "-enzyme" pass:
#ifdef ENZYME #ifdef ENZYME
@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
return true; return true;
} }
// This struct and various functions are sort of a hack right now, but the
// problem is that we've got in-memory LLVM modules after we generate and
// optimize all codegen-units for one compilation in rustc. To be compatible
// with the LTO support above we need to serialize the modules plus their
// ThinLTO summary into memory.
//
// This structure is basically an owned version of a serialize module, with
// a ThinLTO summary attached.
struct LLVMRustThinLTOBuffer {
std::string data;
std::string thin_link_data;
};
extern "C" LLVMRustThinLTOBuffer * extern "C" LLVMRustThinLTOBuffer *
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) { LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>(); auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

View file

@ -541,6 +541,7 @@ impl FromStr for SplitDwarfKind {
pub enum OutputType { pub enum OutputType {
Bitcode, Bitcode,
ThinLinkBitcode, ThinLinkBitcode,
ThinBitcode,
Assembly, Assembly,
LlvmAssembly, LlvmAssembly,
Mir, Mir,
@ -571,6 +572,7 @@ impl OutputType {
OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true, OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true,
OutputType::Bitcode OutputType::Bitcode
| OutputType::ThinLinkBitcode | OutputType::ThinLinkBitcode
| OutputType::ThinBitcode
| OutputType::Assembly | OutputType::Assembly
| OutputType::LlvmAssembly | OutputType::LlvmAssembly
| OutputType::Mir | OutputType::Mir
@ -582,6 +584,7 @@ impl OutputType {
match *self { match *self {
OutputType::Bitcode => "llvm-bc", OutputType::Bitcode => "llvm-bc",
OutputType::ThinLinkBitcode => "thin-link-bitcode", OutputType::ThinLinkBitcode => "thin-link-bitcode",
OutputType::ThinBitcode => "thin-llvm-bc",
OutputType::Assembly => "asm", OutputType::Assembly => "asm",
OutputType::LlvmAssembly => "llvm-ir", OutputType::LlvmAssembly => "llvm-ir",
OutputType::Mir => "mir", OutputType::Mir => "mir",
@ -599,6 +602,7 @@ impl OutputType {
"mir" => OutputType::Mir, "mir" => OutputType::Mir,
"llvm-bc" => OutputType::Bitcode, "llvm-bc" => OutputType::Bitcode,
"thin-link-bitcode" => OutputType::ThinLinkBitcode, "thin-link-bitcode" => OutputType::ThinLinkBitcode,
"thin-llvm-bc" => OutputType::ThinBitcode,
"obj" => OutputType::Object, "obj" => OutputType::Object,
"metadata" => OutputType::Metadata, "metadata" => OutputType::Metadata,
"link" => OutputType::Exe, "link" => OutputType::Exe,
@ -609,9 +613,10 @@ impl OutputType {
fn shorthands_display() -> String { fn shorthands_display() -> String {
format!( format!(
"`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`", "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`",
OutputType::Bitcode.shorthand(), OutputType::Bitcode.shorthand(),
OutputType::ThinLinkBitcode.shorthand(), OutputType::ThinLinkBitcode.shorthand(),
OutputType::ThinBitcode.shorthand(),
OutputType::Assembly.shorthand(), OutputType::Assembly.shorthand(),
OutputType::LlvmAssembly.shorthand(), OutputType::LlvmAssembly.shorthand(),
OutputType::Mir.shorthand(), OutputType::Mir.shorthand(),
@ -626,6 +631,7 @@ impl OutputType {
match *self { match *self {
OutputType::Bitcode => "bc", OutputType::Bitcode => "bc",
OutputType::ThinLinkBitcode => "indexing.o", OutputType::ThinLinkBitcode => "indexing.o",
OutputType::ThinBitcode => "thin.bc",
OutputType::Assembly => "s", OutputType::Assembly => "s",
OutputType::LlvmAssembly => "ll", OutputType::LlvmAssembly => "ll",
OutputType::Mir => "mir", OutputType::Mir => "mir",
@ -644,6 +650,7 @@ impl OutputType {
| OutputType::DepInfo => true, | OutputType::DepInfo => true,
OutputType::Bitcode OutputType::Bitcode
| OutputType::ThinLinkBitcode | OutputType::ThinLinkBitcode
| OutputType::ThinBitcode
| OutputType::Object | OutputType::Object
| OutputType::Metadata | OutputType::Metadata
| OutputType::Exe => false, | OutputType::Exe => false,
@ -731,6 +738,7 @@ impl OutputTypes {
self.0.keys().any(|k| match *k { self.0.keys().any(|k| match *k {
OutputType::Bitcode OutputType::Bitcode
| OutputType::ThinLinkBitcode | OutputType::ThinLinkBitcode
| OutputType::ThinBitcode
| OutputType::Assembly | OutputType::Assembly
| OutputType::LlvmAssembly | OutputType::LlvmAssembly
| OutputType::Mir | OutputType::Mir
@ -745,6 +753,7 @@ impl OutputTypes {
self.0.keys().any(|k| match *k { self.0.keys().any(|k| match *k {
OutputType::Bitcode OutputType::Bitcode
| OutputType::ThinLinkBitcode | OutputType::ThinLinkBitcode
| OutputType::ThinBitcode
| OutputType::Assembly | OutputType::Assembly
| OutputType::LlvmAssembly | OutputType::LlvmAssembly
| OutputType::Mir | OutputType::Mir

View file

@ -9,14 +9,14 @@ struct S(i32);
struct SmallStruct(f32, Option<S>, &'static [f32]); struct SmallStruct(f32, Option<S>, &'static [f32]);
// CHECK: @0 = private unnamed_addr constant // CHECK: [[const:@.*]] = private unnamed_addr constant
// CHECK-SAME: , align 8 // CHECK-SAME: , align 8
#[no_mangle] #[no_mangle]
pub fn overaligned_constant() { pub fn overaligned_constant() {
// CHECK-LABEL: @overaligned_constant // CHECK-LABEL: @overaligned_constant
// CHECK: [[full:%_.*]] = alloca [32 x i8], align 8 // CHECK: [[full:%_.*]] = alloca [32 x i8], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 @0, i64 32, i1 false) // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 [[const]], i64 32, i1 false)
let mut s = S(1); let mut s = S(1);
s.0 = 3; s.0 = 3;

View file

@ -11,15 +11,15 @@ pub struct PartiallyUninit {
y: MaybeUninit<[u8; 10]>, y: MaybeUninit<[u8; 10]>,
} }
// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef // CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant <{ [10 x i8] }> undef
// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4 // CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
// This shouldn't contain undef, since it contains more chunks // This shouldn't contain undef, since it contains more chunks
// than the default value of uninit_const_chunk_threshold. // than the default value of uninit_const_chunk_threshold.
// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 // CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4
// CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef // CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant <{ [16384 x i8] }> undef
// CHECK-LABEL: @fully_uninit // CHECK-LABEL: @fully_uninit
#[no_mangle] #[no_mangle]

View file

@ -0,0 +1,16 @@
#![crate_name = "interesting"]
#![crate_type = "rlib"]
extern crate opaque;
// Fixture for the PGO + ThinLTO run-make test. `main.rs` calls this function
// exactly once, so the profile recorded for it should report an entry count of
// 1; the FileCheck directives that follow this function look for that count.
// `#[no_mangle]` keeps the symbol name stable so those directives can match it.
// NOTE(review): `#[inline(never)]` presumably keeps this a standalone function
// so it retains its own profile metadata — confirm.
#[no_mangle]
#[inline(never)]
pub fn function_called_once() {
// Call into the separately compiled `opaque` crate.
opaque::foo();
}
// CHECK-LABEL: @function_called_once
// CHECK-SAME: !prof [[function_called_once_id:![0-9]+]] {
// CHECK: "CG Profile"
// CHECK-NOT: "CG Profile"
// CHECK-DAG: [[function_called_once_id]] = !{!"function_entry_count", i64 1}

View file

@ -0,0 +1,5 @@
extern crate interesting;
// Entry point of the profiled binary: makes the single call into the
// `interesting` crate whose count the test later looks for in the emitted IR.
fn main() {
interesting::function_called_once();
}

View file

@ -0,0 +1,5 @@
#![crate_name = "opaque"]
#![crate_type = "rlib"]
// Empty function living in its own crate; `interesting::function_called_once`
// calls it.
// NOTE(review): `#[inline(never)]` presumably prevents the call from being
// inlined away across the crate boundary — confirm.
#[inline(never)]
pub fn foo() {}

View file

@ -0,0 +1,67 @@
// This test case verifies that we successfully complete an LTO build with PGO
// using the embedded bitcode.
// It also ensures that the generated IR correctly includes the call results.
//@ needs-profiler-runtime
//@ ignore-cross-compile
use std::path::Path;
use run_make_support::{
has_extension, has_prefix, llvm_filecheck, llvm_profdata, rfs, run, rustc, shallow_find_files,
};
/// Runs one full PGO + ThinLTO round trip with `cg_units` codegen units.
///
/// The steps are:
/// 1. build `opaque`, then build `interesting` and `main` with profile instrumentation;
/// 2. run the instrumented binary and merge the raw profiles it wrote;
/// 3. rebuild `interesting` and `main` using the merged profile, emitting LLVM IR for
///    the ThinLTO build of `main`;
/// 4. run FileCheck with the patterns in `interesting.rs` against the single emitted
///    IR file belonging to the `interesting` module.
fn run_test(cg_units: usize) {
    let path_prof_data_dir = Path::new("prof_data_dir");
    // `run_test` is invoked more than once, so start from an empty profile directory
    // rather than one holding data from the previous invocation.
    if path_prof_data_dir.exists() {
        rfs::remove_dir_all(path_prof_data_dir);
    }
    rfs::create_dir_all(path_prof_data_dir);
    let path_merged_profdata = path_prof_data_dir.join("merged.profdata");

    rustc().input("opaque.rs").codegen_units(1).run();

    // Step 1: instrumented builds.
    rustc()
        .input("interesting.rs")
        .profile_generate(path_prof_data_dir)
        .opt()
        .crate_type("lib,cdylib")
        .codegen_units(cg_units)
        .run();
    rustc()
        .input("main.rs")
        .arg("-Clto=thin")
        .opt()
        .codegen_units(cg_units)
        .profile_generate(path_prof_data_dir)
        .run();

    // Step 2: collect and merge the profile data.
    run("main");
    llvm_profdata().merge().output(&path_merged_profdata).input(path_prof_data_dir).run();

    // Step 3: profile-guided rebuilds.
    rustc()
        .input("interesting.rs")
        .profile_use(&path_merged_profdata)
        .opt()
        .crate_type("lib,cdylib")
        .codegen_units(cg_units)
        .emit("link")
        .run();
    rustc()
        .input("main.rs")
        .arg("-Clto=thin")
        .opt()
        .codegen_units(cg_units)
        .profile_use(&path_merged_profdata)
        .emit("llvm-ir,link")
        .run();

    // Step 4: exactly one `.ll` file is expected for the `interesting` module.
    let files = shallow_find_files(".", |path| {
        has_prefix(path, "main.interesting.interesting") && has_extension(path, "ll")
    });
    assert_eq!(files.len(), 1);
    let llvm_ir = &files[0];
    llvm_filecheck().patterns("interesting.rs").stdin_buf(rfs::read(llvm_ir)).run();
}
fn main() {
run_test(1);
run_test(16);
}