Auto merge of #133250 - DianQK:embed-bitcode-pgo, r=nikic
The embedded bitcode should always be prepared for LTO/ThinLTO. Fixes #115344. Fixes #117220. There are currently two methods for generating the bitcode that is used for LTO. One method involves using `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is through `-C embed-bitcode=yes`. When compiling with `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain the bitcode, and then that bitcode is used for LTO. As a result, we run the Call Graph Profile Pass twice on the same module. This PR does something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`. r? nikic
This commit is contained in:
commit
0c72c0d11a
20 changed files with 294 additions and 101 deletions
|
@ -7,6 +7,7 @@
|
|||
#include "llvm/Analysis/Lint.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Bitcode/BitcodeWriter.h"
|
||||
#include "llvm/Bitcode/BitcodeWriterPass.h"
|
||||
#include "llvm/CodeGen/CommandFlags.h"
|
||||
#include "llvm/IR/AssemblyAnnotationWriter.h"
|
||||
#include "llvm/IR/AutoUpgrade.h"
|
||||
|
@ -37,6 +38,7 @@
|
|||
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
|
||||
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
|
||||
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
|
||||
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
|
||||
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
|
||||
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
||||
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
|
||||
|
@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
|
|||
GEN_SUBTARGETS
|
||||
#undef SUBTARGET
|
||||
|
||||
// This struct and various functions are sort of a hack right now, but the
|
||||
// problem is that we've got in-memory LLVM modules after we generate and
|
||||
// optimize all codegen-units for one compilation in rustc. To be compatible
|
||||
// with the LTO support above we need to serialize the modules plus their
|
||||
// ThinLTO summary into memory.
|
||||
//
|
||||
// This structure is basically an owned version of a serialized module, with
|
||||
// a ThinLTO summary attached.
|
||||
// Owned pair of byte strings holding a serialized module. In
// LLVMRustOptimize both strings are wrapped in `raw_string_ostream`s and
// filled by the bitcode writer passes added to the module pass manager.
struct LLVMRustThinLTOBuffer {
|
||||
// Serialized module bitcode: the output stream of either
// `ThinLTOBitcodeWriterPass` or `BitcodeWriterPass`.
std::string data;
|
||||
// Secondary "thin link" output of `ThinLTOBitcodeWriterPass`. In
// LLVMRustOptimize it is only written when the ThinLTO writer is used and a
// summary was requested; otherwise nothing is streamed into it there.
std::string thin_link_data;
|
||||
};
|
||||
|
||||
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
|
||||
const char *Feature) {
|
||||
TargetMachine *Target = unwrap(TM);
|
||||
|
@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
|
||||
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
|
||||
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
|
||||
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
|
||||
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
|
||||
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
|
||||
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
|
||||
bool EmitLifetimeMarkers, bool RunEnzyme,
|
||||
LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
|
||||
|
@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
}
|
||||
|
||||
ModulePassManager MPM;
|
||||
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
|
||||
bool NeedThinLTOBufferPasses = EmitThinLTO;
|
||||
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
|
||||
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
|
||||
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
|
||||
if (!NoPrepopulatePasses) {
|
||||
// The pre-link pipelines don't support O0 and require using
|
||||
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
|
||||
|
@ -976,7 +995,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
|
||||
switch (OptStage) {
|
||||
case LLVMRustOptStage::PreLinkNoLTO:
|
||||
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
|
||||
if (ThinLTOBufferRef) {
|
||||
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
|
||||
// bitcode for embedding is obtained after performing
|
||||
// `ThinLTOPreLinkDefaultPipeline`.
|
||||
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
|
||||
if (EmitThinLTO) {
|
||||
MPM.addPass(ThinLTOBitcodeWriterPass(
|
||||
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
|
||||
} else {
|
||||
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
|
||||
}
|
||||
*ThinLTOBufferRef = ThinLTOBuffer.release();
|
||||
MPM.addPass(PB.buildModuleOptimizationPipeline(
|
||||
OptLevel, ThinOrFullLTOPhase::None));
|
||||
MPM.addPass(
|
||||
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
|
||||
} else {
|
||||
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
|
||||
}
|
||||
break;
|
||||
case LLVMRustOptStage::PreLinkThinLTO:
|
||||
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
|
||||
|
@ -1022,6 +1059,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
MPM.addPass(CanonicalizeAliasesPass());
|
||||
MPM.addPass(NameAnonGlobalPass());
|
||||
}
|
||||
// For `-Copt-level=0`, ThinLTO, or LTO.
|
||||
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
|
||||
if (EmitThinLTO) {
|
||||
MPM.addPass(ThinLTOBitcodeWriterPass(
|
||||
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
|
||||
} else {
|
||||
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
|
||||
}
|
||||
*ThinLTOBufferRef = ThinLTOBuffer.release();
|
||||
}
|
||||
|
||||
// now load "-enzyme" pass:
|
||||
#ifdef ENZYME
|
||||
|
@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
|
|||
return true;
|
||||
}
|
||||
|
||||
// This struct and various functions are sort of a hack right now, but the
|
||||
// problem is that we've got in-memory LLVM modules after we generate and
|
||||
// optimize all codegen-units for one compilation in rustc. To be compatible
|
||||
// with the LTO support above we need to serialize the modules plus their
|
||||
// ThinLTO summary into memory.
|
||||
//
|
||||
// This structure is basically an owned version of a serialized module, with
|
||||
// a ThinLTO summary attached.
|
||||
// Owned pair of byte strings holding a serialized module and its optional
// thin-link companion data.
struct LLVMRustThinLTOBuffer {
|
||||
// Serialized module bitcode.
std::string data;
|
||||
// Thin-link bitcode accompanying `data`; presumably left empty when no
// summary is requested (confirm against the writers of this field).
std::string thin_link_data;
|
||||
};
|
||||
|
||||
extern "C" LLVMRustThinLTOBuffer *
|
||||
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
|
||||
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue