1
Fork 0

Add LLVM attributes in batches instead of individually

This should improve performance.
This commit is contained in:
Erik Desjardins 2022-02-21 11:19:16 -05:00
parent 6f681a8eb3
commit 30d3ce0674
11 changed files with 451 additions and 473 deletions

View file

@ -7,53 +7,75 @@ use rustc_codegen_ssa::traits::*;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::ty::layout::HasTyCtxt;
use rustc_middle::ty::{self, TyCtxt};
use rustc_session::config::OptLevel;
use rustc_session::Session;
use rustc_target::spec::abi::Abi;
use rustc_target::spec::{FramePointer, SanitizerSet, StackProbeType, StackProtector};
use smallvec::SmallVec;
use crate::attributes;
use crate::llvm::AttributePlace::Function;
use crate::llvm::{self, Attribute};
use crate::llvm::{self, Attribute, AttributeKind, AttributePlace};
use crate::llvm_util;
pub use rustc_attr::{InlineAttr, InstructionSetAttr, OptimizeAttr};
use crate::context::CodegenCx;
use crate::value::Value;
/// Mark LLVM function to use provided inline heuristic.
#[inline]
fn inline<'ll>(cx: &CodegenCx<'ll, '_>, val: &'ll Value, inline: InlineAttr) {
use self::InlineAttr::*;
match inline {
Hint => Attribute::InlineHint.apply_llfn(Function, val),
Always => Attribute::AlwaysInline.apply_llfn(Function, val),
Never => {
if cx.tcx().sess.target.arch != "amdgpu" {
Attribute::NoInline.apply_llfn(Function, val);
}
}
None => {}
};
/// Adds every attribute in `attrs` to `llfn` at position `idx` through a
/// single `llvm::AddFunctionAttributes` call. An empty `attrs` slice is a no-op.
pub fn apply_to_llfn(llfn: &Value, idx: AttributePlace, attrs: &[&Attribute]) {
    // Nothing to add: skip the LLVM call entirely.
    if attrs.is_empty() {
        return;
    }
    llvm::AddFunctionAttributes(llfn, idx, attrs);
}
/// Apply LLVM sanitize attributes.
/// Removes every attribute kind in `attrs` from `llfn` at position `idx`
/// through a single `llvm::RemoveFunctionAttributes` call. An empty `attrs`
/// slice is a no-op.
pub fn remove_from_llfn(llfn: &Value, idx: AttributePlace, attrs: &[AttributeKind]) {
    // Nothing to remove: skip the LLVM call entirely.
    if attrs.is_empty() {
        return;
    }
    llvm::RemoveFunctionAttributes(llfn, idx, attrs);
}
/// Adds every attribute in `attrs` to the call site `callsite` at position
/// `idx` through a single `llvm::AddCallSiteAttributes` call. An empty `attrs`
/// slice is a no-op.
pub fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[&Attribute]) {
    // Nothing to add: skip the LLVM call entirely.
    if attrs.is_empty() {
        return;
    }
    llvm::AddCallSiteAttributes(callsite, idx, attrs);
}
/// Get LLVM attribute for the provided inline heuristic.
#[inline]
pub fn sanitize<'ll>(cx: &CodegenCx<'ll, '_>, no_sanitize: SanitizerSet, llfn: &'ll Value) {
fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll Attribute> {
    // Choose the LLVM attribute kind first; bail out with `None` whenever no
    // attribute should be emitted.
    let kind = match inline {
        InlineAttr::None => return None,
        InlineAttr::Hint => AttributeKind::InlineHint,
        InlineAttr::Always => AttributeKind::AlwaysInline,
        // `NoInline` is not emitted when the target arch is "amdgpu".
        InlineAttr::Never if cx.sess().target.arch == "amdgpu" => return None,
        InlineAttr::Never => AttributeKind::NoInline,
    };
    Some(kind.create_attr(cx.llcx))
}
/// Get LLVM sanitize attributes.
#[inline]
pub fn sanitize_attrs<'ll>(
cx: &CodegenCx<'ll, '_>,
no_sanitize: SanitizerSet,
) -> SmallVec<impl smallvec::Array<Item = &'ll Attribute>> {
let mut attrs = SmallVec::<[_; 4]>::new();
let enabled = cx.tcx.sess.opts.debugging_opts.sanitizer - no_sanitize;
if enabled.contains(SanitizerSet::ADDRESS) {
llvm::Attribute::SanitizeAddress.apply_llfn(Function, llfn);
attrs.push(llvm::AttributeKind::SanitizeAddress.create_attr(cx.llcx));
}
if enabled.contains(SanitizerSet::MEMORY) {
llvm::Attribute::SanitizeMemory.apply_llfn(Function, llfn);
attrs.push(llvm::AttributeKind::SanitizeMemory.create_attr(cx.llcx));
}
if enabled.contains(SanitizerSet::THREAD) {
llvm::Attribute::SanitizeThread.apply_llfn(Function, llfn);
attrs.push(llvm::AttributeKind::SanitizeThread.create_attr(cx.llcx));
}
if enabled.contains(SanitizerSet::HWADDRESS) {
llvm::Attribute::SanitizeHWAddress.apply_llfn(Function, llfn);
attrs.push(llvm::AttributeKind::SanitizeHWAddress.create_attr(cx.llcx));
}
if enabled.contains(SanitizerSet::MEMTAG) {
// Check to make sure the mte target feature is actually enabled.
@ -66,26 +88,21 @@ pub fn sanitize<'ll>(cx: &CodegenCx<'ll, '_>, no_sanitize: SanitizerSet, llfn: &
sess.err("`-Zsanitizer=memtag` requires `-Ctarget-feature=+mte`");
}
llvm::Attribute::SanitizeMemTag.apply_llfn(Function, llfn);
attrs.push(llvm::AttributeKind::SanitizeMemTag.create_attr(cx.llcx));
}
attrs
}
/// Tell LLVM to emit or not emit the information necessary to unwind the stack for the function.
#[inline]
pub fn emit_uwtable(val: &Value) {
pub fn uwtable_attr(llcx: &llvm::Context) -> &Attribute {
// NOTE: We should determine if we even need async unwind tables, as they
// have more overhead and if we can use sync unwind tables we
// probably should.
llvm::EmitUWTableAttr(val, true);
llvm::CreateUWTableAttr(llcx, true)
}
/// Tell LLVM if this function should be 'naked', i.e., skip the epilogue and prologue.
#[inline]
fn naked(val: &Value, is_naked: bool) {
Attribute::Naked.toggle_llfn(Function, val, is_naked);
}
pub fn set_frame_pointer_type<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
pub fn frame_pointer_type_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
let mut fp = cx.sess().target.frame_pointer;
// "mcount" function relies on stack pointer.
// See <https://sourceware.org/binutils/docs/gprof/Implementation.html>.
@ -96,19 +113,14 @@ pub fn set_frame_pointer_type<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
let attr_value = match fp {
FramePointer::Always => cstr!("all"),
FramePointer::NonLeaf => cstr!("non-leaf"),
FramePointer::MayOmit => return,
FramePointer::MayOmit => return None,
};
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("frame-pointer"),
attr_value,
);
Some(llvm::CreateAttrStringValue(cx.llcx, cstr!("frame-pointer"), attr_value))
}
/// Tell LLVM what instrument function to insert.
#[inline]
fn set_instrument_function<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
fn instrument_function_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
if cx.sess().instrument_mcount() {
// Similar to `clang -pg` behavior. Handled by the
// `post-inline-ee-instrument` LLVM pass.
@ -117,16 +129,17 @@ fn set_instrument_function<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
// See test/CodeGen/mcount.c in clang.
let mcount_name = CString::new(cx.sess().target.mcount.as_str().as_bytes()).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
Some(llvm::CreateAttrStringValue(
cx.llcx,
cstr!("instrument-function-entry-inlined"),
&mcount_name,
);
))
} else {
None
}
}
fn set_probestack<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
fn probestack_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
// Currently stack probes seem somewhat incompatible with the address
// sanitizer and thread sanitizer. With asan we're already protected from
// stack overflow anyway so we don't really need stack probes regardless.
@ -137,107 +150,105 @@ fn set_probestack<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
.sanitizer
.intersects(SanitizerSet::ADDRESS | SanitizerSet::THREAD)
{
return;
return None;
}
// probestack doesn't play nice either with `-C profile-generate`.
if cx.sess().opts.cg.profile_generate.enabled() {
return;
return None;
}
// probestack doesn't play nice either with gcov profiling.
if cx.sess().opts.debugging_opts.profile {
return;
return None;
}
let attr_value = match cx.sess().target.stack_probes {
StackProbeType::None => None,
StackProbeType::None => return None,
// Request LLVM to generate the probes inline. If the given LLVM version does not support
// this, no probe is generated at all (even if the attribute is specified).
StackProbeType::Inline => Some(cstr!("inline-asm")),
StackProbeType::Inline => cstr!("inline-asm"),
// Flag our internal `__rust_probestack` function as the stack probe symbol.
// This is defined in the `compiler-builtins` crate for each architecture.
StackProbeType::Call => Some(cstr!("__rust_probestack")),
StackProbeType::Call => cstr!("__rust_probestack"),
// Pick from the two above based on the LLVM version.
StackProbeType::InlineOrCall { min_llvm_version_for_inline } => {
if llvm_util::get_version() < min_llvm_version_for_inline {
Some(cstr!("__rust_probestack"))
cstr!("__rust_probestack")
} else {
Some(cstr!("inline-asm"))
cstr!("inline-asm")
}
}
};
if let Some(attr_value) = attr_value {
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("probe-stack"),
attr_value,
);
}
Some(llvm::CreateAttrStringValue(cx.llcx, cstr!("probe-stack"), attr_value))
}
fn set_stackprotector<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
fn stackprotector_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
let sspattr = match cx.sess().stack_protector() {
StackProtector::None => return,
StackProtector::All => Attribute::StackProtectReq,
StackProtector::Strong => Attribute::StackProtectStrong,
StackProtector::Basic => Attribute::StackProtect,
StackProtector::None => return None,
StackProtector::All => AttributeKind::StackProtectReq,
StackProtector::Strong => AttributeKind::StackProtectStrong,
StackProtector::Basic => AttributeKind::StackProtect,
};
sspattr.apply_llfn(Function, llfn)
Some(sspattr.create_attr(cx.llcx))
}
pub fn apply_target_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
pub fn target_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll Attribute {
let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("target-cpu"),
target_cpu.as_c_str(),
);
llvm::CreateAttrStringValue(cx.llcx, cstr!("target-cpu"), target_cpu.as_c_str())
}
pub fn apply_tune_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
pub fn tune_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
llvm_util::tune_cpu(cx.tcx.sess).map(|tune| {
let tune_cpu = SmallCStr::new(tune);
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("tune-cpu"),
tune_cpu.as_c_str(),
);
}
llvm::CreateAttrStringValue(cx.llcx, cstr!("tune-cpu"), tune_cpu.as_c_str())
})
}
/// Sets the `NonLazyBind` LLVM attribute on a given function,
/// assuming the codegen options allow skipping the PLT.
pub fn non_lazy_bind<'ll>(sess: &Session, llfn: &'ll Value) {
/// Get the `NonLazyBind` LLVM attribute,
/// if the codegen options allow skipping the PLT.
pub fn non_lazy_bind_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
// Don't generate calls through PLT if it's not necessary
if !sess.needs_plt() {
Attribute::NonLazyBind.apply_llfn(Function, llfn);
if !cx.sess().needs_plt() {
Some(AttributeKind::NonLazyBind.create_attr(cx.llcx))
} else {
None
}
}
pub(crate) fn default_optimisation_attrs<'ll>(sess: &Session, llfn: &'ll Value) {
match sess.opts.optimize {
/// Returns attributes to remove and to add, respectively,
/// to set the default optimizations attrs on a function.
#[inline]
pub(crate) fn default_optimisation_attrs<'ll>(
cx: &CodegenCx<'ll, '_>,
) -> (
// Attributes to remove
SmallVec<impl smallvec::Array<Item = AttributeKind>>,
// Attributes to add
SmallVec<impl smallvec::Array<Item = &'ll Attribute>>,
) {
let mut to_remove = SmallVec::<[_; 3]>::new();
let mut to_add = SmallVec::<[_; 2]>::new();
match cx.sess().opts.optimize {
OptLevel::Size => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
to_remove.push(llvm::AttributeKind::MinSize);
to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
to_remove.push(llvm::AttributeKind::OptimizeNone);
}
OptLevel::SizeMin => {
llvm::Attribute::MinSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
to_remove.push(llvm::AttributeKind::OptimizeNone);
}
OptLevel::No => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
to_remove.push(llvm::AttributeKind::MinSize);
to_remove.push(llvm::AttributeKind::OptimizeForSize);
to_remove.push(llvm::AttributeKind::OptimizeNone);
}
_ => {}
}
(to_remove, to_add)
}
/// Composite function which sets LLVM attributes for function depending on its AST (`#[attribute]`)
@ -249,30 +260,35 @@ pub fn from_fn_attrs<'ll, 'tcx>(
) {
let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
let mut to_remove = SmallVec::<[_; 4]>::new();
let mut to_add = SmallVec::<[_; 16]>::new();
match codegen_fn_attrs.optimize {
OptimizeAttr::None => {
default_optimisation_attrs(cx.tcx.sess, llfn);
let (to_remove_opt, to_add_opt) = default_optimisation_attrs(cx);
to_remove.extend(to_remove_opt);
to_add.extend(to_add_opt);
}
OptimizeAttr::Speed => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
to_remove.push(llvm::AttributeKind::MinSize);
to_remove.push(llvm::AttributeKind::OptimizeForSize);
to_remove.push(llvm::AttributeKind::OptimizeNone);
}
OptimizeAttr::Size => {
llvm::Attribute::MinSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
to_remove.push(llvm::AttributeKind::OptimizeNone);
}
}
let inline_attr = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
let inline = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
InlineAttr::Never
} else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
InlineAttr::Hint
} else {
codegen_fn_attrs.inline
};
inline(cx, llfn, inline_attr);
to_add.extend(inline_attr(cx, inline));
// The `uwtable` attribute according to LLVM is:
//
@ -291,52 +307,54 @@ pub fn from_fn_attrs<'ll, 'tcx>(
// You can also find more info on why Windows always requires uwtables here:
// https://bugzilla.mozilla.org/show_bug.cgi?id=1302078
if cx.sess().must_emit_unwind_tables() {
attributes::emit_uwtable(llfn);
to_add.push(uwtable_attr(cx.llcx));
}
if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
to_add.push(llvm::CreateAttrString(cx.llcx, cstr!("use-sample-profile")));
}
// FIXME: none of these four functions interacts with source level attributes.
set_frame_pointer_type(cx, llfn);
set_instrument_function(cx, llfn);
set_probestack(cx, llfn);
set_stackprotector(cx, llfn);
to_add.extend(frame_pointer_type_attr(cx));
to_add.extend(instrument_function_attr(cx));
to_add.extend(probestack_attr(cx));
to_add.extend(stackprotector_attr(cx));
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
Attribute::Cold.apply_llfn(Function, llfn);
to_add.push(AttributeKind::Cold.create_attr(cx.llcx));
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) {
Attribute::ReturnsTwice.apply_llfn(Function, llfn);
to_add.push(AttributeKind::ReturnsTwice.create_attr(cx.llcx));
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) {
Attribute::ReadOnly.apply_llfn(Function, llfn);
to_add.push(AttributeKind::ReadOnly.create_attr(cx.llcx));
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) {
Attribute::ReadNone.apply_llfn(Function, llfn);
to_add.push(AttributeKind::ReadNone.create_attr(cx.llcx));
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
naked(llfn, true);
to_add.push(AttributeKind::Naked.create_attr(cx.llcx));
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::ALLOCATOR) {
Attribute::NoAlias.apply_llfn(llvm::AttributePlace::ReturnValue, llfn);
// apply to return place instead of function (unlike all other attributes applied in this function)
let no_alias = AttributeKind::NoAlias.create_attr(cx.llcx);
attributes::apply_to_llfn(llfn, AttributePlace::ReturnValue, &[no_alias]);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::CMSE_NONSECURE_ENTRY) {
llvm::AddFunctionAttrString(llfn, Function, cstr!("cmse_nonsecure_entry"));
to_add.push(llvm::CreateAttrString(cx.llcx, cstr!("cmse_nonsecure_entry")));
}
if let Some(align) = codegen_fn_attrs.alignment {
llvm::set_alignment(llfn, align as usize);
}
sanitize(cx, codegen_fn_attrs.no_sanitize, llfn);
to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));
// Always annotate functions with the target-cpu they are compiled for.
// Without this, ThinLTO won't inline Rust functions into Clang generated
// functions (because Clang annotates functions this way too).
apply_target_cpu_attr(cx, llfn);
to_add.push(target_cpu_attr(cx));
// tune-cpu is only conveyed through the attribute for our purpose.
// The target doesn't care; the subtarget reads our attribute.
apply_tune_cpu_attr(cx, llfn);
to_add.extend(tune_cpu_attr(cx));
let function_features =
codegen_fn_attrs.target_features.iter().map(|f| f.as_str()).collect::<Vec<&str>>();
@ -379,22 +397,12 @@ pub fn from_fn_attrs<'ll, 'tcx>(
// If this function is an import from the environment but the wasm
// import has a specific module/name, apply them here.
if let Some(module) = wasm_import_module(cx.tcx, instance.def_id()) {
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("wasm-import-module"),
&module,
);
to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("wasm-import-module"), &module));
let name =
codegen_fn_attrs.link_name.unwrap_or_else(|| cx.tcx.item_name(instance.def_id()));
let name = CString::new(name.as_str()).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("wasm-import-name"),
&name,
);
to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("wasm-import-name"), &name));
}
// The `"wasm"` abi on wasm targets automatically enables the
@ -414,13 +422,11 @@ pub fn from_fn_attrs<'ll, 'tcx>(
global_features.extend(function_features.into_iter());
let features = global_features.join(",");
let val = CString::new(features).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("target-features"),
&val,
);
to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("target-features"), &val));
}
attributes::remove_from_llfn(llfn, Function, &to_remove);
attributes::apply_to_llfn(llfn, Function, &to_add);
}
fn wasm_import_module(tcx: TyCtxt<'_>, id: DefId) -> Option<CString> {