Adjust -Ctarget-cpu=native
handling in cg_llvm
When cg_llvm encounters the `-Ctarget-cpu=native` it computes an explciit set of features that applies to the target in order to correctly compile code for the host CPU (because e.g. `skylake` alone is not sufficient to tell if some of the instructions are available or not). However there were a couple of issues with how we did this. Firstly, the order in which features were overriden wasn't quite right – conceptually you'd expect `-Ctarget-cpu=native` option to override the features that are implicitly set by the target definition. However due to how other `-Ctarget-cpu` values are handled we must adopt the following order of priority: * Features from -Ctarget-cpu=*; are overriden by * Features implied by --target; are overriden by * Features from -Ctarget-feature; are overriden by * function specific features. Another problem was in that the function level `target-features` attribute would overwrite the entire set of the globally enabled features, rather than just the features the `#[target_feature(enable/disable)]` specified. With something like `-Ctarget-cpu=native` we'd end up in a situation wherein a function without `#[target_feature(enable)]` annotation would have a broader set of features compared to a function with one such attribute. This turned out to be a cause of heavy run-time regressions in some code using these function-level attributes in conjunction with `-Ctarget-cpu=native`, for example. With this PR rustc is more careful about specifying the entire set of features for functions that use `#[target_feature(enable/disable)]` or `#[instruction_set]` attributes. Sadly testing the original reproducer for this behaviour is quite impossible – we cannot rely on `-Ctarget-cpu=native` to be anything in particular on developer or CI machines.
This commit is contained in:
parent
0517acd543
commit
72fb4379d5
7 changed files with 155 additions and 52 deletions
|
@ -152,18 +152,6 @@ fn set_probestack(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn llvm_target_features(sess: &Session) -> impl Iterator<Item = &str> {
|
||||
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
|
||||
|
||||
let cmdline = sess
|
||||
.opts
|
||||
.cg
|
||||
.target_feature
|
||||
.split(',')
|
||||
.filter(|f| !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)));
|
||||
sess.target.features.split(',').chain(cmdline).filter(|l| !l.is_empty())
|
||||
}
|
||||
|
||||
pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
||||
let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
|
||||
llvm::AddFunctionAttrStringValue(
|
||||
|
@ -301,20 +289,22 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
|
|||
// The target doesn't care; the subtarget reads our attribute.
|
||||
apply_tune_cpu_attr(cx, llfn);
|
||||
|
||||
let features = llvm_target_features(cx.tcx.sess)
|
||||
.map(|s| s.to_string())
|
||||
.chain(codegen_fn_attrs.target_features.iter().map(|f| {
|
||||
let function_features = codegen_fn_attrs
|
||||
.target_features
|
||||
.iter()
|
||||
.map(|f| {
|
||||
let feature = &f.as_str();
|
||||
format!("+{}", llvm_util::to_llvm_feature(cx.tcx.sess, feature))
|
||||
}))
|
||||
})
|
||||
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
|
||||
InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
|
||||
InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
|
||||
}))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
|
||||
if !features.is_empty() {
|
||||
.collect::<Vec<String>>();
|
||||
if !function_features.is_empty() {
|
||||
let mut global_features = llvm_util::llvm_global_features(cx.tcx.sess);
|
||||
global_features.extend(function_features.into_iter());
|
||||
let features = global_features.join(",");
|
||||
let val = CString::new(features).unwrap();
|
||||
llvm::AddFunctionAttrStringValue(
|
||||
llfn,
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use crate::attributes;
|
||||
use crate::back::lto::ThinBuffer;
|
||||
use crate::back::profiling::{
|
||||
selfprofile_after_pass_callback, selfprofile_before_pass_callback, LlvmSelfProfiler,
|
||||
|
@ -166,8 +165,6 @@ pub fn target_machine_factory(
|
|||
|
||||
let code_model = to_llvm_code_model(sess.code_model());
|
||||
|
||||
let mut features = llvm_util::handle_native_features(sess);
|
||||
features.extend(attributes::llvm_target_features(sess).map(|s| s.to_owned()));
|
||||
let mut singlethread = sess.target.singlethread;
|
||||
|
||||
// On the wasm target once the `atomics` feature is enabled that means that
|
||||
|
@ -182,7 +179,7 @@ pub fn target_machine_factory(
|
|||
|
||||
let triple = SmallCStr::new(&sess.target.llvm_target);
|
||||
let cpu = SmallCStr::new(llvm_util::target_cpu(sess));
|
||||
let features = features.join(",");
|
||||
let features = llvm_util::llvm_global_features(sess).join(",");
|
||||
let features = CString::new(features).unwrap();
|
||||
let abi = SmallCStr::new(&sess.target.llvm_abiname);
|
||||
let trap_unreachable =
|
||||
|
|
|
@ -218,13 +218,39 @@ pub fn target_cpu(sess: &Session) -> &str {
|
|||
handle_native(name)
|
||||
}
|
||||
|
||||
pub fn handle_native_features(sess: &Session) -> Vec<String> {
|
||||
match sess.opts.cg.target_cpu {
|
||||
Some(ref s) => {
|
||||
if s != "native" {
|
||||
return vec![];
|
||||
}
|
||||
/// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
|
||||
/// `--target` and similar).
|
||||
// FIXME(nagisa): Cache the output of this somehow? Maybe make this a query? We're calling this
|
||||
// for every function that has `#[target_feature]` on it. The global features won't change between
|
||||
// the functions; only crates, maybe…
|
||||
pub fn llvm_global_features(sess: &Session) -> Vec<String> {
|
||||
// FIXME(nagisa): this should definitely be available more centrally and to other codegen backends.
|
||||
/// These features control behaviour of rustc rather than llvm.
|
||||
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
|
||||
|
||||
// Features that come earlier are overriden by conflicting features later in the string.
|
||||
// Typically we'll want more explicit settings to override the implicit ones, so:
|
||||
//
|
||||
// * Features from -Ctarget-cpu=*; are overriden by [^1]
|
||||
// * Features implied by --target; are overriden by
|
||||
// * Features from -Ctarget-feature; are overriden by
|
||||
// * function specific features.
|
||||
//
|
||||
// [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
|
||||
// through LLVM TargetMachine implementation.
|
||||
//
|
||||
// FIXME(nagisa): it isn't clear what's the best interaction between features implied by
|
||||
// `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
|
||||
// override anything that's implicit, so e.g. when there's no `--target` flag, features implied
|
||||
// the host target are overriden by `-Ctarget-cpu=*`. On the other hand, what about when both
|
||||
// `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
|
||||
// flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
|
||||
// should be taken in cases like these.
|
||||
let mut features = vec![];
|
||||
|
||||
// -Ctarget-cpu=native
|
||||
match sess.opts.cg.target_cpu {
|
||||
Some(ref s) if s == "native" => {
|
||||
let features_string = unsafe {
|
||||
let ptr = llvm::LLVMGetHostCPUFeatures();
|
||||
let features_string = if !ptr.is_null() {
|
||||
|
@ -242,11 +268,31 @@ pub fn handle_native_features(sess: &Session) -> Vec<String> {
|
|||
|
||||
features_string
|
||||
};
|
||||
|
||||
features_string.split(",").map(|s| s.to_owned()).collect()
|
||||
features.extend(features_string.split(",").map(String::from));
|
||||
}
|
||||
None => vec![],
|
||||
}
|
||||
Some(_) | None => {}
|
||||
};
|
||||
|
||||
// Features implied by an implicit or explicit `--target`.
|
||||
features.extend(
|
||||
sess.target
|
||||
.features
|
||||
.split(',')
|
||||
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
|
||||
.map(String::from),
|
||||
);
|
||||
|
||||
// -Ctarget-features
|
||||
features.extend(
|
||||
sess.opts
|
||||
.cg
|
||||
.target_feature
|
||||
.split(',')
|
||||
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
|
||||
.map(String::from),
|
||||
);
|
||||
|
||||
features
|
||||
}
|
||||
|
||||
pub fn tune_cpu(sess: &Session) -> Option<&str> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue