Auto merge of #137586 - nnethercote:SetImpliedBits, r=bjorn3
Speed up target feature computation. The LLVM backend calls `LLVMRustHasFeature` twice for every feature. In short-running rustc invocations, this accounts for a surprising amount of work. r? `@bjorn3`
This commit is contained in:
commit
ebf0cf75d3
9 changed files with 120 additions and 113 deletions
|
@ -176,13 +176,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn target_features_cfg(
|
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
&self,
|
|
||||||
sess: &Session,
|
|
||||||
_allow_unstable: bool,
|
|
||||||
) -> Vec<rustc_span::Symbol> {
|
|
||||||
// FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
|
// FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
|
||||||
if sess.target.arch == "x86_64" && sess.target.os != "none" {
|
let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" {
|
||||||
// x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled
|
// x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled
|
||||||
vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")]
|
vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")]
|
||||||
} else if sess.target.arch == "aarch64" {
|
} else if sess.target.arch == "aarch64" {
|
||||||
|
@ -196,7 +192,10 @@ impl CodegenBackend for CraneliftCodegenBackend {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
vec![]
|
vec![]
|
||||||
}
|
};
|
||||||
|
// FIXME do `unstable_target_features` properly
|
||||||
|
let unstable_target_features = target_features.clone();
|
||||||
|
(target_features, unstable_target_features)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_version(&self) {
|
fn print_version(&self) {
|
||||||
|
|
|
@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
|
||||||
for feature in sess.opts.cg.target_feature.split(',') {
|
for feature in sess.opts.cg.target_feature.split(',') {
|
||||||
if let Some(feature) = feature.strip_prefix('+') {
|
if let Some(feature) = feature.strip_prefix('+') {
|
||||||
all_rust_features.extend(
|
all_rust_features.extend(
|
||||||
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
|
UnordSet::from(sess.target.implied_target_features(feature))
|
||||||
.to_sorted_stable_ord()
|
.to_sorted_stable_ord()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&&s| (true, s)),
|
.map(|&&s| (true, s)),
|
||||||
|
|
|
@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend {
|
||||||
.join(sess)
|
.join(sess)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
target_features_cfg(sess, allow_unstable, &self.target_info)
|
target_features_cfg(sess, &self.target_info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
|
||||||
/// Returns the features that should be set in `cfg(target_feature)`.
|
/// Returns the features that should be set in `cfg(target_feature)`.
|
||||||
fn target_features_cfg(
|
fn target_features_cfg(
|
||||||
sess: &Session,
|
sess: &Session,
|
||||||
allow_unstable: bool,
|
|
||||||
target_info: &LockedTargetInfo,
|
target_info: &LockedTargetInfo,
|
||||||
) -> Vec<Symbol> {
|
) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
// TODO(antoyo): use global_gcc_features.
|
// TODO(antoyo): use global_gcc_features.
|
||||||
sess.target
|
let f = |allow_unstable| {
|
||||||
.rust_target_features()
|
sess.target
|
||||||
.iter()
|
.rust_target_features()
|
||||||
.filter_map(|&(feature, gate, _)| {
|
.iter()
|
||||||
if allow_unstable
|
.filter_map(|&(feature, gate, _)| {
|
||||||
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
|
if allow_unstable
|
||||||
{
|
|| (gate.in_cfg()
|
||||||
Some(feature)
|
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
|
||||||
} else {
|
{
|
||||||
None
|
Some(feature)
|
||||||
}
|
} else {
|
||||||
})
|
None
|
||||||
.filter(|feature| {
|
}
|
||||||
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
|
})
|
||||||
if *feature == "neon" {
|
.filter(|feature| {
|
||||||
return false;
|
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
|
||||||
}
|
if *feature == "neon" {
|
||||||
target_info.cpu_supports(feature)
|
return false;
|
||||||
/*
|
}
|
||||||
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
|
target_info.cpu_supports(feature)
|
||||||
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
|
/*
|
||||||
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
|
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
|
||||||
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
|
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
|
||||||
*/
|
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
|
||||||
})
|
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
|
||||||
.map(Symbol::intern)
|
*/
|
||||||
.collect()
|
})
|
||||||
|
.map(Symbol::intern)
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
|
let target_features = f(false);
|
||||||
|
let unstable_target_features = f(true);
|
||||||
|
(target_features, unstable_target_features)
|
||||||
}
|
}
|
||||||
|
|
|
@ -341,8 +341,8 @@ impl CodegenBackend for LlvmCodegenBackend {
|
||||||
llvm_util::print_version();
|
llvm_util::print_version();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
target_features_cfg(sess, allow_unstable)
|
target_features_cfg(sess)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn codegen_crate<'tcx>(
|
fn codegen_crate<'tcx>(
|
||||||
|
|
|
@ -306,45 +306,44 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
|
||||||
/// Must express features in the way Rust understands them.
|
/// Must express features in the way Rust understands them.
|
||||||
///
|
///
|
||||||
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
|
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
|
||||||
pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
|
pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
let mut features: FxHashSet<Symbol> = Default::default();
|
|
||||||
|
|
||||||
// Add base features for the target.
|
// Add base features for the target.
|
||||||
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
|
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
|
||||||
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
|
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
|
||||||
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
|
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
|
||||||
// the target CPU, that is still expanded to target features (with all their implied features) by
|
// the target CPU, that is still expanded to target features (with all their implied features)
|
||||||
// LLVM.
|
// by LLVM.
|
||||||
let target_machine = create_informational_target_machine(sess, true);
|
let target_machine = create_informational_target_machine(sess, true);
|
||||||
// Compute which of the known target features are enabled in the 'base' target machine.
|
// Compute which of the known target features are enabled in the 'base' target machine. We only
|
||||||
// We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
|
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
|
||||||
features.extend(
|
let mut features: FxHashSet<Symbol> = sess
|
||||||
sess.target
|
.target
|
||||||
.rust_target_features()
|
.rust_target_features()
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|(feature, _, _)| {
|
.filter(|(feature, _, _)| {
|
||||||
// skip checking special features, as LLVM may not understand them
|
// skip checking special features, as LLVM may not understand them
|
||||||
if RUSTC_SPECIAL_FEATURES.contains(feature) {
|
if RUSTC_SPECIAL_FEATURES.contains(feature) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// check that all features in a given smallvec are enabled
|
if let Some(feat) = to_llvm_features(sess, feature) {
|
||||||
if let Some(feat) = to_llvm_features(sess, feature) {
|
for llvm_feature in feat {
|
||||||
for llvm_feature in feat {
|
let cstr = SmallCStr::new(llvm_feature);
|
||||||
let cstr = SmallCStr::new(llvm_feature);
|
// `LLVMRustHasFeature` is moderately expensive. On targets with many
|
||||||
if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) }
|
// features (e.g. x86) these calls take a non-trivial fraction of runtime
|
||||||
{
|
// when compiling very small programs.
|
||||||
return false;
|
if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
|
||||||
}
|
return false;
|
||||||
}
|
}
|
||||||
true
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
})
|
true
|
||||||
.map(|(feature, _, _)| Symbol::intern(feature)),
|
} else {
|
||||||
);
|
false
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.map(|(feature, _, _)| Symbol::intern(feature))
|
||||||
|
.collect();
|
||||||
|
|
||||||
// Add enabled features
|
// Add enabled and remove disabled features.
|
||||||
for (enabled, feature) in
|
for (enabled, feature) in
|
||||||
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
|
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
|
||||||
Some('+') => Some((true, Symbol::intern(&s[1..]))),
|
Some('+') => Some((true, Symbol::intern(&s[1..]))),
|
||||||
|
@ -360,7 +359,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
|
||||||
#[allow(rustc::potential_query_instability)]
|
#[allow(rustc::potential_query_instability)]
|
||||||
features.extend(
|
features.extend(
|
||||||
sess.target
|
sess.target
|
||||||
.implied_target_features(std::iter::once(feature.as_str()))
|
.implied_target_features(feature.as_str())
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| Symbol::intern(s)),
|
.map(|s| Symbol::intern(s)),
|
||||||
);
|
);
|
||||||
|
@ -371,11 +370,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
|
||||||
// `features.contains` below.
|
// `features.contains` below.
|
||||||
#[allow(rustc::potential_query_instability)]
|
#[allow(rustc::potential_query_instability)]
|
||||||
features.retain(|f| {
|
features.retain(|f| {
|
||||||
if sess
|
if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
|
||||||
.target
|
|
||||||
.implied_target_features(std::iter::once(f.as_str()))
|
|
||||||
.contains(&feature.as_str())
|
|
||||||
{
|
|
||||||
// If `f` implies `feature`, then `!feature` implies `!f`, so we have to
|
// If `f` implies `feature`, then `!feature` implies `!f`, so we have to
|
||||||
// remove `f`. (This is the standard logical contraposition principle.)
|
// remove `f`. (This is the standard logical contraposition principle.)
|
||||||
false
|
false
|
||||||
|
@ -387,25 +382,31 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter enabled features based on feature gates
|
// Filter enabled features based on feature gates.
|
||||||
sess.target
|
let f = |allow_unstable| {
|
||||||
.rust_target_features()
|
sess.target
|
||||||
.iter()
|
.rust_target_features()
|
||||||
.filter_map(|(feature, gate, _)| {
|
.iter()
|
||||||
// The `allow_unstable` set is used by rustc internally to determine which target
|
.filter_map(|(feature, gate, _)| {
|
||||||
// features are truly available, so we want to return even perma-unstable "forbidden"
|
// The `allow_unstable` set is used by rustc internally to determine which target
|
||||||
// features.
|
// features are truly available, so we want to return even perma-unstable
|
||||||
if allow_unstable
|
// "forbidden" features.
|
||||||
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
|
if allow_unstable
|
||||||
{
|
|| (gate.in_cfg()
|
||||||
Some(*feature)
|
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
|
||||||
} else {
|
{
|
||||||
None
|
Some(Symbol::intern(feature))
|
||||||
}
|
} else {
|
||||||
})
|
None
|
||||||
.filter(|feature| features.contains(&Symbol::intern(feature)))
|
}
|
||||||
.map(|feature| Symbol::intern(feature))
|
})
|
||||||
.collect()
|
.filter(|feature| features.contains(&feature))
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
|
let target_features = f(false);
|
||||||
|
let unstable_target_features = f(true);
|
||||||
|
(target_features, unstable_target_features)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn print_version() {
|
pub(crate) fn print_version() {
|
||||||
|
@ -682,7 +683,7 @@ pub(crate) fn global_llvm_features(
|
||||||
for feature in sess.opts.cg.target_feature.split(',') {
|
for feature in sess.opts.cg.target_feature.split(',') {
|
||||||
if let Some(feature) = feature.strip_prefix('+') {
|
if let Some(feature) = feature.strip_prefix('+') {
|
||||||
all_rust_features.extend(
|
all_rust_features.extend(
|
||||||
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
|
UnordSet::from(sess.target.implied_target_features(feature))
|
||||||
.to_sorted_stable_ord()
|
.to_sorted_stable_ord()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&&s| (true, s)),
|
.map(|&&s| (true, s)),
|
||||||
|
|
|
@ -190,7 +190,7 @@ pub(crate) fn provide(providers: &mut Providers) {
|
||||||
},
|
},
|
||||||
implied_target_features: |tcx, feature: Symbol| {
|
implied_target_features: |tcx, feature: Symbol| {
|
||||||
let feature = feature.as_str();
|
let feature = feature.as_str();
|
||||||
UnordSet::from(tcx.sess.target.implied_target_features(std::iter::once(feature)))
|
UnordSet::from(tcx.sess.target.implied_target_features(feature))
|
||||||
.into_sorted_stable_ord()
|
.into_sorted_stable_ord()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|s| Symbol::intern(s))
|
.map(|s| Symbol::intern(s))
|
||||||
|
|
|
@ -45,10 +45,13 @@ pub trait CodegenBackend {
|
||||||
|
|
||||||
fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {}
|
fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {}
|
||||||
|
|
||||||
/// Returns the features that should be set in `cfg(target_feature)`.
|
/// Returns two feature sets:
|
||||||
|
/// - The first has the features that should be set in `cfg(target_feature)`.
|
||||||
|
/// - The second is like the first, but also includes unstable features.
|
||||||
|
///
|
||||||
/// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen.
|
/// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen.
|
||||||
fn target_features_cfg(&self, _sess: &Session, _allow_unstable: bool) -> Vec<Symbol> {
|
fn target_features_cfg(&self, _sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
|
||||||
vec![]
|
(vec![], vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_passes(&self) {}
|
fn print_passes(&self) {}
|
||||||
|
|
|
@ -39,11 +39,11 @@ pub(crate) fn add_configuration(
|
||||||
) {
|
) {
|
||||||
let tf = sym::target_feature;
|
let tf = sym::target_feature;
|
||||||
|
|
||||||
let unstable_target_features = codegen_backend.target_features_cfg(sess, true);
|
let (target_features, unstable_target_features) = codegen_backend.target_features_cfg(sess);
|
||||||
sess.unstable_target_features.extend(unstable_target_features.iter().cloned());
|
|
||||||
|
|
||||||
let target_features = codegen_backend.target_features_cfg(sess, false);
|
sess.unstable_target_features.extend(unstable_target_features.iter().copied());
|
||||||
sess.target_features.extend(target_features.iter().cloned());
|
|
||||||
|
sess.target_features.extend(target_features.iter().copied());
|
||||||
|
|
||||||
cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat))));
|
cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat))));
|
||||||
|
|
||||||
|
|
|
@ -768,17 +768,15 @@ impl Target {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn implied_target_features<'a>(
|
// Note: the returned set includes `base_feature`.
|
||||||
&self,
|
pub fn implied_target_features<'a>(&self, base_feature: &'a str) -> FxHashSet<&'a str> {
|
||||||
base_features: impl Iterator<Item = &'a str>,
|
|
||||||
) -> FxHashSet<&'a str> {
|
|
||||||
let implied_features =
|
let implied_features =
|
||||||
self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>();
|
self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>();
|
||||||
|
|
||||||
// implied target features have their own implied target features, so we traverse the
|
// Implied target features have their own implied target features, so we traverse the
|
||||||
// map until there are no more features to add
|
// map until there are no more features to add.
|
||||||
let mut features = FxHashSet::default();
|
let mut features = FxHashSet::default();
|
||||||
let mut new_features = base_features.collect::<Vec<&str>>();
|
let mut new_features = vec![base_feature];
|
||||||
while let Some(new_feature) = new_features.pop() {
|
while let Some(new_feature) = new_features.pop() {
|
||||||
if features.insert(new_feature) {
|
if features.insert(new_feature) {
|
||||||
if let Some(implied_features) = implied_features.get(&new_feature) {
|
if let Some(implied_features) = implied_features.get(&new_feature) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue