Auto merge of #106428 - saethlin:inline-diverging-functions, r=cjgillot
Permit the MIR inliner to inline diverging functions This heuristic prevents inlining of `hint::unreachable_unchecked`, which in turn makes `Option/Result::unwrap_unchecked` a bad inlining candidate. I looked through the changes to `core`, `alloc`, `std`, and `hashbrown` by hand and they all seem reasonable. Let's see how this looks in perf... --- Based on rustc-perf it looks like this regresses ctfe-stress, and the cachegrind diff indicates that this regression is in `InterpCx::statement`. I don't know how to do any deeper analysis because that function is _enormous_ in the try toolchain, which has no debuginfo in it. And a local build produces significantly different codegen for that function, even with LTO.
This commit is contained in:
commit
2420bd34ba
23 changed files with 780 additions and 86 deletions
|
@ -424,13 +424,6 @@ impl<'tcx> Inliner<'tcx> {
|
|||
debug!(" final inline threshold = {}", threshold);
|
||||
|
||||
// FIXME: Give a bonus to functions with only a single caller
|
||||
let diverges = matches!(
|
||||
callee_body.basic_blocks[START_BLOCK].terminator().kind,
|
||||
TerminatorKind::Unreachable | TerminatorKind::Call { target: None, .. }
|
||||
);
|
||||
if diverges && !matches!(callee_attrs.inline, InlineAttr::Always) {
|
||||
return Err("callee diverges unconditionally");
|
||||
}
|
||||
|
||||
let mut checker = CostChecker {
|
||||
tcx: self.tcx,
|
||||
|
|
|
@ -4,7 +4,7 @@ use crate::MirPass;
|
|||
use rustc_hir::Mutability;
|
||||
use rustc_middle::mir::{
|
||||
BinOp, Body, Constant, ConstantKind, LocalDecls, Operand, Place, ProjectionElem, Rvalue,
|
||||
SourceInfo, Statement, StatementKind, Terminator, TerminatorKind, UnOp,
|
||||
SourceInfo, Statement, StatementKind, SwitchTargets, Terminator, TerminatorKind, UnOp,
|
||||
};
|
||||
use rustc_middle::ty::layout::ValidityRequirement;
|
||||
use rustc_middle::ty::{self, ParamEnv, SubstsRef, Ty, TyCtxt};
|
||||
|
@ -44,6 +44,7 @@ impl<'tcx> MirPass<'tcx> for InstCombine {
|
|||
&mut block.terminator.as_mut().unwrap(),
|
||||
&mut block.statements,
|
||||
);
|
||||
ctx.combine_duplicate_switch_targets(&mut block.terminator.as_mut().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -217,6 +218,19 @@ impl<'tcx> InstCombineContext<'tcx, '_> {
|
|||
terminator.kind = TerminatorKind::Goto { target: destination_block };
|
||||
}
|
||||
|
||||
fn combine_duplicate_switch_targets(&self, terminator: &mut Terminator<'tcx>) {
|
||||
let TerminatorKind::SwitchInt { targets, .. } = &mut terminator.kind
|
||||
else { return };
|
||||
|
||||
let otherwise = targets.otherwise();
|
||||
if targets.iter().any(|t| t.1 == otherwise) {
|
||||
*targets = SwitchTargets::new(
|
||||
targets.iter().filter(|t| t.1 != otherwise),
|
||||
targets.otherwise(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn combine_intrinsic_assert(
|
||||
&self,
|
||||
terminator: &mut Terminator<'tcx>,
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
//! return.
|
||||
|
||||
use crate::MirPass;
|
||||
use rustc_data_structures::fx::FxHashSet;
|
||||
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
|
||||
use rustc_index::vec::{Idx, IndexVec};
|
||||
use rustc_middle::mir::coverage::*;
|
||||
use rustc_middle::mir::visit::{MutVisitor, MutatingUseContext, PlaceContext, Visitor};
|
||||
|
@ -48,6 +48,7 @@ impl SimplifyCfg {
|
|||
|
||||
pub fn simplify_cfg<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
|
||||
CfgSimplifier::new(body).simplify();
|
||||
remove_duplicate_unreachable_blocks(tcx, body);
|
||||
remove_dead_blocks(tcx, body);
|
||||
|
||||
// FIXME: Should probably be moved into some kind of pass manager
|
||||
|
@ -259,6 +260,49 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn remove_duplicate_unreachable_blocks<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
|
||||
struct OptApplier<'tcx> {
|
||||
tcx: TyCtxt<'tcx>,
|
||||
duplicates: FxIndexSet<BasicBlock>,
|
||||
}
|
||||
|
||||
impl<'tcx> MutVisitor<'tcx> for OptApplier<'tcx> {
|
||||
fn tcx(&self) -> TyCtxt<'tcx> {
|
||||
self.tcx
|
||||
}
|
||||
|
||||
fn visit_terminator(&mut self, terminator: &mut Terminator<'tcx>, location: Location) {
|
||||
for target in terminator.successors_mut() {
|
||||
// We don't have to check whether `target` is a cleanup block, because have
|
||||
// entirely excluded cleanup blocks in building the set of duplicates.
|
||||
if self.duplicates.contains(target) {
|
||||
*target = self.duplicates[0];
|
||||
}
|
||||
}
|
||||
|
||||
self.super_terminator(terminator, location);
|
||||
}
|
||||
}
|
||||
|
||||
let unreachable_blocks = body
|
||||
.basic_blocks
|
||||
.iter_enumerated()
|
||||
.filter(|(_, bb)| {
|
||||
// CfgSimplifier::simplify leaves behind some unreachable basic blocks without a
|
||||
// terminator. Those blocks will be deleted by remove_dead_blocks, but we run just
|
||||
// before then so we need to handle missing terminators.
|
||||
// We also need to prevent confusing cleanup and non-cleanup blocks. In practice we
|
||||
// don't emit empty unreachable cleanup blocks, so this simple check suffices.
|
||||
bb.terminator.is_some() && bb.is_empty_unreachable() && !bb.is_cleanup
|
||||
})
|
||||
.map(|(block, _)| block)
|
||||
.collect::<FxIndexSet<_>>();
|
||||
|
||||
if unreachable_blocks.len() > 1 {
|
||||
OptApplier { tcx, duplicates: unreachable_blocks }.visit_body(body);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_dead_blocks<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
|
||||
let reachable = traversal::reachable_as_bitset(body);
|
||||
let num_blocks = body.basic_blocks.len();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue