1
Fork 0

Auto merge of #106428 - saethlin:inline-diverging-functions, r=cjgillot

Permit the MIR inliner to inline diverging functions

This heuristic prevents inlining of `hint::unreachable_unchecked`, which in turn makes `Option/Result::unwrap_unchecked` a bad inlining candidate. I looked through the changes to `core`, `alloc`, `std`, and `hashbrown` by hand and they all seem reasonable. Let's see how this looks in perf...

---

Based on rustc-perf it looks like this regresses ctfe-stress, and the cachegrind diff indicates that this regression is in `InterpCx::statement`. I don't know how to do any deeper analysis because that function is _enormous_ in the try toolchain, which has no debuginfo in it. And a local build produces significantly different codegen for that function, even with LTO.
This commit is contained in:
bors 2023-03-26 05:55:32 +00:00
commit 2420bd34ba
23 changed files with 780 additions and 86 deletions

View file

@ -424,13 +424,6 @@ impl<'tcx> Inliner<'tcx> {
debug!(" final inline threshold = {}", threshold);
// FIXME: Give a bonus to functions with only a single caller
let diverges = matches!(
callee_body.basic_blocks[START_BLOCK].terminator().kind,
TerminatorKind::Unreachable | TerminatorKind::Call { target: None, .. }
);
if diverges && !matches!(callee_attrs.inline, InlineAttr::Always) {
return Err("callee diverges unconditionally");
}
let mut checker = CostChecker {
tcx: self.tcx,

View file

@ -4,7 +4,7 @@ use crate::MirPass;
use rustc_hir::Mutability;
use rustc_middle::mir::{
BinOp, Body, Constant, ConstantKind, LocalDecls, Operand, Place, ProjectionElem, Rvalue,
SourceInfo, Statement, StatementKind, Terminator, TerminatorKind, UnOp,
SourceInfo, Statement, StatementKind, SwitchTargets, Terminator, TerminatorKind, UnOp,
};
use rustc_middle::ty::layout::ValidityRequirement;
use rustc_middle::ty::{self, ParamEnv, SubstsRef, Ty, TyCtxt};
@ -44,6 +44,7 @@ impl<'tcx> MirPass<'tcx> for InstCombine {
&mut block.terminator.as_mut().unwrap(),
&mut block.statements,
);
ctx.combine_duplicate_switch_targets(&mut block.terminator.as_mut().unwrap());
}
}
}
@ -217,6 +218,19 @@ impl<'tcx> InstCombineContext<'tcx, '_> {
terminator.kind = TerminatorKind::Goto { target: destination_block };
}
fn combine_duplicate_switch_targets(&self, terminator: &mut Terminator<'tcx>) {
let TerminatorKind::SwitchInt { targets, .. } = &mut terminator.kind
else { return };
let otherwise = targets.otherwise();
if targets.iter().any(|t| t.1 == otherwise) {
*targets = SwitchTargets::new(
targets.iter().filter(|t| t.1 != otherwise),
targets.otherwise(),
);
}
}
fn combine_intrinsic_assert(
&self,
terminator: &mut Terminator<'tcx>,

View file

@ -28,7 +28,7 @@
//! return.
use crate::MirPass;
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_index::vec::{Idx, IndexVec};
use rustc_middle::mir::coverage::*;
use rustc_middle::mir::visit::{MutVisitor, MutatingUseContext, PlaceContext, Visitor};
@ -48,6 +48,7 @@ impl SimplifyCfg {
pub fn simplify_cfg<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
CfgSimplifier::new(body).simplify();
remove_duplicate_unreachable_blocks(tcx, body);
remove_dead_blocks(tcx, body);
// FIXME: Should probably be moved into some kind of pass manager
@ -259,6 +260,49 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
}
}
pub fn remove_duplicate_unreachable_blocks<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
struct OptApplier<'tcx> {
tcx: TyCtxt<'tcx>,
duplicates: FxIndexSet<BasicBlock>,
}
impl<'tcx> MutVisitor<'tcx> for OptApplier<'tcx> {
fn tcx(&self) -> TyCtxt<'tcx> {
self.tcx
}
fn visit_terminator(&mut self, terminator: &mut Terminator<'tcx>, location: Location) {
for target in terminator.successors_mut() {
// We don't have to check whether `target` is a cleanup block, because have
// entirely excluded cleanup blocks in building the set of duplicates.
if self.duplicates.contains(target) {
*target = self.duplicates[0];
}
}
self.super_terminator(terminator, location);
}
}
let unreachable_blocks = body
.basic_blocks
.iter_enumerated()
.filter(|(_, bb)| {
// CfgSimplifier::simplify leaves behind some unreachable basic blocks without a
// terminator. Those blocks will be deleted by remove_dead_blocks, but we run just
// before then so we need to handle missing terminators.
// We also need to prevent confusing cleanup and non-cleanup blocks. In practice we
// don't emit empty unreachable cleanup blocks, so this simple check suffices.
bb.terminator.is_some() && bb.is_empty_unreachable() && !bb.is_cleanup
})
.map(|(block, _)| block)
.collect::<FxIndexSet<_>>();
if unreachable_blocks.len() > 1 {
OptApplier { tcx, duplicates: unreachable_blocks }.visit_body(body);
}
}
pub fn remove_dead_blocks<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
let reachable = traversal::reachable_as_bitset(body);
let num_blocks = body.basic_blocks.len();