Rollup merge of #139042 - compiler-errors:do-not-optimize-switchint, r=saethlin

Do not remove trivial `SwitchInt` in analysis MIR

This PR ensures that we don't prematurely remove trivial `SwitchInt` terminators which affects both the borrow-checking and runtime semantics (i.e. UB) of the code. Previously the `SimplifyCfg` optimization was removing `SwitchInt` terminators when they was "trivial", i.e. when all arms branched to the same basic block, even if that `SwitchInt` terminator had the side-effect of reading an operand which (for example) may not be initialized or may point to an invalid place in memory.

This behavior is unlike all other optimizations, which are only applied after "analysis" (i.e. borrow-checking) is finished, and which Miri disables to make sure the compiler doesn't silently remove UB.

Fixing this code "breaks" (i.e. unmasks) code that used to borrow-check but no longer does, like:

```rust
fn foo() {
    let x;
    let (0 | _) = x;
}
```

This match expression should perform a read because `_` does not shadow the `0` literal pattern, and the compiler should have to read the match scrutinee to compare it to 0. I've checked that this behavior does not actually manifest in practice via a crater run which came back clean: https://github.com/rust-lang/rust/pull/139042#issuecomment-2767436367

As a side-note, it may be tempting to suggest that this is actually a good thing or that we should preserve this behavior. If we wanted to make this work (i.e. trivially optimize out reads from matches that are redundant like `0 | _`), then we should be enabling this behavior *after* fixing this. However, I think it's kinda unprincipled, and for example other variations of the code don't even work today, e.g.:

```rust
fn foo() {
    let x;
    let (0.. | _) = x;
}
```
This commit is contained in:
Chris Denton 2025-04-19 19:30:46 +00:00 committed by GitHub
commit 5d2375f789
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 197 additions and 51 deletions

View file

@ -818,8 +818,8 @@ fn test_unstable_options_tracking_hash() {
tracked!(min_function_alignment, Some(Align::EIGHT));
tracked!(mir_emit_retag, true);
tracked!(mir_enable_passes, vec![("DestProp".to_string(), false)]);
tracked!(mir_keep_place_mention, true);
tracked!(mir_opt_level, Some(4));
tracked!(mir_preserve_ub, true);
tracked!(move_size_limit, Some(4096));
tracked!(mutable_noalias, false);
tracked!(next_solver, NextSolverConfig { coherence: true, globally: true });

View file

@ -223,7 +223,7 @@ impl<'tcx> crate::MirPass<'tcx> for EarlyOtherwiseBranch {
// Since this optimization adds new basic blocks and invalidates others,
// clean up the cfg to make it nicer for other passes
if should_cleanup {
simplify_cfg(body);
simplify_cfg(tcx, body);
}
}

View file

@ -63,7 +63,7 @@ impl<'tcx> crate::MirPass<'tcx> for Inline {
let _guard = span.enter();
if inline::<NormalInliner<'tcx>>(tcx, body) {
debug!("running simplify cfg on {:?}", body.source);
simplify_cfg(body);
simplify_cfg(tcx, body);
deref_finder(tcx, body);
}
}
@ -99,7 +99,7 @@ impl<'tcx> crate::MirPass<'tcx> for ForceInline {
let _guard = span.enter();
if inline::<ForceInliner<'tcx>>(tcx, body) {
debug!("running simplify cfg on {:?}", body.source);
simplify_cfg(body);
simplify_cfg(tcx, body);
deref_finder(tcx, body);
}
}

View file

@ -43,7 +43,7 @@ impl<'tcx> crate::MirPass<'tcx> for MatchBranchSimplification {
}
if should_cleanup {
simplify_cfg(body);
simplify_cfg(tcx, body);
}
}

View file

@ -8,7 +8,7 @@ pub(super) struct RemovePlaceMention;
impl<'tcx> crate::MirPass<'tcx> for RemovePlaceMention {
fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
!sess.opts.unstable_opts.mir_keep_place_mention
!sess.opts.unstable_opts.mir_preserve_ub
}
fn run_pass(&self, _: TyCtxt<'tcx>, body: &mut Body<'tcx>) {

View file

@ -35,7 +35,7 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveUnneededDrops {
// if we applied optimizations, we potentially have some cfg to cleanup to
// make it easier for further passes
if should_simplify {
simplify_cfg(body);
simplify_cfg(tcx, body);
}
}

View file

@ -26,6 +26,13 @@
//! Here the block (`{ return; }`) has the return type `char`, rather than `()`, but the MIR we
//! naively generate still contains the `_a = ()` write in the unreachable block "after" the
//! return.
//!
//! **WARNING**: This is one of the few optimizations that runs on built and analysis MIR, and
//! so its effects may affect the type-checking, borrow-checking, and other analysis of MIR.
//! We must be extremely careful to only apply optimizations that preserve UB and all
//! non-determinism, since changes here can affect which programs compile in an insta-stable way.
//! The normal logic that a program with UB can be changed to do anything does not apply to
//! pre-"runtime" MIR!
use rustc_index::{Idx, IndexSlice, IndexVec};
use rustc_middle::mir::visit::{MutVisitor, MutatingUseContext, PlaceContext, Visitor};
@ -66,8 +73,8 @@ impl SimplifyCfg {
}
}
pub(super) fn simplify_cfg(body: &mut Body<'_>) {
CfgSimplifier::new(body).simplify();
pub(super) fn simplify_cfg<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
CfgSimplifier::new(tcx, body).simplify();
remove_dead_blocks(body);
// FIXME: Should probably be moved into some kind of pass manager
@ -79,9 +86,9 @@ impl<'tcx> crate::MirPass<'tcx> for SimplifyCfg {
self.name()
}
fn run_pass(&self, _: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
debug!("SimplifyCfg({:?}) - simplifying {:?}", self.name(), body.source);
simplify_cfg(body);
simplify_cfg(tcx, body);
}
fn is_required(&self) -> bool {
@ -90,12 +97,13 @@ impl<'tcx> crate::MirPass<'tcx> for SimplifyCfg {
}
struct CfgSimplifier<'a, 'tcx> {
preserve_switch_reads: bool,
basic_blocks: &'a mut IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
pred_count: IndexVec<BasicBlock, u32>,
}
impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
fn new(body: &'a mut Body<'tcx>) -> Self {
fn new(tcx: TyCtxt<'tcx>, body: &'a mut Body<'tcx>) -> Self {
let mut pred_count = IndexVec::from_elem(0u32, &body.basic_blocks);
// we can't use mir.predecessors() here because that counts
@ -110,9 +118,12 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
}
}
// Preserve `SwitchInt` reads on built and analysis MIR, or if `-Zmir-preserve-ub`.
let preserve_switch_reads = matches!(body.phase, MirPhase::Built | MirPhase::Analysis(_))
|| tcx.sess.opts.unstable_opts.mir_preserve_ub;
let basic_blocks = body.basic_blocks_mut();
CfgSimplifier { basic_blocks, pred_count }
CfgSimplifier { preserve_switch_reads, basic_blocks, pred_count }
}
fn simplify(mut self) {
@ -253,9 +264,15 @@ impl<'a, 'tcx> CfgSimplifier<'a, 'tcx> {
// turn a branch with all successors identical to a goto
fn simplify_branch(&mut self, terminator: &mut Terminator<'tcx>) -> bool {
match terminator.kind {
TerminatorKind::SwitchInt { .. } => {}
_ => return false,
// Removing a `SwitchInt` terminator may remove reads that result in UB,
// so we must not apply this optimization before borrowck or when
// `-Zmir-preserve-ub` is set.
if self.preserve_switch_reads {
return false;
}
let TerminatorKind::SwitchInt { .. } = terminator.kind else {
return false;
};
let first_succ = {

View file

@ -2322,12 +2322,12 @@ options! {
mir_include_spans: MirIncludeSpans = (MirIncludeSpans::default(), parse_mir_include_spans, [UNTRACKED],
"include extra comments in mir pretty printing, like line numbers and statement indices, \
details about types, etc. (boolean for all passes, 'nll' to enable in NLL MIR only, default: 'nll')"),
mir_keep_place_mention: bool = (false, parse_bool, [TRACKED],
"keep place mention MIR statements, interpreted e.g., by miri; implies -Zmir-opt-level=0 \
(default: no)"),
#[rustc_lint_opt_deny_field_access("use `Session::mir_opt_level` instead of this field")]
mir_opt_level: Option<usize> = (None, parse_opt_number, [TRACKED],
"MIR optimization level (0-4; default: 1 in non optimized builds and 2 in optimized builds)"),
mir_preserve_ub: bool = (false, parse_bool, [TRACKED],
"keep place mention statements and reads in trivial SwitchInt terminators, which are interpreted \
e.g., by miri; implies -Zmir-opt-level=0 (default: no)"),
mir_strip_debuginfo: MirStripDebugInfo = (MirStripDebugInfo::None, parse_mir_strip_debuginfo, [TRACKED],
"Whether to remove some of the MIR debug info from methods. Default: None"),
move_size_limit: Option<usize> = (None, parse_opt_number, [TRACKED],

View file

@ -169,7 +169,7 @@ pub const MIRI_DEFAULT_ARGS: &[&str] = &[
"-Zalways-encode-mir",
"-Zextra-const-ub-checks",
"-Zmir-emit-retag",
"-Zmir-keep-place-mention",
"-Zmir-preserve-ub",
"-Zmir-opt-level=0",
"-Zmir-enable-passes=-CheckAlignment,-CheckNull",
// Deduplicating diagnostics means we miss events when tracking what happens during an

View file

@ -0,0 +1,14 @@
// Ensure that we don't optimize out `SwitchInt` reads even if that terminator
// branches to the same basic block on every target, since the operand may have
// side-effects that affect analysis of the MIR.
//
// See <https://github.com/rust-lang/miri/issues/4237>.
use std::mem::MaybeUninit;
fn main() {
let uninit: MaybeUninit<i32> = MaybeUninit::uninit();
let bad_ref: &i32 = unsafe { uninit.assume_init_ref() };
let &(0 | _) = bad_ref;
//~^ ERROR: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
}

View file

@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> tests/fail/read_from_trivial_switch.rs:LL:CC
|
LL | let &(0 | _) = bad_ref;
| ^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at tests/fail/read_from_trivial_switch.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View file

@ -24,43 +24,47 @@ fn match_tuple(_1: (u32, bool, Option<i32>, u32)) -> u32 {
bb1: {
_0 = const 0_u32;
goto -> bb10;
goto -> bb11;
}
bb2: {
_2 = discriminant((_1.2: std::option::Option<i32>));
switchInt(move _2) -> [0: bb4, 1: bb3, otherwise: bb1];
switchInt(copy (_1.1: bool)) -> [0: bb3, otherwise: bb3];
}
bb3: {
switchInt(copy (((_1.2: std::option::Option<i32>) as Some).0: i32)) -> [1: bb4, 8: bb4, otherwise: bb1];
_2 = discriminant((_1.2: std::option::Option<i32>));
switchInt(move _2) -> [0: bb5, 1: bb4, otherwise: bb1];
}
bb4: {
_5 = Le(const 6_u32, copy (_1.3: u32));
switchInt(move _5) -> [0: bb5, otherwise: bb7];
switchInt(copy (((_1.2: std::option::Option<i32>) as Some).0: i32)) -> [1: bb5, 8: bb5, otherwise: bb1];
}
bb5: {
_3 = Le(const 13_u32, copy (_1.3: u32));
switchInt(move _3) -> [0: bb1, otherwise: bb6];
_5 = Le(const 6_u32, copy (_1.3: u32));
switchInt(move _5) -> [0: bb6, otherwise: bb8];
}
bb6: {
_4 = Le(copy (_1.3: u32), const 16_u32);
switchInt(move _4) -> [0: bb1, otherwise: bb8];
_3 = Le(const 13_u32, copy (_1.3: u32));
switchInt(move _3) -> [0: bb1, otherwise: bb7];
}
bb7: {
_6 = Le(copy (_1.3: u32), const 9_u32);
switchInt(move _6) -> [0: bb5, otherwise: bb8];
_4 = Le(copy (_1.3: u32), const 16_u32);
switchInt(move _4) -> [0: bb1, otherwise: bb9];
}
bb8: {
falseEdge -> [real: bb9, imaginary: bb1];
_6 = Le(copy (_1.3: u32), const 9_u32);
switchInt(move _6) -> [0: bb6, otherwise: bb9];
}
bb9: {
falseEdge -> [real: bb10, imaginary: bb1];
}
bb10: {
StorageLive(_7);
_7 = copy (_1.0: u32);
StorageLive(_8);
@ -74,10 +78,10 @@ fn match_tuple(_1: (u32, bool, Option<i32>, u32)) -> u32 {
StorageDead(_9);
StorageDead(_8);
StorageDead(_7);
goto -> bb10;
goto -> bb11;
}
bb10: {
bb11: {
return;
}
}

View file

@ -2,7 +2,7 @@
// and don't remove it as a dead store.
//
//@ test-mir-pass: DeadStoreElimination-initial
//@ compile-flags: -Zmir-keep-place-mention
//@ compile-flags: -Zmir-preserve-ub
// EMIT_MIR place_mention.main.DeadStoreElimination-initial.diff
fn main() {

View file

@ -14,7 +14,7 @@ fn single_switchint() -> () {
}
bb1: {
switchInt(copy (_2.0: i32)) -> [3: bb8, 4: bb8, otherwise: bb7];
switchInt(copy (_2.0: i32)) -> [3: bb9, 4: bb9, otherwise: bb8];
}
bb2: {
@ -22,7 +22,7 @@ fn single_switchint() -> () {
}
bb3: {
falseEdge -> [real: bb12, imaginary: bb4];
falseEdge -> [real: bb14, imaginary: bb4];
}
bb4: {
@ -30,43 +30,51 @@ fn single_switchint() -> () {
}
bb5: {
falseEdge -> [real: bb11, imaginary: bb6];
falseEdge -> [real: bb13, imaginary: bb6];
}
bb6: {
falseEdge -> [real: bb10, imaginary: bb1];
switchInt(copy (_2.1: bool)) -> [0: bb7, otherwise: bb7];
}
bb7: {
_1 = const 5_i32;
goto -> bb13;
falseEdge -> [real: bb12, imaginary: bb1];
}
bb8: {
falseEdge -> [real: bb9, imaginary: bb7];
_1 = const 5_i32;
goto -> bb15;
}
bb9: {
_1 = const 4_i32;
goto -> bb13;
switchInt(copy (_2.1: bool)) -> [0: bb10, otherwise: bb10];
}
bb10: {
_1 = const 3_i32;
goto -> bb13;
falseEdge -> [real: bb11, imaginary: bb8];
}
bb11: {
_1 = const 2_i32;
goto -> bb13;
_1 = const 4_i32;
goto -> bb15;
}
bb12: {
_1 = const 1_i32;
goto -> bb13;
_1 = const 3_i32;
goto -> bb15;
}
bb13: {
_1 = const 2_i32;
goto -> bb15;
}
bb14: {
_1 = const 1_i32;
goto -> bb15;
}
bb15: {
StorageDead(_2);
StorageDead(_1);
_0 = const ();

View file

@ -0,0 +1,49 @@
- // MIR for `main` before SimplifyCfg-initial
+ // MIR for `main` after SimplifyCfg-initial
fn main() -> () {
let mut _0: ();
let _1: &i32;
let _2: i32;
scope 1 {
debug ref_ => _1;
scope 2 {
}
}
bb0: {
StorageLive(_1);
StorageLive(_2);
_2 = const 1_i32;
_1 = &_2;
FakeRead(ForLet(None), _1);
PlaceMention(_1);
- switchInt(copy (*_1)) -> [0: bb2, otherwise: bb1];
+ switchInt(copy (*_1)) -> [0: bb1, otherwise: bb1];
}
bb1: {
- goto -> bb5;
- }
-
- bb2: {
- goto -> bb5;
- }
-
- bb3: {
- goto -> bb1;
- }
-
- bb4: {
- FakeRead(ForMatchedPlace(None), _1);
- unreachable;
- }
-
- bb5: {
_0 = const ();
StorageDead(_2);
StorageDead(_1);
return;
}
}

View file

@ -0,0 +1,15 @@
// Ensure that we don't optimize out `SwitchInt` reads even if that terminator
// branches to the same basic block on every target, since the operand may have
// side-effects that affect analysis of the MIR.
//
// See <https://github.com/rust-lang/miri/issues/4237>.
//@ test-mir-pass: SimplifyCfg-initial
//@ compile-flags: -Zmir-preserve-ub
// EMIT_MIR read_from_trivial_switch.main.SimplifyCfg-initial.diff
fn main() {
let ref_ = &1i32;
// CHECK: switchInt
let &(0 | _) = ref_;
}

View file

@ -0,0 +1,8 @@
// Regression test for the semantic changes in
// <https://github.com/rust-lang/rust/pull/139042>.
fn main() {
let x;
let (0 | _) = x;
//~^ ERROR used binding `x` isn't initialized
}

View file

@ -0,0 +1,16 @@
error[E0381]: used binding `x` isn't initialized
--> $DIR/uninit-trivial.rs:6:10
|
LL | let x;
| - binding declared here but left uninitialized
LL | let (0 | _) = x;
| ^^^^^ `x` used here but it isn't initialized
|
help: consider assigning a value
|
LL | let x = 42;
| ++++
error: aborting due to 1 previous error
For more information about this error, try `rustc --explain E0381`.