Auto merge of #133328 - nnethercote:simplify-SwitchInt-handling, r=tmiasko

Simplify `SwitchInt` handling

Dataflow handling of `SwitchInt` is currently complicated. This PR simplifies it.

r? `@cjgillot`
2024-12-18 22:57:23 +00:00 · 2024-12-18 22:57:23 +00:00 · c434b4b4b6
commit c434b4b4b6
parent 4ba4ac612d 5f40942f9c
5 changed files with 176 additions and 244 deletions
--- a/compiler/rustc_mir_dataflow/src/framework/direction.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/direction.rs
@ -1,8 +1,6 @@
 use std::ops::RangeInclusive;

-use rustc_middle::mir::{
-    self, BasicBlock, CallReturnPlaces, Location, SwitchTargets, TerminatorEdges,
-};
+use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges};

 use super::visitor::ResultsVisitor;
 use super::{Analysis, Effect, EffectIndex, Results, SwitchIntTarget};
@ -78,8 +76,6 @@ impl Direction for Backward {
        for pred in body.basic_blocks.predecessors()[block].iter().copied() {
            match body[pred].terminator().kind {
                // Apply terminator-specific edge effects.
-                //
-                // FIXME(ecstaticmorse): Avoid cloning the exit state unconditionally.
                mir::TerminatorKind::Call { destination, target: Some(dest), .. }
                    if dest == block =>
                {
@ -115,18 +111,18 @@ impl Direction for Backward {
                }

                mir::TerminatorKind::SwitchInt { targets: _, ref discr } => {
-                    let mut applier = BackwardSwitchIntEdgeEffectsApplier {
-                        body,
-                        pred,
-                        exit_state,
-                        block,
-                        propagate: &mut propagate,
-                        effects_applied: false,
-                    };
+                    if let Some(mut data) = analysis.get_switch_int_data(block, discr) {
+                        let values = &body.basic_blocks.switch_sources()[&(block, pred)];
+                        let targets =
+                            values.iter().map(|&value| SwitchIntTarget { value, target: block });

-                    analysis.apply_switch_int_edge_effects(pred, discr, &mut applier);
-
-                    if !applier.effects_applied {
+                        let mut tmp = analysis.bottom_value(body);
+                        for target in targets {
+                            tmp.clone_from(&exit_state);
+                            analysis.apply_switch_int_edge_effect(&mut data, &mut tmp, target);
+                            propagate(pred, &tmp);
+                        }
+                    } else {
                        propagate(pred, exit_state)
                    }
                }
@ -245,37 +241,6 @@ impl Direction for Backward {
    }
 }

-struct BackwardSwitchIntEdgeEffectsApplier<'mir, 'tcx, D, F> {
-    body: &'mir mir::Body<'tcx>,
-    pred: BasicBlock,
-    exit_state: &'mir mut D,
-    block: BasicBlock,
-    propagate: &'mir mut F,
-    effects_applied: bool,
-}
-
-impl<D, F> super::SwitchIntEdgeEffects<D> for BackwardSwitchIntEdgeEffectsApplier<'_, '_, D, F>
-where
-    D: Clone,
-    F: FnMut(BasicBlock, &D),
-{
-    fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) {
-        assert!(!self.effects_applied);
-
-        let values = &self.body.basic_blocks.switch_sources()[&(self.block, self.pred)];
-        let targets = values.iter().map(|&value| SwitchIntTarget { value, target: self.block });
-
-        let mut tmp = None;
-        for target in targets {
-            let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state);
-            apply_edge_effect(tmp, target);
-            (self.propagate)(self.pred, tmp);
-        }
-
-        self.effects_applied = true;
-    }
-}
-
 /// Dataflow that runs from the entry of a block (the first statement), to its exit (terminator).
 pub struct Forward;

@ -284,7 +249,7 @@ impl Direction for Forward {

    fn apply_effects_in_block<'mir, 'tcx, A>(
        analysis: &mut A,
-        _body: &mir::Body<'tcx>,
+        body: &mir::Body<'tcx>,
        state: &mut A::Domain,
        block: BasicBlock,
        block_data: &'mir mir::BasicBlockData<'tcx>,
@ -324,23 +289,28 @@ impl Direction for Forward {
                }
            }
            TerminatorEdges::SwitchInt { targets, discr } => {
-                let mut applier = ForwardSwitchIntEdgeEffectsApplier {
-                    exit_state,
-                    targets,
-                    propagate,
-                    effects_applied: false,
-                };
+                if let Some(mut data) = analysis.get_switch_int_data(block, discr) {
+                    let mut tmp = analysis.bottom_value(body);
+                    for (value, target) in targets.iter() {
+                        tmp.clone_from(&exit_state);
+                        analysis.apply_switch_int_edge_effect(
+                            &mut data,
+                            &mut tmp,
+                            SwitchIntTarget { value: Some(value), target },
+                        );
+                        propagate(target, &tmp);
+                    }

-                analysis.apply_switch_int_edge_effects(block, discr, &mut applier);
-
-                let ForwardSwitchIntEdgeEffectsApplier {
-                    exit_state,
-                    mut propagate,
-                    effects_applied,
-                    ..
-                } = applier;
-
-                if !effects_applied {
+                    // Once we get to the final, "otherwise" branch, there is no need to preserve
+                    // `exit_state`, so pass it directly to `apply_switch_int_edge_effect` to save
+                    // a clone of the dataflow state.
+                    let otherwise = targets.otherwise();
+                    analysis.apply_switch_int_edge_effect(&mut data, exit_state, SwitchIntTarget {
+                        value: None,
+                        target: otherwise,
+                    });
+                    propagate(otherwise, exit_state);
+                } else {
                    for target in targets.all_targets() {
                        propagate(*target, exit_state);
                    }
@ -454,54 +424,3 @@ impl Direction for Forward {
        vis.visit_block_end(state);
    }
 }
-
-struct ForwardSwitchIntEdgeEffectsApplier<'mir, D, F> {
-    exit_state: &'mir mut D,
-    targets: &'mir SwitchTargets,
-    propagate: F,
-
-    effects_applied: bool,
-}
-
-impl<D, F> super::SwitchIntEdgeEffects<D> for ForwardSwitchIntEdgeEffectsApplier<'_, D, F>
-where
-    D: Clone,
-    F: FnMut(BasicBlock, &D),
-{
-    fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) {
-        assert!(!self.effects_applied);
-
-        let mut tmp = None;
-        for (value, target) in self.targets.iter() {
-            let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state);
-            apply_edge_effect(tmp, SwitchIntTarget { value: Some(value), target });
-            (self.propagate)(target, tmp);
-        }
-
-        // Once we get to the final, "otherwise" branch, there is no need to preserve `exit_state`,
-        // so pass it directly to `apply_edge_effect` to save a clone of the dataflow state.
-        let otherwise = self.targets.otherwise();
-        apply_edge_effect(self.exit_state, SwitchIntTarget { value: None, target: otherwise });
-        (self.propagate)(otherwise, self.exit_state);
-
-        self.effects_applied = true;
-    }
-}
-
-/// An analogue of `Option::get_or_insert_with` that stores a clone of `val` into `opt`, but uses
-/// the more efficient `clone_from` if `opt` was `Some`.
-///
-/// Returns a mutable reference to the new clone that resides in `opt`.
-//
-// FIXME: Figure out how to express this using `Option::clone_from`, or maybe lift it into the
-// standard library?
-fn opt_clone_from_or_clone<'a, T: Clone>(opt: &'a mut Option<T>, val: &T) -> &'a mut T {
-    if opt.is_some() {
-        let ret = opt.as_mut().unwrap();
-        ret.clone_from(val);
-        ret
-    } else {
-        *opt = Some(val.clone());
-        opt.as_mut().unwrap()
-    }
-}
--- a/compiler/rustc_mir_dataflow/src/framework/mod.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs
@ -103,6 +103,9 @@ pub trait Analysis<'tcx> {
    /// The direction of this analysis. Either `Forward` or `Backward`.
    type Direction: Direction = Forward;

+    /// Auxiliary data used for analyzing `SwitchInt` terminators, if necessary.
+    type SwitchIntData = !;
+
    /// A descriptive name for this analysis. Used only for debugging.
    ///
    /// This name should be brief and contain no spaces, periods or other characters that are not
@ -190,25 +193,36 @@ pub trait Analysis<'tcx> {
    ) {
    }

-    /// Updates the current dataflow state with the effect of taking a particular branch in a
-    /// `SwitchInt` terminator.
+    /// Used to update the current dataflow state with the effect of taking a particular branch in
+    /// a `SwitchInt` terminator.
    ///
    /// Unlike the other edge-specific effects, which are allowed to mutate `Self::Domain`
-    /// directly, overriders of this method must pass a callback to
-    /// `SwitchIntEdgeEffects::apply`. The callback will be run once for each outgoing edge and
-    /// will have access to the dataflow state that will be propagated along that edge.
+    /// directly, overriders of this method must return a `Self::SwitchIntData` value (wrapped in
+    /// `Some`). The `apply_switch_int_edge_effect` method will then be called once for each
+    /// outgoing edge and will have access to the dataflow state that will be propagated along that
+    /// edge, and also the `Self::SwitchIntData` value.
    ///
    /// This interface is somewhat more complex than the other visitor-like "effect" methods.
    /// However, it is both more ergonomic—callers don't need to recompute or cache information
    /// about a given `SwitchInt` terminator for each one of its edges—and more efficient—the
    /// engine doesn't need to clone the exit state for a block unless
-    /// `SwitchIntEdgeEffects::apply` is actually called.
-    fn apply_switch_int_edge_effects(
+    /// `get_switch_int_data` returns `Some`.
+    fn get_switch_int_data(
        &mut self,
-        _block: BasicBlock,
+        _block: mir::BasicBlock,
        _discr: &mir::Operand<'tcx>,
-        _apply_edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>,
+    ) -> Option<Self::SwitchIntData> {
+        None
+    }
+
+    /// See comments on `get_switch_int_data`.
+    fn apply_switch_int_edge_effect(
+        &mut self,
+        _data: &mut Self::SwitchIntData,
+        _state: &mut Self::Domain,
+        _edge: SwitchIntTarget,
    ) {
+        unreachable!();
    }

    /* Extension methods */
@ -421,12 +435,5 @@ pub struct SwitchIntTarget {
    pub target: BasicBlock,
 }

-/// A type that records the edge-specific effects for a `SwitchInt` terminator.
-pub trait SwitchIntEdgeEffects<D> {
-    /// Calls `apply_edge_effect` for each outgoing edge from a `SwitchInt` terminator and
-    /// records the results.
-    fn apply(&mut self, apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget));
-}
-
 #[cfg(test)]
 mod tests;
--- a/compiler/rustc_mir_dataflow/src/impls/initialized.rs
+++ b/compiler/rustc_mir_dataflow/src/impls/initialized.rs
@ -1,20 +1,96 @@
 use std::assert_matches::assert_matches;

+use rustc_abi::VariantIdx;
 use rustc_index::Idx;
 use rustc_index::bit_set::{BitSet, MixedBitSet};
 use rustc_middle::bug;
 use rustc_middle::mir::{self, Body, CallReturnPlaces, Location, TerminatorEdges};
+use rustc_middle::ty::util::Discr;
 use rustc_middle::ty::{self, TyCtxt};
 use tracing::{debug, instrument};

 use crate::elaborate_drops::DropFlagState;
-use crate::framework::SwitchIntEdgeEffects;
+use crate::framework::SwitchIntTarget;
 use crate::move_paths::{HasMoveData, InitIndex, InitKind, LookupResult, MoveData, MovePathIndex};
 use crate::{
    Analysis, GenKill, MaybeReachable, drop_flag_effects, drop_flag_effects_for_function_entry,
    drop_flag_effects_for_location, on_all_children_bits, on_lookup_result_bits,
 };

+// Used by both `MaybeInitializedPlaces` and `MaybeUninitializedPlaces`.
+pub struct MaybePlacesSwitchIntData<'tcx> {
+    enum_place: mir::Place<'tcx>,
+    discriminants: Vec<(VariantIdx, Discr<'tcx>)>,
+    index: usize,
+}
+
+impl<'tcx> MaybePlacesSwitchIntData<'tcx> {
+    // The discriminant order in the `SwitchInt` targets should match the order yielded by
+    // `AdtDef::discriminants`. We rely on this to match each discriminant in the targets to its
+    // corresponding variant in linear time.
+    fn next_discr(&mut self, value: u128) -> VariantIdx {
+        // An out-of-bounds panic will occur if the discriminant ordering isn't as described above.
+        loop {
+            let (variant, discr) = self.discriminants[self.index];
+            self.index += 1;
+            if discr.val == value {
+                return variant;
+            }
+        }
+    }
+}
+
+impl<'tcx> MaybePlacesSwitchIntData<'tcx> {
+    fn new(
+        tcx: TyCtxt<'tcx>,
+        body: &Body<'tcx>,
+        block: mir::BasicBlock,
+        discr: &mir::Operand<'tcx>,
+    ) -> Option<Self> {
+        let Some(discr) = discr.place() else { return None };
+
+        // Inspect a `SwitchInt`-terminated basic block to see if the condition of that `SwitchInt`
+        // is an enum discriminant.
+        //
+        // We expect such blocks to have a call to `discriminant` as their last statement like so:
+        // ```text
+        // ...
+        // _42 = discriminant(_1)
+        // SwitchInt(_42, ..)
+        // ```
+        // If the basic block matches this pattern, this function gathers the place corresponding
+        // to the enum (`_1` in the example above) as well as the discriminants.
+        let block_data = &body[block];
+        for statement in block_data.statements.iter().rev() {
+            match statement.kind {
+                mir::StatementKind::Assign(box (lhs, mir::Rvalue::Discriminant(enum_place)))
+                    if lhs == discr =>
+                {
+                    match enum_place.ty(body, tcx).ty.kind() {
+                        ty::Adt(enum_def, _) => {
+                            return Some(MaybePlacesSwitchIntData {
+                                enum_place,
+                                discriminants: enum_def.discriminants(tcx).collect(),
+                                index: 0,
+                            });
+                        }
+
+                        // `Rvalue::Discriminant` is also used to get the active yield point for a
+                        // coroutine, but we do not need edge-specific effects in that case. This
+                        // may change in the future.
+                        ty::Coroutine(..) => break,
+
+                        t => bug!("`discriminant` called on unexpected type {:?}", t),
+                    }
+                }
+                mir::StatementKind::Coverage(_) => continue,
+                _ => break,
+            }
+        }
+        None
+    }
+}
+
 /// `MaybeInitializedPlaces` tracks all places that might be
 /// initialized upon reaching a particular point in the control flow
 /// for a function.
@ -247,6 +323,8 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
    /// We use a mixed bitset to avoid paying too high a memory footprint.
    type Domain = MaybeReachable<MixedBitSet<MovePathIndex>>;

+    type SwitchIntData = MaybePlacesSwitchIntData<'tcx>;
+
    const NAME: &'static str = "maybe_init";

    fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
@ -293,6 +371,8 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
        terminator: &'mir mir::Terminator<'tcx>,
        location: Location,
    ) -> TerminatorEdges<'mir, 'tcx> {
+        // Note: `edges` must be computed first because `drop_flag_effects_for_location` can change
+        // the result of `is_unwind_dead`.
        let mut edges = terminator.edges();
        if self.skip_unreachable_unwind
            && let mir::TerminatorKind::Drop { target, unwind, place, replace: _ } = terminator.kind
@ -326,46 +406,34 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
        });
    }

-    fn apply_switch_int_edge_effects(
+    fn get_switch_int_data(
        &mut self,
        block: mir::BasicBlock,
        discr: &mir::Operand<'tcx>,
-        edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>,
-    ) {
+    ) -> Option<Self::SwitchIntData> {
        if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration {
-            return;
+            return None;
        }

-        let enum_ = discr.place().and_then(|discr| {
-            switch_on_enum_discriminant(self.tcx, self.body, &self.body[block], discr)
-        });
-
-        let Some((enum_place, enum_def)) = enum_ else {
-            return;
-        };
-
-        let mut discriminants = enum_def.discriminants(self.tcx);
-        edge_effects.apply(|state, edge| {
-            let Some(value) = edge.value else {
-                return;
-            };
-
-            // MIR building adds discriminants to the `values` array in the same order as they
-            // are yielded by `AdtDef::discriminants`. We rely on this to match each
-            // discriminant in `values` to its corresponding variant in linear time.
-            let (variant, _) = discriminants
-                .find(|&(_, discr)| discr.val == value)
-                .expect("Order of `AdtDef::discriminants` differed from `SwitchInt::values`");
+        MaybePlacesSwitchIntData::new(self.tcx, self.body, block, discr)
+    }

+    fn apply_switch_int_edge_effect(
+        &mut self,
+        data: &mut Self::SwitchIntData,
+        state: &mut Self::Domain,
+        edge: SwitchIntTarget,
+    ) {
+        if let Some(value) = edge.value {
            // Kill all move paths that correspond to variants we know to be inactive along this
            // particular outgoing edge of a `SwitchInt`.
            drop_flag_effects::on_all_inactive_variants(
-                self.move_data(),
-                enum_place,
-                variant,
+                self.move_data,
+                data.enum_place,
+                data.next_discr(value),
                |mpi| state.kill(mpi),
            );
-        });
+        }
    }
 }

@ -376,6 +444,8 @@ pub type MaybeUninitializedPlacesDomain = MixedBitSet<MovePathIndex>;
 impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
    type Domain = MaybeUninitializedPlacesDomain;

+    type SwitchIntData = MaybePlacesSwitchIntData<'tcx>;
+
    const NAME: &'static str = "maybe_uninit";

    fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
@ -445,50 +515,38 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
        });
    }

-    fn apply_switch_int_edge_effects(
+    fn get_switch_int_data(
        &mut self,
        block: mir::BasicBlock,
        discr: &mir::Operand<'tcx>,
-        edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>,
-    ) {
+    ) -> Option<Self::SwitchIntData> {
        if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration {
-            return;
+            return None;
        }

        if !self.mark_inactive_variants_as_uninit {
-            return;
+            return None;
        }

-        let enum_ = discr.place().and_then(|discr| {
-            switch_on_enum_discriminant(self.tcx, self.body, &self.body[block], discr)
-        });
-
-        let Some((enum_place, enum_def)) = enum_ else {
-            return;
-        };
-
-        let mut discriminants = enum_def.discriminants(self.tcx);
-        edge_effects.apply(|state, edge| {
-            let Some(value) = edge.value else {
-                return;
-            };
-
-            // MIR building adds discriminants to the `values` array in the same order as they
-            // are yielded by `AdtDef::discriminants`. We rely on this to match each
-            // discriminant in `values` to its corresponding variant in linear time.
-            let (variant, _) = discriminants
-                .find(|&(_, discr)| discr.val == value)
-                .expect("Order of `AdtDef::discriminants` differed from `SwitchInt::values`");
+        MaybePlacesSwitchIntData::new(self.tcx, self.body, block, discr)
+    }

+    fn apply_switch_int_edge_effect(
+        &mut self,
+        data: &mut Self::SwitchIntData,
+        state: &mut Self::Domain,
+        edge: SwitchIntTarget,
+    ) {
+        if let Some(value) = edge.value {
            // Mark all move paths that correspond to variants other than this one as maybe
            // uninitialized (in reality, they are *definitely* uninitialized).
            drop_flag_effects::on_all_inactive_variants(
-                self.move_data(),
-                enum_place,
-                variant,
+                self.move_data,
+                data.enum_place,
+                data.next_discr(value),
                |mpi| state.gen_(mpi),
            );
-        });
+        }
    }
 }

@ -578,45 +636,3 @@ impl<'tcx> Analysis<'tcx> for EverInitializedPlaces<'_, 'tcx> {
        }
    }
 }
-
-/// Inspect a `SwitchInt`-terminated basic block to see if the condition of that `SwitchInt` is
-/// an enum discriminant.
-///
-/// We expect such blocks to have a call to `discriminant` as their last statement like so:
-///
-/// ```text
-/// ...
-/// _42 = discriminant(_1)
-/// SwitchInt(_42, ..)
-/// ```
-///
-/// If the basic block matches this pattern, this function returns the place corresponding to the
-/// enum (`_1` in the example above) as well as the `AdtDef` of that enum.
-fn switch_on_enum_discriminant<'mir, 'tcx>(
-    tcx: TyCtxt<'tcx>,
-    body: &'mir mir::Body<'tcx>,
-    block: &'mir mir::BasicBlockData<'tcx>,
-    switch_on: mir::Place<'tcx>,
-) -> Option<(mir::Place<'tcx>, ty::AdtDef<'tcx>)> {
-    for statement in block.statements.iter().rev() {
-        match &statement.kind {
-            mir::StatementKind::Assign(box (lhs, mir::Rvalue::Discriminant(discriminated)))
-                if *lhs == switch_on =>
-            {
-                match discriminated.ty(body, tcx).ty.kind() {
-                    ty::Adt(def, _) => return Some((*discriminated, *def)),
-
-                    // `Rvalue::Discriminant` is also used to get the active yield point for a
-                    // coroutine, but we do not need edge-specific effects in that case. This may
-                    // change in the future.
-                    ty::Coroutine(..) => return None,
-
-                    t => bug!("`discriminant` called on unexpected type {:?}", t),
-                }
-            }
-            mir::StatementKind::Coverage(_) => continue,
-            _ => return None,
-        }
-    }
-    None
-}
--- a/compiler/rustc_mir_dataflow/src/lib.rs
+++ b/compiler/rustc_mir_dataflow/src/lib.rs
@ -5,6 +5,7 @@
 #![feature(exact_size_is_empty)]
 #![feature(file_buffered)]
 #![feature(let_chains)]
+#![feature(never_type)]
 #![feature(try_blocks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
@ -19,8 +20,7 @@ pub use self::drop_flag_effects::{
 };
 pub use self::framework::{
    Analysis, Backward, Direction, EntryStates, Forward, GenKill, JoinSemiLattice, MaybeReachable,
-    Results, ResultsCursor, ResultsVisitor, SwitchIntEdgeEffects, fmt, graphviz, lattice,
-    visit_results,
+    Results, ResultsCursor, ResultsVisitor, fmt, graphviz, lattice, visit_results,
 };
 use self::move_paths::MoveData;