Auto merge of #129283 - saethlin:unreachable-allocas, r=scottmcm
Don't alloca for unused locals

We already have a concept of mono-unreachable basic blocks; this is primarily useful for ensuring that we do not compile code under an `if false`. But since we never gave locals the same analysis, a large local that is only used under an `if false` will still have stack space allocated for it.

There are 3 places we traverse MIR during monomorphization: inside the collector, in `non_ssa_locals`, and in the walk to generate code. Unfortunately, https://github.com/rust-lang/rust/pull/129283#issuecomment-2297925578 indicates that we cannot afford the expense of tracking reachable locals during the collector's traversal, so we do need at least two mono-reachable traversals. And of course caching is of no help here, because the benchmarks that regress are incr-unchanged; they don't do any codegen.

This fixes the second problem in https://github.com/rust-lang/rust/issues/129282, and brings us another step toward `const if` at home.
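To illustrate the problem being fixed, here is a minimal sketch (the function, its const parameter, and the array size are invented for the example, not taken from the compiler's tests): a large local that is only live under a branch that monomorphization resolves to `false` previously still received an alloca.

```rust
// Hypothetical example: when instantiated with ENABLED = false, the body of
// `if ENABLED` is a mono-unreachable block. Before this change the backend
// still reserved 1 MiB of stack for `buf`; with it, no alloca is emitted.
pub fn example<const ENABLED: bool>(i: usize) -> u8 {
    if ENABLED {
        let buf = [0u8; 1024 * 1024]; // large local, used only in this arm
        buf[i % buf.len()]
    } else {
        0
    }
}

fn main() {
    // Only the ENABLED = false instantiation is monomorphized here.
    println!("{}", example::<false>(7));
}
```

One way to observe the effect is to compile with `rustc --emit=llvm-ir` (unoptimized, so LLVM does not clean the stack slot up on its own) and look for a large `alloca` in the IR of the `example::<false>` instantiation.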
commit 2836482241
9 changed files with 151 additions and 54 deletions
diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -15,6 +15,7 @@ use crate::traits::*;
 pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     fx: &FunctionCx<'a, 'tcx, Bx>,
+    traversal_order: &[mir::BasicBlock],
 ) -> BitSet<mir::Local> {
     let mir = fx.mir;
     let dominators = mir.basic_blocks.dominators();
@@ -24,13 +25,7 @@ pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         .map(|decl| {
             let ty = fx.monomorphize(decl.ty);
             let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span);
-            if layout.is_zst() {
-                LocalKind::ZST
-            } else if fx.cx.is_backend_immediate(layout) || fx.cx.is_backend_scalar_pair(layout) {
-                LocalKind::Unused
-            } else {
-                LocalKind::Memory
-            }
+            if layout.is_zst() { LocalKind::ZST } else { LocalKind::Unused }
         })
         .collect();
@@ -44,7 +39,8 @@ pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // If there exists a local definition that dominates all uses of that local,
     // the definition should be visited first. Traverse blocks in an order that
     // is a topological sort of dominance partial order.
-    for (bb, data) in traversal::reverse_postorder(mir) {
+    for bb in traversal_order.iter().copied() {
+        let data = &mir.basic_blocks[bb];
         analyzer.visit_basic_block_data(bb, data);
     }
@@ -77,11 +73,22 @@ struct LocalAnalyzer<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> {
 impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> LocalAnalyzer<'a, 'b, 'tcx, Bx> {
     fn define(&mut self, local: mir::Local, location: DefLocation) {
+        let fx = self.fx;
         let kind = &mut self.locals[local];
+        let decl = &fx.mir.local_decls[local];
         match *kind {
             LocalKind::ZST => {}
             LocalKind::Memory => {}
-            LocalKind::Unused => *kind = LocalKind::SSA(location),
+            LocalKind::Unused => {
+                let ty = fx.monomorphize(decl.ty);
+                let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span);
+                *kind =
+                    if fx.cx.is_backend_immediate(layout) || fx.cx.is_backend_scalar_pair(layout) {
+                        LocalKind::SSA(location)
+                    } else {
+                        LocalKind::Memory
+                    };
+            }
             LocalKind::SSA(_) => *kind = LocalKind::Memory,
         }
     }

diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -218,7 +218,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut start_bx);
 
-    let memory_locals = analyze::non_ssa_locals(&fx);
+    let traversal_order = traversal::mono_reachable_reverse_postorder(mir, cx.tcx(), instance);
+    let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);
 
     // Allocate variable and temp allocas
     let local_values = {
@@ -277,17 +278,20 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     // So drop the builder of `start_llbb` to avoid having two at the same time.
     drop(start_bx);
 
-    let reachable_blocks = traversal::mono_reachable_as_bitset(mir, cx.tcx(), instance);
+    let mut unreached_blocks = BitSet::new_filled(mir.basic_blocks.len());
+    // Codegen the body of each reachable block using our reverse postorder list.
+    for bb in traversal_order {
+        fx.codegen_block(bb);
+        unreached_blocks.remove(bb);
+    }
 
-    // Codegen the body of each block using reverse postorder
-    for (bb, _) in traversal::reverse_postorder(mir) {
-        if reachable_blocks.contains(bb) {
-            fx.codegen_block(bb);
-        } else {
-            // We want to skip this block, because it's not reachable. But we still create
-            // the block so terminators in other blocks can reference it.
-            fx.codegen_block_as_unreachable(bb);
-        }
+    // FIXME: These empty unreachable blocks are *mostly* a waste. They are occasionally
+    // targets for a SwitchInt terminator, but the reimplementation of the mono-reachable
+    // simplification in SwitchInt lowering sometimes misses cases that
+    // mono_reachable_reverse_postorder manages to figure out.
+    // The solution is to do something like post-mono GVN. But for now we have this hack.
+    for bb in unreached_blocks.iter() {
+        fx.codegen_block_as_unreachable(bb);
     }
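For intuition about the mono-reachable traversals the new code relies on, here is a deliberately simplified, self-contained sketch of the idea (the `Terminator` model and the `mono_reachable` helper below are invented for illustration; the real implementation is `mono_reachable_reverse_postorder`, which works on actual MIR): when a block ends in a `SwitchInt` whose discriminant is a constant after monomorphization, only the edge that constant selects is followed, so `if false` bodies never become reachable.

```rust
// Simplified model of basic-block terminators. In the real compiler the
// switch operand may only become a known constant after monomorphization.
enum Terminator {
    Goto(usize),
    // (post-mono constant discriminant if known, (value, target) pairs, otherwise-target)
    SwitchInt(Option<u128>, Vec<(u128, usize)>, usize),
    Return,
}

// Collect the blocks reachable once statically known switches are folded.
fn mono_reachable(blocks: &[Terminator], start: usize) -> Vec<bool> {
    let mut reachable = vec![false; blocks.len()];
    let mut stack = vec![start];
    while let Some(bb) = stack.pop() {
        if std::mem::replace(&mut reachable[bb], true) {
            continue; // already visited
        }
        match &blocks[bb] {
            Terminator::Goto(t) => stack.push(*t),
            Terminator::SwitchInt(Some(v), targets, otherwise) => {
                // Known discriminant: follow only the branch it selects.
                let t = targets.iter().find(|(val, _)| val == v).map(|(_, t)| *t);
                stack.push(t.unwrap_or(*otherwise));
            }
            Terminator::SwitchInt(None, targets, otherwise) => {
                // Unknown discriminant: every successor stays reachable.
                stack.extend(targets.iter().map(|(_, t)| *t));
                stack.push(*otherwise);
            }
            Terminator::Return => {}
        }
    }
    reachable
}

fn main() {
    // bb0 switches on the constant 0; the value-1 edge to bb1 (an `if false`
    // style arm's counterpart) is never taken, so bb1 is mono-unreachable.
    let blocks = vec![
        Terminator::SwitchInt(Some(0), vec![(1, 1)], 2),
        Terminator::Goto(3),
        Terminator::Goto(3),
        Terminator::Return,
    ];
    assert_eq!(mono_reachable(&blocks, 0), vec![true, false, true, true]);
}
```

With `non_ssa_locals` now analyzing locals along the same pruned traversal order, a local whose only uses sit in unreached blocks is never promoted out of `LocalKind::Unused`, so no alloca is emitted for it.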