Auto merge of #90491 - Mark-Simulacrum:push-pred-faster, r=matthewjasper
Optimize live point computation. This refactors the live-point computation to lower per-MIR-instruction costs by operating largely at a per-block level. This doesn't fundamentally change the number of operations necessary, but it greatly improves practical performance by aggregating bit manipulation into ranges rather than single bits; this scales much better with larger blocks. On the benchmark provided in #90445, with 100,000 array elements, wall time for a check build improves from 143 seconds to 15 seconds. I consider the tiny losses here acceptable given the many small wins on real-world benchmarks and the large wins on stress tests. The new code scales much better, but on some subset of inputs the slightly higher constant overhead decreases performance somewhat. Overall, though, this is expected to be a big win for pathological cases (as illustrated by the test case motivating this work) and largely immaterial for non-pathological cases. I consider the new code somewhat easier to follow, too.
This commit is contained in:
commit
8a48b376d5
5 changed files with 278 additions and 29 deletions
|
@ -60,6 +60,11 @@ impl RegionValueElements {
|
|||
PointIndex::new(start_index)
|
||||
}
|
||||
|
||||
/// Returns the `PointIndex` of the first point in the basic block that contains `index`.
|
||||
crate fn to_block_start(&self, index: PointIndex) -> PointIndex {
|
||||
PointIndex::new(self.statements_before_block[self.basic_blocks[index]])
|
||||
}
|
||||
|
||||
/// Converts a `PointIndex` back to a location. O(1).
|
||||
crate fn to_location(&self, index: PointIndex) -> Location {
|
||||
assert!(index.index() < self.num_points);
|
||||
|
@ -76,29 +81,6 @@ impl RegionValueElements {
|
|||
crate fn point_in_range(&self, index: PointIndex) -> bool {
|
||||
index.index() < self.num_points
|
||||
}
|
||||
|
||||
/// Pushes all predecessors of `index` onto `stack`.
|
||||
crate fn push_predecessors(
|
||||
&self,
|
||||
body: &Body<'_>,
|
||||
index: PointIndex,
|
||||
stack: &mut Vec<PointIndex>,
|
||||
) {
|
||||
let Location { block, statement_index } = self.to_location(index);
|
||||
if statement_index == 0 {
|
||||
// If this is a basic block head, then the predecessors are
|
||||
// the terminators of other basic blocks
|
||||
stack.extend(
|
||||
body.predecessors()[block]
|
||||
.iter()
|
||||
.map(|&pred_bb| body.terminator_loc(pred_bb))
|
||||
.map(|pred_loc| self.point_from_location(pred_loc)),
|
||||
);
|
||||
} else {
|
||||
// Otherwise, the pred is just the previous statement
|
||||
stack.push(PointIndex::new(index.index() - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rustc_index::newtype_index! {
|
||||
|
|
|
@ -205,12 +205,42 @@ impl LivenessResults<'me, 'typeck, 'flow, 'tcx> {
|
|||
|
||||
self.stack.extend(self.cx.local_use_map.uses(local));
|
||||
while let Some(p) = self.stack.pop() {
|
||||
if self.defs.contains(p) {
|
||||
continue;
|
||||
}
|
||||
// We are live in this block from the closest to us of:
|
||||
//
|
||||
// * Inclusively, the block start
|
||||
// * Exclusively, the previous definition (if it's in this block)
|
||||
// * Exclusively, the previous live_at setting (an optimization)
|
||||
let block_start = self.cx.elements.to_block_start(p);
|
||||
let previous_defs = self.defs.last_set_in(block_start..=p);
|
||||
let previous_live_at = self.use_live_at.last_set_in(block_start..=p);
|
||||
|
||||
if self.use_live_at.insert(p) {
|
||||
self.cx.elements.push_predecessors(self.cx.body, p, &mut self.stack)
|
||||
let exclusive_start = match (previous_defs, previous_live_at) {
|
||||
(Some(a), Some(b)) => Some(std::cmp::max(a, b)),
|
||||
(Some(a), None) | (None, Some(a)) => Some(a),
|
||||
(None, None) => None,
|
||||
};
|
||||
|
||||
if let Some(exclusive) = exclusive_start {
|
||||
self.use_live_at.insert_range(exclusive + 1..=p);
|
||||
|
||||
// If we have a bound after the start of the block, we should
|
||||
// not add the predecessors for this block.
|
||||
continue;
|
||||
} else {
|
||||
// Add all the elements of this block.
|
||||
self.use_live_at.insert_range(block_start..=p);
|
||||
|
||||
// Then add the predecessors for this block, which are the
|
||||
// terminators of predecessor basic blocks. Push those onto the
|
||||
// stack so that the next iteration(s) will process them.
|
||||
|
||||
let block = self.cx.elements.to_location(block_start).block;
|
||||
self.stack.extend(
|
||||
self.cx.body.predecessors()[block]
|
||||
.iter()
|
||||
.map(|&pred_bb| self.cx.body.terminator_loc(pred_bb))
|
||||
.map(|pred_loc| self.cx.elements.point_from_location(pred_loc)),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue