1
Fork 0

Auto merge of #106343 - the8472:slice-iter-fold, r=scottmcm

optimize slice::Iter::fold

Fixes 2 of 4 cases from #106288

```
OLD: test slice::fold_to_last                                           ... bench:         248 ns/iter (+/- 3)
NEW: test slice::fold_to_last                                           ... bench:           0 ns/iter (+/- 0)
```
This commit is contained in:
bors 2023-06-15 09:38:53 +00:00
commit 4996b56ba9
4 changed files with 56 additions and 8 deletions

View file

@ -191,6 +191,39 @@ macro_rules! iterator {
self.next_back()
}
#[inline]
fn fold<B, F>(self, init: B, mut f: F) -> B
where
F: FnMut(B, Self::Item) -> B,
{
// this implementation consists of the following optimizations compared to the
// default implementation:
// - do-while loop, as is llvm's preferred loop shape,
// see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
// - bumps an index instead of a pointer since the latter case inhibits
// some optimizations, see #111603
// - avoids Option wrapping/matching
if is_empty!(self) {
return init;
}
let mut acc = init;
let mut i = 0;
let len = len!(self);
loop {
// SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
// the slice allocation
acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
// SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
// slice had that length, in which case we'll break out of the loop
// after the increment
i = unsafe { i.unchecked_add(1) };
if i == len {
break;
}
}
acc
}
// We override the default implementation, which uses `try_fold`,
// because this simple implementation generates less LLVM IR and is
// faster to compile.