Rollup merge of #58122 - matthieu-m:range_incl_perf, r=dtolnay
RangeInclusive internal iteration performance improvement. Specialize `Iterator::try_fold` and `DoubleEndedIterator::try_rfold` to improve code generation in all internal iteration scenarios. This change brings the performance of internal iteration with `RangeInclusive` on par with the performance of iteration with `Range`: - Single conditional jump in hot loop, - Unrolling and vectorization, - And even Closed Form substitution. Unfortunately, it only applies to internal iteration. Despite various attempts at streamlining the implementation of `next` and `next_back`, LLVM has stubbornly refused to optimize external iteration appropriately, leaving me with a choice between: - The current implementation, for which Closed Form substitution is performed, but which uses 2 conditional jumps in the hot loop when optimization fails. - An implementation using an `is_done` boolean, which uses 1 conditional jump in the hot loop when optimization fails, allowing unrolling and vectorization, but for which Closed Form substitution fails. In the absence of any conclusive evidence as to which use case matters most, and with no assurance that the lack of Closed Form substitution is not indicative of other optimizations being foiled, there is no way to pick one implementation over the other, and thus I defer to the status quo as far as `next` and `next_back` are concerned.
This commit is contained in:
commit
f19bec89d7
3 changed files with 81 additions and 6 deletions
|
@ -1,6 +1,6 @@
|
||||||
use convert::TryFrom;
|
use convert::TryFrom;
|
||||||
use mem;
|
use mem;
|
||||||
use ops::{self, Add, Sub};
|
use ops::{self, Add, Sub, Try};
|
||||||
use usize;
|
use usize;
|
||||||
|
|
||||||
use super::{FusedIterator, TrustedLen};
|
use super::{FusedIterator, TrustedLen};
|
||||||
|
@ -368,11 +368,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
|
||||||
Some(Less) => {
|
Some(Less) => {
|
||||||
self.is_empty = Some(false);
|
self.is_empty = Some(false);
|
||||||
self.start = plus_n.add_one();
|
self.start = plus_n.add_one();
|
||||||
return Some(plus_n)
|
return Some(plus_n);
|
||||||
}
|
}
|
||||||
Some(Equal) => {
|
Some(Equal) => {
|
||||||
self.is_empty = Some(true);
|
self.is_empty = Some(true);
|
||||||
return Some(plus_n)
|
return Some(plus_n);
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
@ -382,6 +382,34 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R
|
||||||
|
where
|
||||||
|
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
|
||||||
|
{
|
||||||
|
self.compute_is_empty();
|
||||||
|
|
||||||
|
if self.is_empty() {
|
||||||
|
return Try::from_ok(init);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut accum = init;
|
||||||
|
|
||||||
|
while self.start < self.end {
|
||||||
|
let n = self.start.add_one();
|
||||||
|
let n = mem::replace(&mut self.start, n);
|
||||||
|
accum = f(accum, n)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.is_empty = Some(true);
|
||||||
|
|
||||||
|
if self.start == self.end {
|
||||||
|
accum = f(accum, self.start.clone())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Try::from_ok(accum)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn last(mut self) -> Option<A> {
|
fn last(mut self) -> Option<A> {
|
||||||
self.next_back()
|
self.next_back()
|
||||||
|
@ -415,6 +443,33 @@ impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
|
||||||
self.end.clone()
|
self.end.clone()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
|
||||||
|
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
|
||||||
|
{
|
||||||
|
self.compute_is_empty();
|
||||||
|
|
||||||
|
if self.is_empty() {
|
||||||
|
return Try::from_ok(init);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut accum = init;
|
||||||
|
|
||||||
|
while self.start < self.end {
|
||||||
|
let n = self.end.sub_one();
|
||||||
|
let n = mem::replace(&mut self.end, n);
|
||||||
|
accum = f(accum, n)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.is_empty = Some(true);
|
||||||
|
|
||||||
|
if self.start == self.end {
|
||||||
|
accum = f(accum, self.start.clone())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Try::from_ok(accum)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "fused", since = "1.26.0")]
|
#[stable(feature = "fused", since = "1.26.0")]
|
||||||
|
|
|
@ -334,12 +334,14 @@ pub struct RangeInclusive<Idx> {
|
||||||
trait RangeInclusiveEquality: Sized {
|
trait RangeInclusiveEquality: Sized {
|
||||||
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool;
|
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> RangeInclusiveEquality for T {
|
impl<T> RangeInclusiveEquality for T {
|
||||||
#[inline]
|
#[inline]
|
||||||
default fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
|
default fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
|
||||||
range.is_empty.unwrap_or_default()
|
range.is_empty.unwrap_or_default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: PartialOrd> RangeInclusiveEquality for T {
|
impl<T: PartialOrd> RangeInclusiveEquality for T {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
|
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
|
||||||
|
|
|
@ -1741,19 +1741,37 @@ fn test_range_inclusive_folds() {
|
||||||
assert_eq!((1..=10).sum::<i32>(), 55);
|
assert_eq!((1..=10).sum::<i32>(), 55);
|
||||||
assert_eq!((1..=10).rev().sum::<i32>(), 55);
|
assert_eq!((1..=10).rev().sum::<i32>(), 55);
|
||||||
|
|
||||||
let mut it = 40..=50;
|
let mut it = 44..=50;
|
||||||
assert_eq!(it.try_fold(0, i8::checked_add), None);
|
assert_eq!(it.try_fold(0, i8::checked_add), None);
|
||||||
assert_eq!(it, 44..=50);
|
assert_eq!(it, 47..=50);
|
||||||
|
assert_eq!(it.try_fold(0, i8::checked_add), None);
|
||||||
|
assert_eq!(it, 50..=50);
|
||||||
|
assert_eq!(it.try_fold(0, i8::checked_add), Some(50));
|
||||||
|
assert!(it.is_empty());
|
||||||
|
assert_eq!(it.try_fold(0, i8::checked_add), Some(0));
|
||||||
|
assert!(it.is_empty());
|
||||||
|
|
||||||
|
let mut it = 40..=47;
|
||||||
assert_eq!(it.try_rfold(0, i8::checked_add), None);
|
assert_eq!(it.try_rfold(0, i8::checked_add), None);
|
||||||
assert_eq!(it, 44..=47);
|
assert_eq!(it, 40..=44);
|
||||||
|
assert_eq!(it.try_rfold(0, i8::checked_add), None);
|
||||||
|
assert_eq!(it, 40..=41);
|
||||||
|
assert_eq!(it.try_rfold(0, i8::checked_add), Some(81));
|
||||||
|
assert!(it.is_empty());
|
||||||
|
assert_eq!(it.try_rfold(0, i8::checked_add), Some(0));
|
||||||
|
assert!(it.is_empty());
|
||||||
|
|
||||||
let mut it = 10..=20;
|
let mut it = 10..=20;
|
||||||
assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(165));
|
assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(165));
|
||||||
assert!(it.is_empty());
|
assert!(it.is_empty());
|
||||||
|
assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(0));
|
||||||
|
assert!(it.is_empty());
|
||||||
|
|
||||||
let mut it = 10..=20;
|
let mut it = 10..=20;
|
||||||
assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(165));
|
assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(165));
|
||||||
assert!(it.is_empty());
|
assert!(it.is_empty());
|
||||||
|
assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(0));
|
||||||
|
assert!(it.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue