Further Implement Power of Two Optimization
This commit is contained in:
parent
971e37ff7e
commit
9dccd5dce1
3 changed files with 354 additions and 189 deletions
|
@ -901,26 +901,59 @@ macro_rules! int_impl {
|
|||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn checked_pow(self, mut exp: u32) -> Option<Self> {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return Some(1);
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return Some(if exp & 1 != 0 { -1 } else { 1 });
|
||||
}
|
||||
// SAFETY: We just checked that `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return None; } // Division of constants is free
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
if !sign && res == Self::MIN {
|
||||
None
|
||||
} else if sign {
|
||||
Some(res.wrapping_neg())
|
||||
} else {
|
||||
Some(res)
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return Some(1);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = try_opt!(acc.checked_mul(base));
|
||||
}
|
||||
exp /= 2;
|
||||
base = try_opt!(base.checked_mul(base));
|
||||
}
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.checked_mul(base)
|
||||
}
|
||||
|
||||
/// Returns the square root of the number, rounded down.
|
||||
|
@ -1537,27 +1570,58 @@ macro_rules! int_impl {
|
|||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return if exp & 1 != 0 { -1 } else { 1 };
|
||||
}
|
||||
// SAFETY: We just checked that `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return 0; } // Division of constants is free
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
if sign {
|
||||
res.wrapping_neg()
|
||||
} else {
|
||||
res
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
acc = acc.wrapping_mul(base);
|
||||
}
|
||||
exp /= 2;
|
||||
base = base.wrapping_mul(base);
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
acc.wrapping_mul(base)
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculates `self` + `rhs`
|
||||
|
@ -2039,36 +2103,68 @@ macro_rules! int_impl {
|
|||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
pub const fn overflowing_pow(self, mut exp: u32) -> (Self, bool) {
|
||||
if exp == 0 {
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return (1, false);
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return (if exp & 1 != 0 { -1 } else { 1 }, false);
|
||||
}
|
||||
// SAFETY: We just checked that `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { return (0, true); } // Division of constants is free
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
let overflow = res == Self::MIN;
|
||||
if sign {
|
||||
(res.wrapping_neg(), overflow)
|
||||
} else {
|
||||
(res, overflow)
|
||||
}
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return (1,false);
|
||||
}
|
||||
let mut base = self;
|
||||
let mut acc: Self = 1;
|
||||
let mut overflown = false;
|
||||
// Scratch space for storing results of overflowing_mul.
|
||||
let mut r;
|
||||
|
||||
while exp > 1 {
|
||||
if (exp & 1) == 1 {
|
||||
r = acc.overflowing_mul(base);
|
||||
acc = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
exp /= 2;
|
||||
r = base.overflowing_mul(base);
|
||||
base = r.0;
|
||||
overflown |= r.1;
|
||||
}
|
||||
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
r
|
||||
// since exp!=0, finally the exp must be 1.
|
||||
// Deal with the final bit of the exponent separately, since
|
||||
// squaring the base afterwards is not necessary and may cause a
|
||||
// needless overflow.
|
||||
r = acc.overflowing_mul(base);
|
||||
r.1 |= overflown;
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
||||
|
@ -2086,30 +2182,47 @@ macro_rules! int_impl {
|
|||
#[rustc_const_stable(feature = "const_int_pow", since = "1.50.0")]
|
||||
#[must_use = "this returns the result of the operation, \
|
||||
without modifying the original"]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known, const_int_unchecked_arith)]
|
||||
#[inline]
|
||||
#[rustc_inherit_overflow_checks]
|
||||
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||
#[track_caller] // Hides the hackish overflow check for powers of two.
|
||||
pub const fn pow(self, mut exp: u32) -> Self {
|
||||
// SAFETY: This path has the same behavior as the other.
|
||||
if unsafe { intrinsics::is_val_statically_known(self) }
|
||||
&& self > 0
|
||||
&& (self & (self - 1) == 0)
|
||||
&& self.unsigned_abs().is_power_of_two()
|
||||
{
|
||||
let power_used = match self.checked_ilog2() {
|
||||
Some(v) => v,
|
||||
// SAFETY: We just checked this is a power of two and above zero.
|
||||
None => unsafe { core::hint::unreachable_unchecked() },
|
||||
};
|
||||
// So it panics. Have to use `overflowing_mul` to efficiently set the
|
||||
// result to 0 if not.
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
_ = power_used * exp;
|
||||
if self == 1 { // Avoid divide by zero
|
||||
return 1;
|
||||
}
|
||||
if self == -1 { // Avoid divide by zero
|
||||
return if exp & 1 != 0 { -1 } else { 1 };
|
||||
}
|
||||
// SAFETY: We just checked that `self.unsigned_abs()` is a power of two, so `self.wrapping_abs()` is nonzero.
|
||||
let power_used = unsafe { intrinsics::cttz_nonzero(self.wrapping_abs()) as u32 };
|
||||
if exp > Self::BITS / power_used { // Division of constants is free
|
||||
#[allow(arithmetic_overflow)]
|
||||
return Self::MAX * Self::MAX * 0;
|
||||
}
|
||||
|
||||
// SAFETY: exp <= Self::BITS / power_used
|
||||
let res = unsafe { intrinsics::unchecked_shl(
|
||||
1 as Self,
|
||||
intrinsics::unchecked_mul(power_used, exp) as Self
|
||||
)};
|
||||
// LLVM doesn't always optimize out the checks
|
||||
// at the ir level.
|
||||
|
||||
let sign = self.is_negative() && exp & 1 != 0;
|
||||
#[allow(arithmetic_overflow)]
|
||||
if !sign && res == Self::MIN {
|
||||
// So it panics.
|
||||
_ = Self::MAX * Self::MAX;
|
||||
}
|
||||
if sign {
|
||||
res.wrapping_neg()
|
||||
} else {
|
||||
res
|
||||
}
|
||||
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
|
||||
let fine = !overflowed
|
||||
& (num_shl < (mem::size_of::<Self>() * 8) as u32);
|
||||
(1 << num_shl) * fine as Self
|
||||
} else {
|
||||
if exp == 0 {
|
||||
return 1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue