Explicitly unroll integer pow for small exponents

The newly optimized loop has introduced a regression in the case when pow is called with a small constant exponent. LLVM is no longer able to unroll the loop and the generated code is larger and slower than what's expected in tests. Match and handle small exponent values separately by branching out to an explicit multiplication sequence for that exponent. Powers larger than 6 need more than three multiplications, so these cases are less likely to benefit from this optimization, also such constant exponents are less likely to be used in practice. For uses with a non-constant exponent, this might also provide a performance benefit if the exponent is small and does not vary between successive calls, so the same match arm tends to be taken as a predicted branch.
2024-07-12 00:54:26 +03:00 · 2024-07-12 00:54:26 +03:00 · 1faa1018c7
commit 1faa1018c7
parent df27dfa0ea
2 changed files with 112 additions and 12 deletions
--- a/library/core/src/num/int_macros.rs
+++ b/library/core/src/num/int_macros.rs
@ -2173,10 +2173,35 @@ macro_rules! int_impl {
                      without modifying the original"]
        #[inline]
        pub const fn wrapping_pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
            let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base.wrapping_mul(base),
+                3 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(base);
+                }
+                4 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared);
+                }
+                5 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared).wrapping_mul(base);
+                }
+                6 => {
+                    let cubed = base.wrapping_mul(base).wrapping_mul(base);
+                    return cubed.wrapping_mul(cubed);
+                }
+                _ => {}
+            }
+
            let mut acc: Self = 1;

            loop {
@ -2719,10 +2744,35 @@ macro_rules! int_impl {
        #[inline]
        #[rustc_inherit_overflow_checks]
        pub const fn pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
            let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base * base,
+                3 => {
+                    let squared = base * base;
+                    return squared * base;
+                }
+                4 => {
+                    let squared = base * base;
+                    return squared * squared;
+                }
+                5 => {
+                    let squared = base * base;
+                    return squared * squared * base;
+                }
+                6 => {
+                    let cubed = base * base * base;
+                    return cubed * cubed;
+                }
+                _ => {}
+            }
+
            let mut acc = 1;

            loop {