Improve codegen of align_offset when stride == 1
Previously, checking for `pmoda == 0` first would get LLVM to generate branchy code, even though for `stride == 1` the offset can be computed without such a branch by effectively doing `-p % a`. For well-known (constant) alignments, with the new ordering of these conditionals, we end up generating 2 to 3 cheap instructions on x86_64:
    movq %rdi, %rax
    negl %eax
    andl $7, %eax
instead of 5+ as previously. For unknown alignments the new code also generates just 3 instructions:
    negq %rdi
    leaq -1(%rsi), %rax
    andq %rdi, %rax
This commit is contained in:
parent
e7271da69a
commit
5d22b18bf2
1 changed files with 11 additions and 13 deletions
|
@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
|
||||||
|
|
||||||
/// Calculate multiplicative modular inverse of `x` modulo `m`.
|
/// Calculate multiplicative modular inverse of `x` modulo `m`.
|
||||||
///
|
///
|
||||||
/// This implementation is tailored for align_offset and has following preconditions:
|
/// This implementation is tailored for `align_offset` and has following preconditions:
|
||||||
///
|
///
|
||||||
/// * `m` is a power-of-two;
|
/// * `m` is a power-of-two;
|
||||||
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
|
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
|
||||||
|
@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
|
||||||
}
|
}
|
||||||
|
|
||||||
let stride = mem::size_of::<T>();
|
let stride = mem::size_of::<T>();
|
||||||
// SAFETY: `a` is a power-of-two, hence non-zero.
|
// SAFETY: `a` is a power-of-two, therefore non-zero.
|
||||||
let a_minus_one = unsafe { unchecked_sub(a, 1) };
|
let a_minus_one = unsafe { unchecked_sub(a, 1) };
|
||||||
let pmoda = p as usize & a_minus_one;
|
if stride == 1 {
|
||||||
|
// `stride == 1` case can be computed more efficiently through `-p (mod a)`.
|
||||||
|
return wrapping_sub(0, p as usize) & a_minus_one;
|
||||||
|
}
|
||||||
|
|
||||||
|
let pmoda = p as usize & a_minus_one;
|
||||||
if pmoda == 0 {
|
if pmoda == 0 {
|
||||||
// Already aligned. Yay!
|
// Already aligned. Yay!
|
||||||
return 0;
|
return 0;
|
||||||
}
|
} else if stride == 0 {
|
||||||
|
// If the pointer is not aligned, and the element is zero-sized, then no amount of
|
||||||
if stride <= 1 {
|
// elements will ever align the pointer.
|
||||||
return if stride == 0 {
|
return usize::MAX;
|
||||||
// If the pointer is not aligned, and the element is zero-sized, then no amount of
|
|
||||||
// elements will ever align the pointer.
|
|
||||||
!0
|
|
||||||
} else {
|
|
||||||
wrapping_sub(a, pmoda)
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let smoda = stride & a_minus_one;
|
let smoda = stride & a_minus_one;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue