assume the runtime range of align_offset

Found when I saw code with `align_to` having extraneous checks.

parent 74c4821045
commit a1e5c65aa4

3 changed files with 89 additions and 4 deletions
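For context, here is the kind of caller-side pattern that motivated the change (a hedged sketch; `has_extraneous_check` is a made-up name, not code from the commit). Without an `assume`, LLVM had no way to know that `align_offset`'s result was in range, so trivially-true range checks survived into optimized code:

```rust
// Illustrative only: for a byte pointer, `p.align_offset(8)` always
// returns a value in `0..8`, but before this commit the optimizer could
// not prove that, so the comparison compiled to a real check instead of
// folding to `true` (compare the `align8` codegen test added below).
pub fn has_extraneous_check(p: *const u8) -> bool {
    p.align_offset(8) < 8
}
```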
library/core/src/lib.rs:

@@ -107,6 +107,7 @@
 #![feature(const_arguments_as_str)]
 #![feature(const_array_from_ref)]
 #![feature(const_array_into_iter_constructors)]
+#![feature(const_assume)]
 #![feature(const_bigint_helper_methods)]
 #![feature(const_black_box)]
 #![feature(const_caller_location)]
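The gate is `const_assume` rather than a runtime-only feature, presumably because `align_offset` is a `const fn` (visible in the hunk headers below), so the `assume` intrinsic call being added must also be legal during const evaluation.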
library/core/src/ptr/mod.rs:

@@ -1632,8 +1632,8 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize
     // FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
     // 1, where the method versions of these operations are not inlined.
     use intrinsics::{
-        cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl, unchecked_shr,
-        unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
+        assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl,
+        unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
     };

     /// Calculate multiplicative modular inverse of `x` modulo `m`.
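`assume` is the key addition to this import list: it tells LLVM that a condition holds at that program point, makes it undefined behavior if it does not, and compiles to no runtime check. A minimal nightly-only sketch of the mechanism (`get_demo` is a hypothetical function, not part of this commit):

```rust
#![feature(core_intrinsics)]
#![allow(internal_features)]

use std::intrinsics::assume;

/// # Safety
/// Callers must guarantee `i < v.len()`.
pub unsafe fn get_demo(v: &[u32], i: usize) -> u32 {
    // Restating the caller's contract lets the optimizer elide the
    // bounds check in `v[i]`; if the condition were false, this would
    // be undefined behavior rather than a panic.
    unsafe { assume(i < v.len()) };
    v[i]
}
```

The second hunk in the same file hoists the byte-offset computation out of the `return if` below and applies the new `assume` to it: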
@@ -1724,12 +1724,18 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize
     // in a branch-free way and then bitwise-OR it with whatever result the `-p mod a`
     // computation produces.

+    let aligned_address = wrapping_add(addr, a_minus_one) & wrapping_sub(0, a);
+    let byte_offset = wrapping_sub(aligned_address, addr);
+    // FIXME: Remove the assume after <https://github.com/llvm/llvm-project/issues/62502>
+    // SAFETY: Masking by `-a` can only affect the low bits, and thus cannot have reduced
+    // the value by more than `a-1`, so even though the intermediate values might have
+    // wrapped, the byte_offset is always in `[0, a)`.
+    unsafe { assume(byte_offset < a) };
+
     // SAFETY: `stride == 0` case has been handled by the special case above.
     let addr_mod_stride = unsafe { unchecked_rem(addr, stride) };

     return if addr_mod_stride == 0 {
-        let aligned_address = wrapping_add(addr, a_minus_one) & wrapping_sub(0, a);
-        let byte_offset = wrapping_sub(aligned_address, addr);
         // SAFETY: `stride` is non-zero. This is guaranteed to divide exactly as well, because
         // addr has been verified to be aligned to the original type’s alignment requirements.
         unsafe { exact_div(byte_offset, stride) }
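To see why the assumed condition always holds, here is a stable-Rust model of the hoisted computation (a sketch under the same power-of-two assumption the SAFETY comment states; `byte_offset_to_align` is a made-up name, not the real implementation):

```rust
/// Sketch of the rounding logic above. Assumes `a` is a power of two,
/// per `align_offset`'s contract.
fn byte_offset_to_align(addr: usize, a: usize) -> usize {
    debug_assert!(a.is_power_of_two());
    // For a power of two, `0 - a` (wrapping) equals `!(a - 1)`, so the
    // mask clears exactly the low `log2(a)` bits: this rounds `addr` up
    // to the next multiple of `a`, modulo 2^usize::BITS.
    let aligned_address = addr.wrapping_add(a - 1) & 0usize.wrapping_sub(a);
    // The mask can only have removed the low bits, i.e. at most `a - 1`,
    // from `addr + (a - 1)`, so the difference is always in `0..a`,
    // even when the addition above wrapped.
    aligned_address.wrapping_sub(addr)
}
```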
tests/codegen/align-offset.rs (new file, 78 lines):

@@ -0,0 +1,78 @@
+// compile-flags: -O
+// min-llvm-version: 15.0 (because we're using opaque pointers)
+// ignore-debug (debug assertions in `slice::from_raw_parts` block optimizations)
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @align8
+#[no_mangle]
+pub fn align8(p: *const u8) -> bool {
+    // CHECK: ret i1 true
+    p.align_offset(8) < 8
+}
+
+#[repr(align(4))]
+pub struct Align4([u8; 4]);
+
+// CHECK-LABEL: @align_to4
+#[no_mangle]
+pub fn align_to4(x: &[u8]) -> bool {
+    // CHECK: ret i1 true
+    let (prefix, _middle, suffix) = unsafe { x.align_to::<Align4>() };
+    prefix.len() < 4 && suffix.len() < 4
+}
+
+// CHECK-LABEL: @align_offset_byte_ptr(ptr{{.+}}%ptr)
+#[no_mangle]
+pub fn align_offset_byte_ptr(ptr: *const u8) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %ptr to [[USIZE:i[0-9]+]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[OFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+
+    // Since we're offsetting a byte pointer, there's no further fixups
+    // CHECK-NOT: shr
+    // CHECK-NOT: div
+    // CHECK-NOT: select
+
+    // CHECK: ret [[USIZE]] %[[OFFSET]]
+    ptr.align_offset(32)
+}
+
+// CHECK-LABEL: @align_offset_word_slice(ptr{{.+}}align 4{{.+}}%slice.0
+#[no_mangle]
+pub fn align_offset_word_slice(slice: &[Align4]) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %slice.0 to [[USIZE]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[BOFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+    // CHECK: %[[OFFSET:.+]] = lshr exact [[USIZE]] %[[BOFFSET]], 2
+
+    // Slices are known to be aligned, so we don't need the "maybe -1" path
+    // CHECK-NOT: select
+
+    // CHECK: ret [[USIZE]] %[[OFFSET]]
+    slice.as_ptr().align_offset(32)
+}
+
+
+// CHECK-LABEL: @align_offset_word_ptr(ptr{{.+}}%ptr
+#[no_mangle]
+pub fn align_offset_word_ptr(ptr: *const Align4) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %ptr to [[USIZE]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[BOFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+
+    // While we can always get a *byte* offset that will work, if the original
+    // pointer is unaligned it might be impossible to return an *element* offset
+    // that will make it aligned. We want it to be a `select`, not a `br`, so
+    // that the assembly will be branchless.
+    // CHECK: %[[LOW:.+]] = and [[USIZE]] %[[ADDR]], 3
+    // CHECK: %[[ORIGINAL_ALIGNED:.+]] = icmp eq [[USIZE]] %[[LOW]], 0
+    // CHECK: %[[OFFSET:.+]] = lshr exact [[USIZE]] %[[BOFFSET]], 2
+    // CHECK: %[[R:.+]] = select i1 %[[ORIGINAL_ALIGNED]], [[USIZE]] %[[OFFSET]], [[USIZE]] -1
+
+    // CHECK: ret [[USIZE]] %[[R]]
+    ptr.align_offset(32)
+}
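These are FileCheck-style codegen tests: the harness compiles the file with the listed `compile-flags` and matches the emitted LLVM IR against the `// CHECK` comments (`CHECK-LABEL` anchors a function, `CHECK-NOT` asserts a pattern is absent between matches). To eyeball the IR outside the harness, something like `rustc -O --emit=llvm-ir align-offset.rs` works; that invocation is an illustration of the general approach, not a command from the commit.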