rust/tests/codegen/swap-large-types.rs
2025-04-09 09:09:37 -07:00

116 lines
3.3 KiB
Rust

//@ compile-flags: -Copt-level=3
//@ only-x86_64
#![crate_type = "lib"]
use std::mem::swap;
use std::ptr::{copy_nonoverlapping, read, write};
// 5 x 5 `u64` words = 200 bytes; the large value type swapped by
// `swap_basic`, `swap_std` and `swap_slice`.
type KeccakBuffer = [[u64; 5]; 5];
// A basic read+copy+write swap implementation ends up copying one of the values
// to stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.
// The tests here (after the first one showing that the problem still exists)
// are less about testing *exactly* what the codegen is, and more about testing
// 1) That things are swapped directly from one argument to the other,
// never going through stack along the way, and
// 2) That we're doing the swapping for big things using large vector types,
// rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
//
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
// check that it, as an intrinsic, is emitting exactly what it should.)
// CHECK-LABEL: @swap_basic
// Baseline case: a hand-written read + copy + write swap of two
// `KeccakBuffer`s. The FileCheck directive inside the body expects a 200-byte
// stack slot in the emitted IR, i.e. this naive form still sends one of the
// values through the stack. Do not "improve" the body: it is the thing
// being measured.
#[no_mangle]
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
// CHECK: alloca [200 x i8]
// SAFETY: exclusive references are always valid to read/write,
// are non-overlapping, and nothing here panics so it's drop-safe.
unsafe {
// Bitwise-copy `*x` into a local temporary.
let z = read(x);
// Overwrite `*x` with one `KeccakBuffer` copied from `*y`.
copy_nonoverlapping(y, x, 1);
// Store the saved old value of `*x` into `*y`, completing the swap.
write(y, z);
}
}
// CHECK-LABEL: @swap_std
// Swaps two `KeccakBuffer`s through `std::mem::swap`. The directives in the
// body require that no stack slot appears in the emitted IR and that the data
// is moved with `<2 x i64>` or `<4 x i64>` vector loads and stores.
#[no_mangle]
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
// CHECK-NOT: alloca
// CHECK: load <{{2|4}} x i64>
// CHECK: store <{{2|4}} x i64>
swap(x, y)
}
// CHECK-LABEL: @swap_slice
// Swaps the contents of two slices of `KeccakBuffer` when their lengths are
// equal, and does nothing otherwise. The directives in the body require that
// no stack slot appears in the emitted IR and that the data is moved with
// `<2 x i64>` or `<4 x i64>` vector loads and stores.
#[no_mangle]
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
// CHECK-NOT: alloca
// CHECK: load <{{2|4}} x i64>
// CHECK: store <{{2|4}} x i64>
// `swap_with_slice` panics when the lengths differ, so only call it when they
// match (presumably to keep the panic path out of the checked IR).
if x.len() == y.len() {
x.swap_with_slice(y);
}
}
// 1024 bytes of `u8`: a large element type whose alignment is only 1.
type OneKilobyteBuffer = [u8; 1024];
// CHECK-LABEL: @swap_1kb_slices
// Swaps the contents of two slices of `OneKilobyteBuffer` when their lengths
// are equal, and does nothing otherwise. The directives in the body require:
// no stack slot; no scalar `i32` / `i16` / `i8` loads or stores either before
// or after the vector accesses; and `<2 x i64>` or `<4 x i64>` vector loads
// and stores performed at `align 1`, matching the byte-aligned element type.
#[no_mangle]
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
// CHECK-NOT: alloca
// CHECK-NOT: load i32
// CHECK-NOT: store i32
// CHECK-NOT: load i16
// CHECK-NOT: store i16
// CHECK-NOT: load i8
// CHECK-NOT: store i8
// CHECK: load <{{2|4}} x i64>{{.+}}align 1,
// CHECK: store <{{2|4}} x i64>{{.+}}align 1,
// CHECK-NOT: load i32
// CHECK-NOT: store i32
// CHECK-NOT: load i16
// CHECK-NOT: store i16
// CHECK-NOT: load i8
// CHECK-NOT: store i8
// `swap_with_slice` panics when the lengths differ, so only call it when they
// match (presumably to keep the panic path out of the checked IR).
if x.len() == y.len() {
x.swap_with_slice(y);
}
}
// 192 bytes (3 * 64) of payload with a forced 64-byte alignment.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);
// CHECK-LABEL: @swap_big_aligned
// Swaps two `BigButHighlyAligned` values through `std::mem::swap`. The
// directives in the body require, in order: no `llvm.memcpy` call and no scalar
// `i32` / `i16` / `i8` loads or stores; two vector loads then two vector stores
// at `align 64`; two vector loads then two vector stores at `align 32`; and
// again no scalar accesses or `llvm.memcpy` call afterwards. The vectors may
// be `<2 x i64>` or `<4 x i64>`.
#[no_mangle]
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
// CHECK-NOT: call void @llvm.memcpy
// CHECK-NOT: load i32
// CHECK-NOT: store i32
// CHECK-NOT: load i16
// CHECK-NOT: store i16
// CHECK-NOT: load i8
// CHECK-NOT: store i8
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,
// CHECK-NOT: load i32
// CHECK-NOT: store i32
// CHECK-NOT: load i16
// CHECK-NOT: store i16
// CHECK-NOT: load i8
// CHECK-NOT: store i8
// CHECK-NOT: call void @llvm.memcpy
swap(x, y)
}