116 lines
3.3 KiB
Rust
116 lines
3.3 KiB
Rust
//@ compile-flags: -Copt-level=3
|
|
//@ only-x86_64
|
|
|
|
#![crate_type = "lib"]
|
|
|
|
use std::mem::swap;
|
|
use std::ptr::{copy_nonoverlapping, read, write};
|
|
|
|
// A 5x5 grid of `u64` lanes: 25 * 8 = 200 bytes, which is the size that the
// `[200 x i8]` stack slot expected in `swap_basic` corresponds to.
// NOTE(review): presumably modeled on the Keccak permutation state; the name
// is the only hint in this file.
type KeccakBuffer = [[u64; 5]; 5];
|
|
|
|
// A basic read+copy+write swap implementation ends up copying one of the values
|
|
// to stack for large types, which is completely unnecessary as the lack of
|
|
// overlap means we can just do whatever fits in registers at a time.
|
|
|
|
// The tests here (after the first one showing that the problem still exists)
|
|
// are less about testing *exactly* what the codegen is, and more about testing
|
|
// 1) That things are swapped directly from one argument to the other,
|
|
// never going through stack along the way, and
|
|
// 2) That we're doing the swapping for big things using large vector types,
|
|
// rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
|
|
//
|
|
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
|
|
// check that it, as an intrinsic, is emitting exactly what it should.)
|
|
|
|
// CHECK-LABEL: @swap_basic
|
|
#[no_mangle]
|
|
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
|
|
// CHECK: alloca [200 x i8]
|
|
|
|
// SAFETY: exclusive references are always valid to read/write,
|
|
// are non-overlapping, and nothing here panics so it's drop-safe.
|
|
unsafe {
|
|
let z = read(x);
|
|
copy_nonoverlapping(y, x, 1);
|
|
write(y, z);
|
|
}
|
|
}
|
|
|
|
// CHECK-LABEL: @swap_std
|
|
#[no_mangle]
|
|
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
|
|
// CHECK-NOT: alloca
|
|
// CHECK: load <{{2|4}} x i64>
|
|
// CHECK: store <{{2|4}} x i64>
|
|
swap(x, y)
|
|
}
|
|
|
|
// CHECK-LABEL: @swap_slice
|
|
#[no_mangle]
|
|
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
|
|
// CHECK-NOT: alloca
|
|
// CHECK: load <{{2|4}} x i64>
|
|
// CHECK: store <{{2|4}} x i64>
|
|
if x.len() == y.len() {
|
|
x.swap_with_slice(y);
|
|
}
|
|
}
|
|
|
|
// 1024 bytes of `u8`, so the element type is only byte-aligned; the slice
// test that uses it expects its vector accesses to be marked `align 1`.
type OneKilobyteBuffer = [u8; 1024];
|
|
|
|
// CHECK-LABEL: @swap_1kb_slices
|
|
#[no_mangle]
|
|
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
|
|
// CHECK-NOT: alloca
|
|
|
|
// CHECK-NOT: load i32
|
|
// CHECK-NOT: store i32
|
|
// CHECK-NOT: load i16
|
|
// CHECK-NOT: store i16
|
|
// CHECK-NOT: load i8
|
|
// CHECK-NOT: store i8
|
|
|
|
// CHECK: load <{{2|4}} x i64>{{.+}}align 1,
|
|
// CHECK: store <{{2|4}} x i64>{{.+}}align 1,
|
|
|
|
// CHECK-NOT: load i32
|
|
// CHECK-NOT: store i32
|
|
// CHECK-NOT: load i16
|
|
// CHECK-NOT: store i16
|
|
// CHECK-NOT: load i8
|
|
// CHECK-NOT: store i8
|
|
|
|
if x.len() == y.len() {
|
|
x.swap_with_slice(y);
|
|
}
|
|
}
|
|
|
|
/// A 192-byte (`64 * 3`) byte payload forced to 64-byte alignment.
///
/// Used by `swap_big_aligned`, whose directives look for vector accesses
/// carrying `align 64` and `align 32`.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);
|
|
|
|
// CHECK-LABEL: @swap_big_aligned
|
|
#[no_mangle]
|
|
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
|
|
// CHECK-NOT: call void @llvm.memcpy
|
|
// CHECK-NOT: load i32
|
|
// CHECK-NOT: store i32
|
|
// CHECK-NOT: load i16
|
|
// CHECK-NOT: store i16
|
|
// CHECK-NOT: load i8
|
|
// CHECK-NOT: store i8
|
|
|
|
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
|
|
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,
|
|
|
|
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
|
|
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,
|
|
|
|
// CHECK-NOT: load i32
|
|
// CHECK-NOT: store i32
|
|
// CHECK-NOT: load i16
|
|
// CHECK-NOT: store i16
|
|
// CHECK-NOT: load i8
|
|
// CHECK-NOT: store i8
|
|
// CHECK-NOT: call void @llvm.memcpy
|
|
swap(x, y)
|
|
}
|