1
Fork 0

Remove my scalar_copy_backend_type optimization attempt

I added this back in 111999, but I no longer think it's a good idea
- It had to get scaled back to only power-of-two things to not break a bunch of targets
- LLVM seems to be getting better at memcpy removal anyway
- Introducing vector instructions has seemed to sometimes (115515) make autovectorization worse

So this removes it from the codegen crates entirely, and instead just tries to use <https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/traits/builder/trait.BuilderMethods.html#method.typed_place_copy> instead of direct `memcpy` so things will still use load/store for immediates.
This commit is contained in:
Scott McMurray 2024-03-29 00:00:24 -07:00
parent ff24ef91fc
commit b5376ba601
10 changed files with 60 additions and 154 deletions

View file

@ -5,52 +5,58 @@
// CHECK-LABEL: @array_load
#[no_mangle]
pub fn array_load(a: &[u8; 4]) -> [u8; 4] {
// CHECK: %_0 = alloca [4 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
// CHECK: store <4 x i8> %[[TEMP1]], ptr %_0, align 1
// CHECK: %[[TEMP2:.+]] = load i32, ptr %_0, align 1
// CHECK: ret i32 %[[TEMP2]]
// CHECK-NOT: alloca
// CHECK: %[[ALLOCA:.+]] = alloca [4 x i8], align 1
// CHECK-NOT: alloca
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[ALLOCA]], ptr align 1 %a, {{.+}} 4, i1 false)
// CHECK: %[[TEMP:.+]] = load i32, ptr %[[ALLOCA]], align 1
// CHECK: ret i32 %[[TEMP]]
*a
}
// CHECK-LABEL: @array_store
#[no_mangle]
pub fn array_store(a: [u8; 4], p: &mut [u8; 4]) {
// CHECK-NOT: alloca
// CHECK: %[[TEMP:.+]] = alloca i32, [[TEMPALIGN:align [0-9]+]]
// CHECK-NOT: alloca
// CHECK: %a = alloca [4 x i8]
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
// CHECK-NEXT: store <4 x i8> %[[TEMP]], ptr %p, align 1
// CHECK-NOT: alloca
// store i32 %0, ptr %[[TEMP]]
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %a, ptr [[TEMPALIGN]] %[[TEMP]], {{.+}} 4, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %a, {{.+}} 4, i1 false)
*p = a;
}
// CHECK-LABEL: @array_copy
#[no_mangle]
pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
// CHECK-NOT: alloca
// CHECK: %[[LOCAL:.+]] = alloca [4 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
// CHECK: store <4 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load <4 x i8>, ptr %[[LOCAL]], align 1
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
// CHECK-NOT: alloca
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 4, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 4, i1 false)
*p = *a;
}
// CHECK-LABEL: @array_copy_1_element
#[no_mangle]
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
// CHECK-NOT: alloca
// CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
// CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1
// CHECK: store i8 %[[TEMP2]], ptr %p, align 1
// CHECK-NOT: alloca
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 1, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 1, i1 false)
*p = *a;
}
// CHECK-LABEL: @array_copy_2_elements
#[no_mangle]
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
// CHECK-NOT: alloca
// CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
// CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1
// CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
// CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1
// CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1
// CHECK-NOT: alloca
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 2, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 2, i1 false)
*p = *a;
}