Auto merge of #123185 - scottmcm:more-typed-copy, r=compiler-errors
Remove my `scalar_copy_backend_type` optimization attempt

I added this back in https://github.com/rust-lang/rust/pull/111999, but I no longer think it's a good idea:

- It had to get scaled back to only power-of-two things to not break a bunch of targets
- LLVM seems to be getting better at memcpy removal anyway
- Introducing vector instructions has seemed to sometimes (https://github.com/rust-lang/rust/pull/115515#issuecomment-1750069529) make autovectorization worse

So this removes it from the codegen crates entirely, and instead just tries to use <https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/traits/builder/trait.BuilderMethods.html#method.typed_place_copy> rather than direct `memcpy`, so things will still use load/store when a type isn't `OperandValue::Ref`.
This commit is contained in:
commit
c2239bca5b
11 changed files with 91 additions and 165 deletions
|
@ -5,52 +5,58 @@
|
|||
// CHECK-LABEL: @array_load
|
||||
#[no_mangle]
|
||||
pub fn array_load(a: &[u8; 4]) -> [u8; 4] {
|
||||
// CHECK: %_0 = alloca [4 x i8], align 1
|
||||
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
|
||||
// CHECK: store <4 x i8> %[[TEMP1]], ptr %_0, align 1
|
||||
// CHECK: %[[TEMP2:.+]] = load i32, ptr %_0, align 1
|
||||
// CHECK: ret i32 %[[TEMP2]]
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[ALLOCA:.+]] = alloca [4 x i8], align 1
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[ALLOCA]], ptr align 1 %a, {{.+}} 4, i1 false)
|
||||
// CHECK: %[[TEMP:.+]] = load i32, ptr %[[ALLOCA]], align 1
|
||||
// CHECK: ret i32 %[[TEMP]]
|
||||
*a
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @array_store
|
||||
#[no_mangle]
|
||||
pub fn array_store(a: [u8; 4], p: &mut [u8; 4]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[TEMP:.+]] = alloca i32, [[TEMPALIGN:align [0-9]+]]
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %a = alloca [4 x i8]
|
||||
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
|
||||
// CHECK-NEXT: store <4 x i8> %[[TEMP]], ptr %p, align 1
|
||||
// CHECK-NOT: alloca
|
||||
// store i32 %0, ptr %[[TEMP]]
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %a, ptr [[TEMPALIGN]] %[[TEMP]], {{.+}} 4, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %a, {{.+}} 4, i1 false)
|
||||
*p = a;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @array_copy
|
||||
#[no_mangle]
|
||||
pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[LOCAL:.+]] = alloca [4 x i8], align 1
|
||||
// CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1
|
||||
// CHECK: store <4 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
|
||||
// CHECK: %[[TEMP2:.+]] = load <4 x i8>, ptr %[[LOCAL]], align 1
|
||||
// CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 4, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 4, i1 false)
|
||||
*p = *a;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @array_copy_1_element
|
||||
#[no_mangle]
|
||||
pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
|
||||
// CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
|
||||
// CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1
|
||||
// CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1
|
||||
// CHECK: store i8 %[[TEMP2]], ptr %p, align 1
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 1, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 1, i1 false)
|
||||
*p = *a;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @array_copy_2_elements
|
||||
#[no_mangle]
|
||||
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
|
||||
// CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1
|
||||
// CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
|
||||
// CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1
|
||||
// CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 2, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 2, i1 false)
|
||||
*p = *a;
|
||||
}
|
||||
|
|
|
@ -16,8 +16,8 @@ pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
|
|||
#[no_mangle]
|
||||
pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1
|
||||
// CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1
|
||||
// CHECK: %[[TEMP:.+]] = load i16, ptr %a, align 1
|
||||
// CHECK: store i16 %[[TEMP]], ptr %p, align 1
|
||||
// CHECK: ret
|
||||
*p = *a;
|
||||
}
|
||||
|
@ -26,8 +26,8 @@ pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
|
|||
#[no_mangle]
|
||||
pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
|
||||
// CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1
|
||||
// CHECK: %[[TEMP:.+]] = load i32, ptr %a, align 1
|
||||
// CHECK: store i32 %[[TEMP]], ptr %p, align 1
|
||||
// CHECK: ret
|
||||
*p = *a;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
//@ compile-flags: -O
|
||||
//@ revisions: OPT2 OPT3WINX64 OPT3LINX64
|
||||
//@ [OPT2] compile-flags: -O
|
||||
//@ [OPT3LINX64] compile-flags: -C opt-level=3
|
||||
//@ [OPT3WINX64] compile-flags: -C opt-level=3
|
||||
//@ [OPT3LINX64] only-linux
|
||||
//@ [OPT3WINX64] only-windows
|
||||
//@ [OPT3LINX64] only-x86_64
|
||||
//@ [OPT3WINX64] only-x86_64
|
||||
//@ min-llvm-version: 18.1.3
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
@ -9,15 +16,27 @@
|
|||
// to avoid complicating the code.
|
||||
// CHECK-LABEL: define{{.*}}void @convert(
|
||||
// CHECK-NOT: shufflevector
|
||||
// CHECK: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: insertelement <8 x i16>
|
||||
// CHECK-NEXT: store <8 x i16>
|
||||
// OPT2: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 2
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 4
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 6
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 8
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 10
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 12
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 14
|
||||
// OPT2-NEXT: store i16
|
||||
// OPT3LINX64: load <8 x i16>
|
||||
// OPT3LINX64-NEXT: call <8 x i16> @llvm.bswap
|
||||
// OPT3LINX64-NEXT: store <8 x i16>
|
||||
// OPT3WINX64: load <8 x i16>
|
||||
// OPT3WINX64-NEXT: call <8 x i16> @llvm.bswap
|
||||
// OPT3WINX64-NEXT: store <8 x i16>
|
||||
// CHECK-NEXT: ret void
|
||||
#[no_mangle]
|
||||
#[cfg(target_endian = "little")]
|
||||
|
|
|
@ -45,9 +45,7 @@ pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] {
|
|||
// CHECK-LABEL: @replace_short_array_4(
|
||||
pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4
|
||||
// CHECK: store <4 x i32> %[[R]], ptr %result
|
||||
// CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4
|
||||
// CHECK: store <4 x i32> %[[V]], ptr %r
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 16, i1 false)
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 16, i1 false)
|
||||
std::mem::replace(r, v)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue