diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs
index f23c2cf2c07..536bcc0cd9e 100644
--- a/compiler/rustc_ty_utils/src/abi.rs
+++ b/compiler/rustc_ty_utils/src/abi.rs
@@ -726,6 +726,49 @@ fn fn_abi_adjust_for_abi<'tcx>(
 
             };
         }
 
+        if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
+            // Return values larger than 2 registers are returned using
+            // a return area pointer. LLVM and Cranelift disagree about
+            // how to return values that don't fit in the registers
+            // designated for return values. LLVM will force the entire
+            // return value to be passed by return area pointer, while
+            // Cranelift will look at each IR-level return value
+            // independently and decide to pass it in a register or not,
+            // which would result in the return value being passed
+            // partially in registers and partially through a return
+            // area pointer.
+            //
+            // While Cranelift may need to be fixed, as the LLVM
+            // behavior is generally more correct with respect to the
+            // surface language, forcing this behavior in rustc itself
+            // makes it easier for other backends to conform to the
+            // Rust ABI, and for the C ABI rustc already handles this
+            // behavior anyway.
+            //
+            // In addition, LLVM's decision to pass the return value in
+            // registers or using a return area pointer depends on how
+            // exactly the return type is lowered to an LLVM IR type.
+            // For example `Option<u128>` can be lowered as
+            // `{ i128, i128 }`, in which case the x86_64 backend would
+            // use a return area pointer, or it could be passed as
+            // `{ i32, i128 }`, in which case the x86_64 backend would
+            // pass it in registers by taking advantage of an LLVM ABI
+            // extension that allows using 3 registers for the x86_64
+            // sysv call conv rather than the officially specified 2
+            // registers.
+            //
+            // FIXME: Technically we should look at the number of
+            // available return registers rather than guessing that
+            // there are 2 registers for return values. In practice
+            // only a couple of architectures have fewer than 2 return
+            // registers, none of which are supported by Cranelift.
+            //
+            // NOTE: This adjustment is only necessary for the Rust
+            // ABI, as for other ABIs the calling convention
+            // implementations in rustc_target already ensure that any
+            // return value which doesn't fit in the available return
+            // registers is passed in the right way for the current
+            // target.
+            arg.make_indirect();
+            return;
+        }
+
         match arg.layout.abi {
             Abi::Aggregate { .. } => {}
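For intuition: `Pointer(AddressSpace::DATA).size(cx)` is the target's data-pointer width, 8 bytes on x86_64, so the check above makes any Rust-ABI return value larger than 16 bytes indirect. Below is a minimal standalone sketch of that rule, assuming the 8-byte pointer case; the function and parameter names are made up for illustration and are not rustc APIs.

// A standalone sketch of the predicate added above, not rustc code.
// Assumes an 8-byte data pointer, as on x86_64.
fn uses_return_area_pointer(return_value_size: u64, pointer_size: u64) -> bool {
    // Mirrors `arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2`,
    // with `arg_idx.is_none()` (i.e. "this is the return value") already
    // established by the caller.
    return_value_size > pointer_size * 2
}

fn main() {
    // A 16-byte u128 still comes back in the two return registers...
    assert!(!uses_return_area_pointer(16, 8));
    // ...while a 32-byte value such as `Option<u128>` is now always
    // returned through a return area pointer.
    assert!(uses_return_area_pointer(32, 8));
}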
diff --git a/tests/codegen/i128-x86-align.rs b/tests/codegen/i128-x86-align.rs
index 3e6ed2b8e16..6b1da445c40 100644
--- a/tests/codegen/i128-x86-align.rs
+++ b/tests/codegen/i128-x86-align.rs
@@ -19,13 +19,15 @@ pub struct ScalarPair {
 
 #[no_mangle]
 pub fn load(x: &ScalarPair) -> ScalarPair {
     // CHECK-LABEL: @load(
+    // CHECK-SAME: sret([32 x i8]) align 16 dereferenceable(32) %_0,
     // CHECK-SAME: align 16 dereferenceable(32) %x
     // CHECK: [[A:%.*]] = load i32, ptr %x, align 16
     // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %x, i64 16
     // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
-    // CHECK-NEXT: [[IV1:%.*]] = insertvalue { i32, i128 } poison, i32 [[A]], 0
-    // CHECK-NEXT: [[IV2:%.*]] = insertvalue { i32, i128 } [[IV1]], i128 [[B]], 1
-    // CHECK-NEXT: ret { i32, i128 } [[IV2]]
+    // CHECK-NEXT: store i32 [[A]], ptr %_0, align 16
+    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
+    // CHECK-NEXT: store i128 [[B]], ptr [[GEP]], align 16
+    // CHECK-NEXT: ret void
     *x
 }
 
@@ -53,29 +55,23 @@ pub fn alloca() {
 
 #[no_mangle]
 pub fn load_volatile(x: &ScalarPair) -> ScalarPair {
     // CHECK-LABEL: @load_volatile(
+    // CHECK-SAME: sret([32 x i8]) align 16 dereferenceable(32) %_0,
     // CHECK-SAME: align 16 dereferenceable(32) %x
-    // CHECK: [[TMP:%.*]] = alloca [32 x i8], align 16
     // CHECK: [[LOAD:%.*]] = load volatile %ScalarPair, ptr %x, align 16
-    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr [[TMP]], align 16
-    // CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
+    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr %_0, align 16
+    // CHECK-NEXT: ret void
     unsafe { std::intrinsics::volatile_load(x) }
 }
 
 #[no_mangle]
 pub fn transmute(x: ScalarPair) -> (std::mem::MaybeUninit<i128>, i128) {
-    // CHECK-LABEL: define { i128, i128 } @transmute(i32 noundef %x.0, i128 noundef %x.1)
-    // CHECK: [[TMP:%.*]] = alloca [32 x i8], align 16
-    // CHECK-NEXT: store i32 %x.0, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
+    // CHECK-LABEL: @transmute(
+    // CHECK-SAME: sret([32 x i8]) align 16 dereferenceable(32) %_0,
+    // CHECK-SAME: i32 noundef %x.0, i128 noundef %x.1
+    // CHECK: store i32 %x.0, ptr %_0, align 16
+    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
     // CHECK-NEXT: store i128 %x.1, ptr [[GEP]], align 16
-    // CHECK-NEXT: [[LOAD1:%.*]] = load i128, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT: [[LOAD2:%.*]] = load i128, ptr [[GEP2]], align 16
-    // CHECK-NEXT: [[IV1:%.*]] = insertvalue { i128, i128 } poison, i128 [[LOAD1]], 0
-    // CHECK-NEXT: [[IV2:%.*]] = insertvalue { i128, i128 } [[IV1]], i128 [[LOAD2]], 1
-    // CHECK-NEXT: ret { i128, i128 } [[IV2]]
+    // CHECK-NEXT: ret void
     unsafe { std::mem::transmute(x) }
 }
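The updated CHECK lines can be exercised outside the test suite. A hypothetical `repro.rs` along the lines of the test's `ScalarPair`, compiled with `rustc -O --emit=llvm-ir repro.rs` on x86_64-unknown-linux-gnu, should now emit a `void` definition taking a `ptr sret([32 x i8]) align 16` return slot instead of returning `{ i32, i128 }` by value (exact parameter attributes may differ by rustc version):

// repro.rs: a hypothetical standalone version of the `load` test above.
// The struct shape mirrors the test's `ScalarPair`: an { i32, i128 }
// scalar pair, 32 bytes with 16-byte alignment on current x86_64.
#[derive(Clone, Copy)]
pub struct ScalarPair {
    pub a: i32,
    pub b: i128,
}

#[no_mangle]
pub fn load(x: &ScalarPair) -> ScalarPair {
    // With the change above, the copy is written through the caller's
    // return area pointer rather than returned in registers.
    *x
}

fn main() {}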
diff --git a/tests/codegen/tuple-layout-opt.rs b/tests/codegen/tuple-layout-opt.rs
index 601563bc061..3202e4749c5 100644
--- a/tests/codegen/tuple-layout-opt.rs
+++ b/tests/codegen/tuple-layout-opt.rs
@@ -19,28 +19,28 @@ pub fn test_ScalarZstFirst(_: ScalarZstFirst) -> ScalarZstFirst {
 }
 
 type ScalarPairZstLast = (u8, u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairZstLast(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstLast(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstLast(_: ScalarPairZstLast) -> ScalarPairZstLast {
     loop {}
 }
 
 type ScalarPairZstFirst = ((), u8, u128);
-// CHECK: define {{(dso_local )?}}{ i8, i128 } @test_ScalarPairZstFirst(i8 %_1.0, i128 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstFirst(ptr sret([32 x i8]) align 16 %_0, i8 %_1.0, i128 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstFirst(_: ScalarPairZstFirst) -> ScalarPairZstFirst {
     loop {}
 }
 
 type ScalarPairLotsOfZsts = ((), u8, (), u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLotsOfZsts(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLotsOfZsts(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLotsOfZsts(_: ScalarPairLotsOfZsts) -> ScalarPairLotsOfZsts {
     loop {}
 }
 
 type ScalarPairLottaNesting = (((), ((), u8, (), u128, ())), ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLottaNesting(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLottaNesting(ptr sret([32 x i8]) align 16 %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLottaNesting(_: ScalarPairLottaNesting) -> ScalarPairLottaNesting {
     loop {}
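All four tuples in this test lower, after layout optimization, to a scalar pair of one u128 and one u8, which is why every signature changes the same way. Below is a quick sanity check of the sizes involved, assuming a current x86_64 rustc where `u128` is 16-byte aligned; at 32 bytes these values exceed the two 8-byte return registers and so take the new return-area-pointer path.

// Sanity check for the tuple layouts asserted in the test above.
type ScalarPairZstLast = (u8, u128, ());
type ScalarPairLottaNesting = (((), ((), u8, (), u128, ())), ());

fn main() {
    // The ZSTs contribute nothing; the u128 + u8 pair is padded out to
    // 32 bytes at 16-byte alignment, matching `sret([32 x i8]) align 16`.
    assert_eq!(std::mem::size_of::<ScalarPairZstLast>(), 32);
    assert_eq!(std::mem::align_of::<ScalarPairZstLast>(), 16);
    assert_eq!(std::mem::size_of::<ScalarPairLottaNesting>(), 32);
}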