1
Fork 0

copy byval argument to alloca if alignment is insufficient

This commit is contained in:
Erik Desjardins 2024-03-08 17:11:27 -05:00
parent e919669d42
commit 207fe38630
3 changed files with 220 additions and 72 deletions

View file

@ -203,57 +203,63 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
val: &'ll Value, val: &'ll Value,
dst: PlaceRef<'tcx, &'ll Value>, dst: PlaceRef<'tcx, &'ll Value>,
) { ) {
if self.is_ignore() { match &self.mode {
return; PassMode::Ignore => {}
} // Sized indirect arguments
if self.is_sized_indirect() { PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst) let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
} else if self.is_unsized_indirect() { OperandValue::Ref(val, None, align).store(bx, dst);
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`"); }
} else if let PassMode::Cast { cast, pad_i32: _ } = &self.mode { // Unsized indirect arguments
// FIXME(eddyb): Figure out when the simpler Store is safe, clang PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}. bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
let can_store_through_cast_ptr = false; }
if can_store_through_cast_ptr { PassMode::Cast { cast, pad_i32: _ } => {
bx.store(val, dst.llval, self.layout.align.abi); // FIXME(eddyb): Figure out when the simpler Store is safe, clang
} else { // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
// The actual return type is a struct, but the ABI let can_store_through_cast_ptr = false;
// adaptation code has cast it into some scalar type. The if can_store_through_cast_ptr {
// code that follows is the only reliable way I have bx.store(val, dst.llval, self.layout.align.abi);
// found to do a transform like i64 -> {i32,i32}. } else {
// Basically we dump the data onto the stack then memcpy it. // The actual return type is a struct, but the ABI
// // adaptation code has cast it into some scalar type. The
// Other approaches I tried: // code that follows is the only reliable way I have
// - Casting rust ret pointer to the foreign type and using Store // found to do a transform like i64 -> {i32,i32}.
// is (a) unsafe if size of foreign type > size of rust type and // Basically we dump the data onto the stack then memcpy it.
// (b) runs afoul of strict aliasing rules, yielding invalid //
// assembly under -O (specifically, the store gets removed). // Other approaches I tried:
// - Truncating foreign type to correct integral type and then // - Casting rust ret pointer to the foreign type and using Store
// bitcasting to the struct type yields invalid cast errors. // is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// We instead thus allocate some scratch space... // assembly under -O (specifically, the store gets removed).
let scratch_size = cast.size(bx); // - Truncating foreign type to correct integral type and then
let scratch_align = cast.align(bx); // bitcasting to the struct type yields invalid cast errors.
let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size); // We instead thus allocate some scratch space...
let scratch_size = cast.size(bx);
// ... where we first store the value... let scratch_align = cast.align(bx);
bx.store(val, llscratch, scratch_align); let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size);
// ... and then memcpy it to the intended destination.
bx.memcpy( // ... where we first store the value...
dst.llval, bx.store(val, llscratch, scratch_align);
self.layout.align.abi,
llscratch, // ... and then memcpy it to the intended destination.
scratch_align, bx.memcpy(
bx.const_usize(self.layout.size.bytes()), dst.llval,
MemFlags::empty(), self.layout.align.abi,
); llscratch,
scratch_align,
bx.lifetime_end(llscratch, scratch_size); bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);
bx.lifetime_end(llscratch, scratch_size);
}
}
_ => {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
} }
} else {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
} }
} }

View file

@ -367,29 +367,45 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
} }
} }
if arg.is_sized_indirect() { match arg.mode {
// Don't copy an indirect argument to an alloca, the caller // Sized indirect arguments
// already put it in a temporary alloca and gave it up. PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
// FIXME: lifetimes // Don't copy an indirect argument to an alloca, the caller already put it
let llarg = bx.get_param(llarg_idx); // in a temporary alloca and gave it up.
llarg_idx += 1; // FIXME: lifetimes
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout)) if let Some(pointee_align) = attrs.pointee_align
} else if arg.is_unsized_indirect() { && pointee_align < arg.layout.align.abi
// As the storage for the indirect argument lives during {
// the whole function call, we just copy the fat pointer. // ...unless the argument is underaligned, then we need to copy it to
let llarg = bx.get_param(llarg_idx); // a higher-aligned alloca.
llarg_idx += 1; let tmp = PlaceRef::alloca(bx, arg.layout);
let llextra = bx.get_param(llarg_idx); bx.store_fn_arg(arg, &mut llarg_idx, tmp);
llarg_idx += 1; LocalRef::Place(tmp)
let indirect_operand = OperandValue::Pair(llarg, llextra); } else {
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
}
}
// Unsized indirect arguments
PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);
let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout); let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp); indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp) LocalRef::UnsizedPlace(tmp)
} else { }
let tmp = PlaceRef::alloca(bx, arg.layout); _ => {
bx.store_fn_arg(arg, &mut llarg_idx, tmp); let tmp = PlaceRef::alloca(bx, arg.layout);
LocalRef::Place(tmp) bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
}
} }
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();

View file

@ -0,0 +1,126 @@
// ignore-tidy-linelength
//@ revisions:i686-linux x86_64-linux
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
//@[i686-linux] needs-llvm-components: x86
//@[x86_64-linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[x86_64-linux] needs-llvm-components: x86
// Tests that we correctly copy arguments into allocas when the alignment of the byval argument
// is different from the alignment of the Rust type.
// For the following test cases:
// All of the `*_decreases_alignment` functions should codegen to a direct call, since the
// alignment is already sufficient.
// All of the `*_increases_alignment` functions should copy the argument to an alloca
// on i686-unknown-linux-gnu, since the alignment needs to be increased, and should codegen
// to a direct call on x86_64-unknown-linux-gnu, where byval alignment matches Rust alignment.
#![feature(no_core, lang_items)]
#![crate_type = "lib"]
#![no_std]
#![no_core]
#![allow(non_camel_case_types)]
// Minimal `core` stand-ins: with `#![no_core]` the lang items normally
// provided by libcore must be declared locally so by-value parameters
// in the functions below type-check.
#[lang = "sized"]
trait Sized {}
#[lang = "freeze"]
trait Freeze {}
#[lang = "copy"]
trait Copy {}
// This type has align 1 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(packed)]
struct Align1 {
    // 48 bytes of payload; `repr(packed)` drops the Rust alignment to 1.
    x: u128,
    y: u128,
    z: u128,
}
// This type has align 16 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(align(16))]
struct Align16 {
    // 48 bytes of payload; `repr(align(16))` raises the Rust alignment to 16.
    x: u128,
    y: u128,
    z: u128,
}
// External C functions taking the structs by value; on i686-linux these
// are passed `byval` with the C ABI's alignment rules (align 4 per the
// comment above), not the Rust type's alignment.
extern "C" {
    fn extern_c_align1(x: Align1);
    fn extern_c_align16(x: Align16);
}
// CHECK-LABEL: @rust_to_c_increases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_increases_alignment(x: Align1) {
    // Rust holds `x` at align 1, but the i686 byval ABI wants align 4,
    // so codegen must memcpy it into a higher-aligned alloca before the call.
    // On x86_64 the alignments already match, so the call is direct.
    // i686-linux: start:
    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align1, align 4
    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 4 {{.*}}[[ALLOCA]], ptr {{.*}}align 1 {{.*}}%x
    // i686-linux-NEXT: call void @extern_c_align1({{.+}} [[ALLOCA]])
    // x86_64-linux: start:
    // x86_64-linux-NEXT: call void @extern_c_align1
    extern_c_align1(x);
}
// CHECK-LABEL: @rust_to_c_decreases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_decreases_alignment(x: Align16) {
    // Rust's align 16 already satisfies the (lower) byval alignment on
    // both targets, so no copy is needed and the call is direct.
    // CHECK: start:
    // CHECK-NEXT: call void @extern_c_align16
    extern_c_align16(x);
}
// External Rust-ABI functions taking the structs by value; the callee
// expects the Rust type's alignment (1 and 16 respectively).
extern "Rust" {
    fn extern_rust_align1(x: Align1);
    fn extern_rust_align16(x: Align16);
}
// CHECK-LABEL: @c_to_rust_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_decreases_alignment(x: Align1) {
    // The incoming byval slot is at least align 1, which already satisfies
    // the Rust type's alignment, so the argument is forwarded directly.
    // CHECK: start:
    // CHECK-NEXT: call void @extern_rust_align1
    extern_rust_align1(x);
}
// CHECK-LABEL: @c_to_rust_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_increases_alignment(x: Align16) {
    // On i686 the incoming byval slot only guarantees align 4, but the Rust
    // type requires align 16, so the argument must be copied to a 16-aligned
    // alloca before use. On x86_64 the byval alignment already matches.
    // i686-linux: start:
    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
    // i686-linux-NEXT: call void @extern_rust_align16({{.+}} [[ALLOCA]])
    // x86_64-linux: start:
    // x86_64-linux-NEXT: call void @extern_rust_align16
    extern_rust_align16(x);
}
// External Rust-ABI functions taking the structs by reference; a `&T`
// must point to storage aligned to the Rust type's alignment.
extern "Rust" {
    fn extern_rust_ref_align1(x: &Align1);
    fn extern_rust_ref_align16(x: &Align16);
}
// CHECK-LABEL: @c_to_rust_ref_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_decreases_alignment(x: Align1) {
    // Taking `&x` needs only align 1, which the byval slot already
    // provides, so the reference can point at the incoming argument.
    // CHECK: start:
    // CHECK-NEXT: call void @extern_rust_ref_align1
    extern_rust_ref_align1(&x);
}
// CHECK-LABEL: @c_to_rust_ref_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_increases_alignment(x: Align16) {
    // Taking `&x` requires align 16, but on i686 the byval slot only
    // guarantees align 4, so the argument is first copied to a 16-aligned
    // alloca and the reference points at the copy. Direct on x86_64.
    // i686-linux: start:
    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
    // i686-linux-NEXT: call void @extern_rust_ref_align16({{.+}} [[ALLOCA]])
    // x86_64-linux: start:
    // x86_64-linux-NEXT: call void @extern_rust_ref_align16
    extern_rust_ref_align16(&x);
}