remove struct_gep, use manual layout calculations for va_arg

2024-02-24 01:46:30 -05:00 · 2024-02-24 01:46:30 -05:00 · beed25be9a
commit beed25be9a
parent 123015e722
6 changed files with 71 additions and 111 deletions
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@ -778,11 +778,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
        }
    }

-    fn struct_gep(&mut self, ty: &'ll Type, ptr: &'ll Value, idx: u64) -> &'ll Value {
-        assert_eq!(idx as c_uint as u64, idx);
-        unsafe { llvm::LLVMBuildStructGEP2(self.llbuilder, ty, ptr, idx as c_uint, UNNAMED) }
-    }
-
    /* Casts */
    fn trunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
        unsafe { llvm::LLVMBuildTrunc(self.llbuilder, val, dest_ty, UNNAMED) }
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@ -1301,13 +1301,6 @@ extern "C" {
        NumIndices: c_uint,
        Name: *const c_char,
    ) -> &'a Value;
-    pub fn LLVMBuildStructGEP2<'a>(
-        B: &Builder<'a>,
-        Ty: &'a Type,
-        Pointer: &'a Value,
-        Idx: c_uint,
-        Name: *const c_char,
-    ) -> &'a Value;

    // Casts
    pub fn LLVMBuildTrunc<'a>(
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@ -174,7 +174,6 @@ pub trait LayoutLlvmExt<'tcx> {
        index: usize,
        immediate: bool,
    ) -> &'a Type;
-    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64;
    fn scalar_copy_llvm_type<'a>(&self, cx: &CodegenCx<'a, 'tcx>) -> Option<&'a Type>;
 }

@ -324,42 +323,6 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
        self.scalar_llvm_type_at(cx, scalar)
    }

-    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64 {
-        match self.abi {
-            Abi::Scalar(_) | Abi::ScalarPair(..) => {
-                bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
-            }
-            _ => {}
-        }
-        match self.fields {
-            FieldsShape::Primitive | FieldsShape::Union(_) => {
-                bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
-            }
-
-            FieldsShape::Array { .. } => index as u64,
-
-            FieldsShape::Arbitrary { .. } => {
-                let variant_index = match self.variants {
-                    Variants::Single { index } => Some(index),
-                    _ => None,
-                };
-
-                // Look up llvm field if indexes do not match memory order due to padding. If
-                // `field_remapping` is `None` no padding was used and the llvm field index
-                // matches the memory index.
-                match cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
-                    Some(TypeLowering { field_remapping: Some(ref remap), .. }) => {
-                        remap[index] as u64
-                    }
-                    Some(_) => self.fields.memory_index(index) as u64,
-                    None => {
-                        bug!("TyAndLayout::llvm_field_index({:?}): type info not found", self)
-                    }
-                }
-            }
-        }
-    }
-
    fn scalar_copy_llvm_type<'a>(&self, cx: &CodegenCx<'a, 'tcx>) -> Option<&'a Type> {
        debug_assert!(self.is_sized());

--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@ -89,11 +89,35 @@ fn emit_aapcs_va_arg<'ll, 'tcx>(
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
 ) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
    // Implementation of the AAPCS64 calling convention for va_args see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
+    //
+    // typedef struct va_list {
+    //     void * stack; // next stack param
+    //     void * gr_top; // end of GP arg reg save area
+    //     void * vr_top; // end of FP/SIMD arg reg save area
+    //     int gr_offs; // offset from gr_top to next GP register arg
+    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
+    // } va_list;
    let va_list_addr = list.immediate();
-    let va_list_layout = list.deref(bx.cx).layout;
-    let va_list_ty = va_list_layout.llvm_type(bx);
+
+    // There is no padding between fields since `void*` is size=8 align=8, `int` is size=4 align=4.
+    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
+    // Table 1, Byte size and byte alignment of fundamental data types
+    // Table 3, Mapping of C & C++ built-in data types
+    let ptr_offset = 8;
+    let i32_offset = 4;
+    let gr_top = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(ptr_offset)]);
+    let vr_top = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(2 * ptr_offset)]);
+    let gr_offs = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(3 * ptr_offset)]);
+    let vr_offs = bx.inbounds_gep(
+        bx.type_i8(),
+        va_list_addr,
+        &[bx.cx.const_usize(3 * ptr_offset + i32_offset)],
+    );
+
    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
@ -104,16 +128,12 @@ fn emit_aapcs_va_arg<'ll, 'tcx>(
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
-    let (reg_off, reg_top_index, slot_size) = if gr_type {
-        let gr_offs =
-            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
+    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = (layout.size.bytes() + 7) / 8;
-        (gr_offs, va_list_layout.llvm_field_index(bx.cx, 1), nreg * 8)
+        (gr_offs, gr_top, nreg * 8)
    } else {
-        let vr_off =
-            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 4));
        let nreg = (layout.size.bytes() + 15) / 16;
-        (vr_off, va_list_layout.llvm_field_index(bx.cx, 2), nreg * 16)
+        (vr_offs, vr_top, nreg * 16)
    };

    // if the offset >= 0 then the value will be on the stack
@ -141,8 +161,7 @@ fn emit_aapcs_va_arg<'ll, 'tcx>(

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
-    let top = bx.struct_gep(va_list_ty, va_list_addr, reg_top_index);
-    let top = bx.load(top_type, top, bx.tcx().data_layout.pointer_align.abi);
+    let top = bx.load(top_type, reg_top, dl.pointer_align.abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.gep(bx.type_i8(), top, &[reg_off_v]);
@ -173,11 +192,33 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
 ) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
    // Implementation of the s390x ELF ABI calling convention for va_args see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
+    //
+    // typedef struct __va_list_tag {
+    //     long __gpr;
+    //     long __fpr;
+    //     void *__overflow_arg_area;
+    //     void *__reg_save_area;
+    // } va_list[1];
    let va_list_addr = list.immediate();
-    let va_list_layout = list.deref(bx.cx).layout;
-    let va_list_ty = va_list_layout.llvm_type(bx);
+
+    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
+    // See https://github.com/IBM/s390x-abi (Table 1.1, "Scalar types").
+    let i64_offset = 8;
+    let ptr_offset = 8;
+    let gpr = va_list_addr;
+    let fpr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(i64_offset)]);
+    let overflow_arg_area =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(2 * i64_offset)]);
+    let reg_save_area = bx.inbounds_gep(
+        bx.type_i8(),
+        va_list_addr,
+        &[bx.cx.const_usize(2 * i64_offset + ptr_offset)],
+    );
+
    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
@ -192,15 +233,10 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
    let padding = padded_size - unpadded_size;

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
-    let (max_regs, reg_count_field, reg_save_index, reg_padding) =
-        if gpr_type { (5, 0, 2, padding) } else { (4, 1, 16, 0) };
+    let (max_regs, reg_count, reg_save_index, reg_padding) =
+        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };

    // Check whether the value was passed in a register or in memory.
-    let reg_count = bx.struct_gep(
-        va_list_ty,
-        va_list_addr,
-        va_list_layout.llvm_field_index(bx.cx, reg_count_field),
-    );
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);
@ -209,9 +245,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
-    let reg_ptr =
-        bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
-    let reg_ptr_v = bx.load(bx.type_ptr(), reg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, dl.pointer_align.abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.gep(bx.type_i8(), reg_ptr_v, &[reg_off]);
@ -225,27 +259,23 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
-    let arg_ptr =
-        bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 2));
-    let arg_ptr_v = bx.load(bx.type_ptr(), arg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let arg_ptr_v =
+        bx.load(bx.type_ptr(), overflow_arg_area, bx.tcx().data_layout.pointer_align.abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.gep(bx.type_i8(), arg_ptr_v, &[arg_off]);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_gep(bx.type_i8(), arg_ptr_v, &[arg_size]);
-    bx.store(new_arg_ptr_v, arg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    bx.store(new_arg_ptr_v, overflow_arg_area, dl.pointer_align.abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
-    let val_addr = if indirect {
-        bx.load(bx.cx.type_ptr(), val_addr, bx.tcx().data_layout.pointer_align.abi)
-    } else {
-        val_addr
-    };
+    let val_addr =
+        if indirect { bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
 }