Auto merge of #87254 - rusticstuff:rustc_codegen_llvm_dont_emit_zero_sized_padding, r=eddyb
LLVM codegen: Don't emit zero-sized padding for fields

Currently, padding is emitted before the fields of a struct and at the end of the struct, regardless of the ABI. Zero-sized padding fields are emitted even when no padding is required. This is not useful and, more importantly, it makes it impossible to generate the exact vector types that LLVM expects for certain ARM SIMD intrinsics. This change should unblock the implementation of many ARM intrinsics using the `unadjusted` ABI; see https://github.com/rust-lang/stdarch/issues/1143#issuecomment-827404092.

This is a proof of concept only, because field lookup now takes O(number of fields) time instead of O(1): the mapping is recalculated on every lookup. I would like to find out how big the performance impact actually is before implementing caching or restricting this behavior to the `unadjusted` ABI.

cc @SparrowLii @bjorn3 ([Discussion on internals](https://internals.rust-lang.org/t/feature-request-add-a-way-in-rustc-for-generating-struct-type-llvm-ir-without-paddings/15007))
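The effect is easiest to see on one of the codegen tests updated below (src/test/codegen/align-struct.rs): only the zero-sized filler disappears; real trailing padding is still emitted.

    #[repr(align(64))]
    pub struct Align64(i32);
    // Before: %Align64 = type { [0 x i32], i32, [15 x i32] }
    //         ^ zero-sized filler emitted although no padding is needed
    // After:  %Align64 = type { i32, [15 x i32] }
    //         ^ only the 60 real trailing padding bytes remain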
commit 47b41b7788
7 changed files with 107 additions and 41 deletions
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs

@@ -24,6 +24,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::Symbol;
 use rustc_target::abi::{HasDataLayout, LayoutOf, PointeeInfo, Size, TargetDataLayout, VariantIdx};
 use rustc_target::spec::{HasTargetSpec, RelocModel, Target, TlsModel};
+use smallvec::SmallVec;
 
 use std::cell::{Cell, RefCell};
 use std::ffi::CStr;
@@ -74,8 +75,12 @@ pub struct CodegenCx<'ll, 'tcx> {
     /// See <https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable> for details
     pub used_statics: RefCell<Vec<&'ll Value>>,
 
-    pub lltypes: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), &'ll Type>>,
+    /// Mapping of non-scalar types to llvm types and field remapping if needed.
+    pub type_lowering: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), TypeLowering<'ll>>>,
+
+    /// Mapping of scalar types to llvm types.
     pub scalar_lltypes: RefCell<FxHashMap<Ty<'tcx>, &'ll Type>>,
+
     pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
     pub isize_ty: &'ll Type,
 
@@ -92,6 +97,15 @@ pub struct CodegenCx<'ll, 'tcx> {
     local_gen_sym_counter: Cell<usize>,
 }
 
+pub struct TypeLowering<'ll> {
+    /// Associated LLVM type
+    pub lltype: &'ll Type,
+
+    /// If padding is used the slice maps fields from source order
+    /// to llvm order.
+    pub field_remapping: Option<SmallVec<[u32; 4]>>,
+}
+
 fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
     match tls_model {
         TlsModel::GeneralDynamic => llvm::ThreadLocalMode::GeneralDynamic,
@@ -304,7 +318,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
                 const_globals: Default::default(),
                 statics_to_rauw: RefCell::new(Vec::new()),
                 used_statics: RefCell::new(Vec::new()),
-                lltypes: Default::default(),
+                type_lowering: Default::default(),
                 scalar_lltypes: Default::default(),
                 pointee_infos: Default::default(),
                 isize_ty,
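To see how the new cache is meant to be used end to end, here is a minimal self-contained sketch of the `TypeLowering` idea. The stand-in types are simplifications (plain `HashMap` and `&'static str` instead of `FxHashMap`, interned `Ty<'tcx>`, and `&'ll Type`); the real lookup lives in `llvm_field_index` in type_of.rs below.

    use std::cell::RefCell;
    use std::collections::HashMap;

    // Simplified stand-ins for interned rustc types and LLVM type handles.
    type Ty = &'static str;

    struct TypeLowering {
        /// The LLVM type the Rust type was lowered to.
        lltype: &'static str,
        /// Maps source-order field index to LLVM struct field index;
        /// `None` means no padding fillers were inserted.
        field_remapping: Option<Vec<u32>>,
    }

    struct Cx {
        type_lowering: RefCell<HashMap<Ty, TypeLowering>>,
    }

    impl Cx {
        fn llvm_field_index(&self, ty: Ty, index: usize) -> u64 {
            match self.type_lowering.borrow().get(ty) {
                // Padding fillers shifted the indices: consult the remap table.
                Some(TypeLowering { field_remapping: Some(remap), .. }) => remap[index] as u64,
                // No fillers: LLVM field order matches memory order.
                Some(_) => index as u64,
                None => panic!("type was never lowered"),
            }
        }
    }

    fn main() {
        let cx = Cx { type_lowering: RefCell::new(HashMap::new()) };
        cx.type_lowering.borrow_mut().insert(
            "Foo",
            TypeLowering { lltype: "{ i16, [2 x i8], i32 }", field_remapping: Some(vec![0, 2]) },
        );
        // Source field 1 (the i32) is LLVM field 2, behind the filler.
        assert_eq!(cx.llvm_field_index("Foo", 1), 2);
    }

The `Option` is the important part: `None` means no filler was inserted, so LLVM field order matches memory order and no table needs to be stored.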
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs

@@ -266,7 +266,7 @@ impl LayoutTypeMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         layout.is_llvm_scalar_pair()
     }
     fn backend_field_index(&self, layout: TyAndLayout<'tcx>, index: usize) -> u64 {
-        layout.llvm_field_index(index)
+        layout.llvm_field_index(self, index)
     }
     fn scalar_pair_element_backend_type(
         &self,
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs

@@ -1,5 +1,6 @@
 use crate::abi::FnAbi;
 use crate::common::*;
+use crate::context::TypeLowering;
 use crate::type_::Type;
 use rustc_codegen_ssa::traits::*;
 use rustc_middle::bug;
@@ -9,6 +10,7 @@ use rustc_middle::ty::{self, Ty, TypeFoldable};
 use rustc_target::abi::{Abi, AddressSpace, Align, FieldsShape};
 use rustc_target::abi::{Int, Pointer, F32, F64};
 use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAndLayoutMethods, Variants};
+use smallvec::{smallvec, SmallVec};
 use tracing::debug;
 
 use std::fmt::Write;
@@ -17,6 +19,7 @@ fn uncached_llvm_type<'a, 'tcx>(
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
     defer: &mut Option<(&'a Type, TyAndLayout<'tcx>)>,
+    field_remapping: &mut Option<SmallVec<[u32; 4]>>,
 ) -> &'a Type {
     match layout.abi {
         Abi::Scalar(_) => bug!("handled elsewhere"),
@@ -75,7 +78,8 @@ fn uncached_llvm_type<'a, 'tcx>(
         FieldsShape::Array { count, .. } => cx.type_array(layout.field(cx, 0).llvm_type(cx), count),
         FieldsShape::Arbitrary { .. } => match name {
             None => {
-                let (llfields, packed) = struct_llfields(cx, layout);
+                let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+                *field_remapping = new_field_remapping;
                 cx.type_struct(&llfields, packed)
             }
             Some(ref name) => {
@@ -90,7 +94,7 @@ fn uncached_llvm_type<'a, 'tcx>(
 fn struct_llfields<'a, 'tcx>(
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
-) -> (Vec<&'a Type>, bool) {
+) -> (Vec<&'a Type>, bool, Option<SmallVec<[u32; 4]>>) {
     debug!("struct_llfields: {:#?}", layout);
     let field_count = layout.fields.count();
 
@@ -98,6 +102,7 @@ fn struct_llfields<'a, 'tcx>(
     let mut offset = Size::ZERO;
     let mut prev_effective_align = layout.align.abi;
     let mut result: Vec<_> = Vec::with_capacity(1 + field_count * 2);
+    let mut field_remapping = smallvec![0; field_count];
     for i in layout.fields.index_by_increasing_offset() {
         let target_offset = layout.fields.offset(i as usize);
         let field = layout.field(cx, i);
@@ -116,33 +121,37 @@ fn struct_llfields<'a, 'tcx>(
         );
         assert!(target_offset >= offset);
         let padding = target_offset - offset;
-        let padding_align = prev_effective_align.min(effective_field_align);
-        assert_eq!(offset.align_to(padding_align) + padding, target_offset);
-        result.push(cx.type_padding_filler(padding, padding_align));
-        debug!("    padding before: {:?}", padding);
-
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align.min(effective_field_align);
+            assert_eq!(offset.align_to(padding_align) + padding, target_offset);
+            result.push(cx.type_padding_filler(padding, padding_align));
+            debug!("    padding before: {:?}", padding);
+        }
+        field_remapping[i] = result.len() as u32;
         result.push(field.llvm_type(cx));
         offset = target_offset + field.size;
         prev_effective_align = effective_field_align;
     }
+    let padding_used = result.len() > field_count;
     if !layout.is_unsized() && field_count > 0 {
         if offset > layout.size {
             bug!("layout: {:#?} stride: {:?} offset: {:?}", layout, layout.size, offset);
         }
         let padding = layout.size - offset;
-        let padding_align = prev_effective_align;
-        assert_eq!(offset.align_to(padding_align) + padding, layout.size);
-        debug!(
-            "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
-            padding, offset, layout.size
-        );
-        result.push(cx.type_padding_filler(padding, padding_align));
-        assert_eq!(result.len(), 1 + field_count * 2);
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align;
+            assert_eq!(offset.align_to(padding_align) + padding, layout.size);
+            debug!(
+                "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
+                padding, offset, layout.size
+            );
+            result.push(cx.type_padding_filler(padding, padding_align));
+        }
     } else {
         debug!("struct_llfields: offset: {:?} stride: {:?}", offset, layout.size);
     }
-
-    (result, packed)
+    let field_remapping = if padding_used { Some(field_remapping) } else { None };
+    (result, packed, field_remapping)
 }
 
 impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
@@ -177,7 +186,7 @@ pub trait LayoutLlvmExt<'tcx> {
         index: usize,
         immediate: bool,
     ) -> &'a Type;
-    fn llvm_field_index(&self, index: usize) -> u64;
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64;
     fn pointee_info_at<'a>(&self, cx: &CodegenCx<'a, 'tcx>, offset: Size) -> Option<PointeeInfo>;
 }
 
@@ -234,8 +243,8 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             Variants::Single { index } => Some(index),
             _ => None,
         };
-        if let Some(&llty) = cx.lltypes.borrow().get(&(self.ty, variant_index)) {
-            return llty;
+        if let Some(ref llty) = cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+            return llty.lltype;
         }
 
         debug!("llvm_type({:#?})", self);
@@ -247,6 +256,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
         let normal_ty = cx.tcx.erase_regions(self.ty);
 
         let mut defer = None;
+        let mut field_remapping = None;
         let llty = if self.ty != normal_ty {
             let mut layout = cx.layout_of(normal_ty);
             if let Some(v) = variant_index {
@@ -254,17 +264,24 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             }
             layout.llvm_type(cx)
         } else {
-            uncached_llvm_type(cx, *self, &mut defer)
+            uncached_llvm_type(cx, *self, &mut defer, &mut field_remapping)
         };
         debug!("--> mapped {:#?} to llty={:?}", self, llty);
 
-        cx.lltypes.borrow_mut().insert((self.ty, variant_index), llty);
+        cx.type_lowering.borrow_mut().insert(
+            (self.ty, variant_index),
+            TypeLowering { lltype: llty, field_remapping: field_remapping },
+        );
 
         if let Some((llty, layout)) = defer {
-            let (llfields, packed) = struct_llfields(cx, layout);
-            cx.set_struct_body(llty, &llfields, packed)
+            let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+            cx.set_struct_body(llty, &llfields, packed);
+            cx.type_lowering
+                .borrow_mut()
+                .get_mut(&(self.ty, variant_index))
+                .unwrap()
+                .field_remapping = new_field_remapping;
         }
 
         llty
     }
 
@@ -340,7 +357,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
         self.scalar_llvm_type_at(cx, scalar, offset)
     }
 
-    fn llvm_field_index(&self, index: usize) -> u64 {
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64 {
         match self.abi {
             Abi::Scalar(_) | Abi::ScalarPair(..) => {
                 bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
@@ -354,7 +371,25 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
 
             FieldsShape::Array { .. } => index as u64,
 
-            FieldsShape::Arbitrary { .. } => 1 + (self.fields.memory_index(index) as u64) * 2,
+            FieldsShape::Arbitrary { .. } => {
+                let variant_index = match self.variants {
+                    Variants::Single { index } => Some(index),
+                    _ => None,
+                };
+
+                // Look up llvm field if indexes do not match memory order due to padding. If
+                // `field_remapping` is `None` no padding was used and the llvm field index
+                // matches the memory index.
+                match cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+                    Some(TypeLowering { field_remapping: Some(ref remap), .. }) => {
+                        remap[index] as u64
+                    }
+                    Some(_) => self.fields.memory_index(index) as u64,
+                    None => {
+                        bug!("TyAndLayout::llvm_field_index({:?}): type info not found", self)
+                    }
+                }
+            }
         }
     }
 
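The core of the `struct_llfields` change is easier to see stripped of the layout machinery: walk fields in increasing-offset order, emit a filler only when the gap is non-zero, and record where each source field landed. The following is a simplified, self-contained model of that loop (byte offsets and strings instead of `TyAndLayout` and LLVM types; `Field` and `llfields` are invented names for this sketch). It deliberately ignores trailing padding, filler alignment, and the fact that `index_by_increasing_offset` can reorder fields relative to source order.

    /// One field of a struct: its target byte offset, size, and LLVM type.
    struct Field { offset: u64, size: u64, llty: String }

    /// Returns the LLVM field list plus an optional source-index -> llvm-index map.
    fn llfields(fields: &[Field]) -> (Vec<String>, Option<Vec<u32>>) {
        let mut result = Vec::new();
        let mut remap = vec![0u32; fields.len()];
        let mut offset = 0;
        for (i, f) in fields.iter().enumerate() {
            assert!(f.offset >= offset, "fields must be sorted by offset");
            let padding = f.offset - offset;
            // Key change: previously a filler was pushed even when padding == 0.
            if padding != 0 {
                result.push(format!("[{} x i8]", padding));
            }
            remap[i] = result.len() as u32; // llvm index of this source field
            result.push(f.llty.clone());
            offset = f.offset + f.size;
        }
        // Remapping only needs to be kept if any filler was actually inserted.
        let padding_used = result.len() > fields.len();
        (result, padding_used.then(|| remap))
    }

    fn main() {
        // u16 at offset 0, u32 at offset 4 -> 2 bytes of padding between them.
        let fields = vec![
            Field { offset: 0, size: 2, llty: "i16".into() },
            Field { offset: 4, size: 4, llty: "i32".into() },
        ];
        let (lltys, remap) = llfields(&fields);
        assert_eq!(lltys, ["i16", "[2 x i8]", "i32"]);
        assert_eq!(remap, Some(vec![0, 2])); // source field 1 is llvm field 2
    }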
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs

@@ -98,7 +98,8 @@ fn emit_aapcs_va_arg(
     // Implementation of the AAPCS64 calling convention for va_args see
     // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
     let va_list_addr = list.immediate();
-    let va_list_ty = list.deref(bx.cx).layout.llvm_type(bx);
+    let va_list_layout = list.deref(bx.cx).layout;
+    let va_list_ty = va_list_layout.llvm_type(bx);
     let layout = bx.cx.layout_of(target_ty);
 
     let mut maybe_reg = bx.build_sibling_block("va_arg.maybe_reg");
@@ -110,13 +111,15 @@ fn emit_aapcs_va_arg(
 
     let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
     let (reg_off, reg_top_index, slot_size) = if gr_type {
-        let gr_offs = bx.struct_gep(va_list_ty, va_list_addr, 7);
+        let gr_offs =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
         let nreg = (layout.size.bytes() + 7) / 8;
-        (gr_offs, 3, nreg * 8)
+        (gr_offs, va_list_layout.llvm_field_index(bx.cx, 1), nreg * 8)
     } else {
-        let vr_off = bx.struct_gep(va_list_ty, va_list_addr, 9);
+        let vr_off =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 4));
         let nreg = (layout.size.bytes() + 15) / 16;
-        (vr_off, 5, nreg * 16)
+        (vr_off, va_list_layout.llvm_field_index(bx.cx, 2), nreg * 16)
     };
 
     // if the offset >= 0 then the value will be on the stack
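Why the va_arg code had to change: the AArch64 `va_list` struct previously picked up a filler slot in front of every field, so LLVM field numbers followed the `1 + memory_index * 2` pattern of the old `llvm_field_index` (hence the hard-coded 7 for source field 3 and 9 for source field 4, and the literals 3 and 5 for `gr_top`/`vr_top`). With fillers emitted only for real padding, the numbers can no longer be derived from the source index alone, so they are looked up instead. A rough illustration; the exact filler types in the old lowering are my assumption, not taken from this commit:

    // AArch64 va_list, source fields: stack, gr_top, vr_top, gr_offs, vr_offs
    // Old:  { filler, i8*, filler, i8*, filler, i8*, filler, i32, filler, i32 }
    //         gr_offs = LLVM field 7, vr_offs = LLVM field 9 (hard-coded)
    // New:  { i8*, i8*, i8*, i32, i32 }
    //         gr_offs = llvm_field_index(bx.cx, 3) = 3
    //         vr_offs = llvm_field_index(bx.cx, 4) = 4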
diff --git a/src/test/codegen/align-enum.rs b/src/test/codegen/align-enum.rs

@@ -8,7 +8,7 @@ pub enum Align64 {
     A(u32),
     B(u32),
 }
-// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
+// CHECK: %Align64 = type { i32, [15 x i32] }
 
 pub struct Nested64 {
     a: u8,
diff --git a/src/test/codegen/align-struct.rs b/src/test/codegen/align-struct.rs

@@ -5,7 +5,7 @@
 
 #[repr(align(64))]
 pub struct Align64(i32);
-// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
+// CHECK: %Align64 = type { i32, [15 x i32] }
 
 pub struct Nested64 {
     a: Align64,
@@ -13,20 +13,20 @@ pub struct Nested64 {
     c: i32,
     d: i8,
 }
-// CHECK: %Nested64 = type { [0 x i64], %Align64, [0 x i32], i32, [0 x i32], i32, [0 x i8], i8, [55 x i8] }
+// CHECK: %Nested64 = type { %Align64, i32, i32, i8, [55 x i8] }
 
 pub enum Enum4 {
     A(i32),
     B(i32),
 }
-// CHECK: %"Enum4::A" = type { [1 x i32], i32, [0 x i32] }
+// CHECK: %"Enum4::A" = type { [1 x i32], i32 }
 
 pub enum Enum64 {
     A(Align64),
     B(i32),
 }
-// CHECK: %Enum64 = type { [0 x i32], i32, [31 x i32] }
-// CHECK: %"Enum64::A" = type { [8 x i64], %Align64, [0 x i64] }
+// CHECK: %Enum64 = type { i32, [31 x i32] }
+// CHECK: %"Enum64::A" = type { [8 x i64], %Align64 }
 
 // CHECK-LABEL: @align64
 #[no_mangle]
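A detail the enum tests above make visible: fillers that cover real bytes stay. In `%"Enum4::A" = type { [1 x i32], i32 }`, the leading `[1 x i32]` spans bytes 0..4, where the enum's tag lives, so the variant's payload lands at its correct offset; only zero-sized fillers like the old trailing `[0 x i32]` are gone. A worked reading of those CHECK lines (my interpretation, not text from the commit):

    // Enum4 { A(i32), B(i32) } is 8 bytes: tag in bytes 0..4, payload in 4..8.
    // The variant struct %"Enum4::A" therefore lowers as:
    //   field 0: [1 x i32]   -- 4 real padding bytes covering the tag slot
    //   field 1: i32         -- the A(i32) payload at offset 4
    // The old trailing [0 x i32] occupied no bytes and is no longer emitted.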
diff --git a/src/test/codegen/unpadded-simd.rs b/src/test/codegen/unpadded-simd.rs
new file (14 lines)

@@ -0,0 +1,14 @@
+// Make sure that no 0-sized padding is inserted in structs and that
+// structs are represented as expected by Neon intrinsics in LLVM.
+// See #87254.
+
+#![crate_type = "lib"]
+#![feature(repr_simd)]
+
+#[derive(Copy, Clone, Debug)]
+#[repr(simd)]
+pub struct int16x4_t(pub i16, pub i16, pub i16, pub i16);
+
+#[derive(Copy, Clone, Debug)]
+pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);
+// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> }
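The new test's CHECK line is exactly the shape the Neon intrinsics mentioned in the description require. As a purely hypothetical sketch, modeled on stdarch's style (the intrinsic name and `link_name` are illustrative, not part of this commit), a binding via the `unadjusted` ABI only works once `int16x4x2_t` lowers to the bare pair of vectors:

    // Hypothetical, for illustration: LLVM requires the return type of ld2 to
    // be exactly { <4 x i16>, <4 x i16> }, which zero-sized fillers broke.
    extern "unadjusted" {
        #[link_name = "llvm.aarch64.neon.ld2.v4i16.p0v4i16"]
        fn vld2_s16_(ptr: *const int16x4_t) -> int16x4x2_t;
    }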