Auto merge of #87254 - rusticstuff:rustc_codegen_llvm_dont_emit_zero_sized_padding, r=eddyb

LLVM codegen: Don't emit zero-sized padding for fields

Currently padding is emitted before fields of a struct and at the end of the struct regardless of the ABI. Even when no padding is required, zero-sized padding fields are emitted. This is not useful and, more importantly, it makes it impossible to generate the exact vector types that LLVM expects for certain ARM SIMD intrinsics. This change should unblock the implementation of many ARM intrinsics using the `unadjusted` ABI, see https://github.com/rust-lang/stdarch/issues/1143#issuecomment-827404092.
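To make the problem concrete, here is a sketch in the style of the codegen test added at the bottom of this diff. The "before" lowering in the comment is only indicative (the exact filler element type depends on the field alignment); the "after" type is the shape the Neon intrinsics expect:

```rust
#![crate_type = "lib"]
#![feature(repr_simd)]

#[repr(simd)]
#[derive(Copy, Clone)]
pub struct int16x4_t(pub i16, pub i16, pub i16, pub i16);

#[derive(Copy, Clone)]
pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);

// With zero-sized padding fields the pair currently lowers to something like
//   %int16x4x2_t = type { [0 x i64], <4 x i16>, [0 x i64], <4 x i16>, [0 x i64] }
// while the aarch64 Neon intrinsics expect exactly
//   %int16x4x2_t = type { <4 x i16>, <4 x i16> }
```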

This is a proof of concept only: field lookup now takes O(number of fields) time instead of O(1), because the mapping is recalculated at every lookup. I would like to measure how big the performance impact actually is before implementing caching or restricting this behavior to the `unadjusted` ABI.

cc `@SparrowLii` `@bjorn3`

([Discussion on internals](https://internals.rust-lang.org/t/feature-request-add-a-way-in-rustc-for-generating-struct-type-llvm-ir-without-paddings/15007))
Merged by bors on 2021-08-11 01:36:23 +00:00 as commit 47b41b7788.
7 changed files with 107 additions and 41 deletions


@@ -24,6 +24,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::Symbol;
 use rustc_target::abi::{HasDataLayout, LayoutOf, PointeeInfo, Size, TargetDataLayout, VariantIdx};
 use rustc_target::spec::{HasTargetSpec, RelocModel, Target, TlsModel};
+use smallvec::SmallVec;
 use std::cell::{Cell, RefCell};
 use std::ffi::CStr;
@@ -74,8 +75,12 @@ pub struct CodegenCx<'ll, 'tcx> {
     /// See <https://llvm.org/docs/LangRef.html#the-llvm-used-global-variable> for details
     pub used_statics: RefCell<Vec<&'ll Value>>,

-    pub lltypes: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), &'ll Type>>,
+    /// Mapping of non-scalar types to llvm types and field remapping if needed.
+    pub type_lowering: RefCell<FxHashMap<(Ty<'tcx>, Option<VariantIdx>), TypeLowering<'ll>>>,
+
+    /// Mapping of scalar types to llvm types.
     pub scalar_lltypes: RefCell<FxHashMap<Ty<'tcx>, &'ll Type>>,
     pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>,
     pub isize_ty: &'ll Type,
@@ -92,6 +97,15 @@ pub struct CodegenCx<'ll, 'tcx> {
     local_gen_sym_counter: Cell<usize>,
 }
+
+pub struct TypeLowering<'ll> {
+    /// Associated LLVM type
+    pub lltype: &'ll Type,
+
+    /// If padding is used the slice maps fields from source order
+    /// to llvm order.
+    pub field_remapping: Option<SmallVec<[u32; 4]>>,
+}

 fn to_llvm_tls_model(tls_model: TlsModel) -> llvm::ThreadLocalMode {
     match tls_model {
         TlsModel::GeneralDynamic => llvm::ThreadLocalMode::GeneralDynamic,
@@ -304,7 +318,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             const_globals: Default::default(),
             statics_to_rauw: RefCell::new(Vec::new()),
             used_statics: RefCell::new(Vec::new()),
-            lltypes: Default::default(),
+            type_lowering: Default::default(),
             scalar_lltypes: Default::default(),
             pointee_infos: Default::default(),
             isize_ty,
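
As an illustration of what a `type_lowering` entry carries, here is a small self-contained model. The `String` stands in for the real `&'ll Type`, and the example struct and values are assumptions for the sake of the sketch, not compiler output:

```rust
struct TypeLoweringModel {
    lltype: String,                    // stand-in for the cached &'ll Type
    field_remapping: Option<Vec<u32>>, // Some(..) only if padding fillers were inserted
}

fn main() {
    // Assumed layout for `struct S { a: u8, b: u32 }`: 3 bytes of padding between
    // `a` and `b`, so `a` stays LLVM field 0 and `b` becomes LLVM field 2.
    let entry = TypeLoweringModel {
        lltype: "{ i8, [3 x i8], i32 }".to_string(),
        field_remapping: Some(vec![0, 2]),
    };
    assert_eq!(entry.field_remapping.unwrap()[1], 2);
    assert!(entry.lltype.contains("[3 x i8]"));
}
```

The remapping is only populated when real padding fillers were interleaved; `struct_llfields` below decides that.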


@@ -266,7 +266,7 @@ impl LayoutTypeMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         layout.is_llvm_scalar_pair()
     }
     fn backend_field_index(&self, layout: TyAndLayout<'tcx>, index: usize) -> u64 {
-        layout.llvm_field_index(index)
+        layout.llvm_field_index(self, index)
     }
     fn scalar_pair_element_backend_type(
         &self,


@@ -1,5 +1,6 @@
 use crate::abi::FnAbi;
 use crate::common::*;
+use crate::context::TypeLowering;
 use crate::type_::Type;
 use rustc_codegen_ssa::traits::*;
 use rustc_middle::bug;
@@ -9,6 +10,7 @@ use rustc_middle::ty::{self, Ty, TypeFoldable};
 use rustc_target::abi::{Abi, AddressSpace, Align, FieldsShape};
 use rustc_target::abi::{Int, Pointer, F32, F64};
 use rustc_target::abi::{LayoutOf, PointeeInfo, Scalar, Size, TyAndLayoutMethods, Variants};
+use smallvec::{smallvec, SmallVec};
 use tracing::debug;

 use std::fmt::Write;
@@ -17,6 +19,7 @@ fn uncached_llvm_type<'a, 'tcx>(
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
     defer: &mut Option<(&'a Type, TyAndLayout<'tcx>)>,
+    field_remapping: &mut Option<SmallVec<[u32; 4]>>,
 ) -> &'a Type {
     match layout.abi {
         Abi::Scalar(_) => bug!("handled elsewhere"),
@@ -75,7 +78,8 @@ fn uncached_llvm_type<'a, 'tcx>(
         FieldsShape::Array { count, .. } => cx.type_array(layout.field(cx, 0).llvm_type(cx), count),
         FieldsShape::Arbitrary { .. } => match name {
             None => {
-                let (llfields, packed) = struct_llfields(cx, layout);
+                let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+                *field_remapping = new_field_remapping;
                 cx.type_struct(&llfields, packed)
             }
             Some(ref name) => {
@@ -90,7 +94,7 @@ fn uncached_llvm_type<'a, 'tcx>(
 fn struct_llfields<'a, 'tcx>(
     cx: &CodegenCx<'a, 'tcx>,
     layout: TyAndLayout<'tcx>,
-) -> (Vec<&'a Type>, bool) {
+) -> (Vec<&'a Type>, bool, Option<SmallVec<[u32; 4]>>) {
     debug!("struct_llfields: {:#?}", layout);

     let field_count = layout.fields.count();
@@ -98,6 +102,7 @@ fn struct_llfields<'a, 'tcx>(
     let mut offset = Size::ZERO;
     let mut prev_effective_align = layout.align.abi;
     let mut result: Vec<_> = Vec::with_capacity(1 + field_count * 2);
+    let mut field_remapping = smallvec![0; field_count];
     for i in layout.fields.index_by_increasing_offset() {
         let target_offset = layout.fields.offset(i as usize);
         let field = layout.field(cx, i);
@@ -116,33 +121,37 @@ fn struct_llfields<'a, 'tcx>(
         );
         assert!(target_offset >= offset);
         let padding = target_offset - offset;
-        let padding_align = prev_effective_align.min(effective_field_align);
-        assert_eq!(offset.align_to(padding_align) + padding, target_offset);
-        result.push(cx.type_padding_filler(padding, padding_align));
-        debug!("    padding before: {:?}", padding);
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align.min(effective_field_align);
+            assert_eq!(offset.align_to(padding_align) + padding, target_offset);
+            result.push(cx.type_padding_filler(padding, padding_align));
+            debug!("    padding before: {:?}", padding);
+        }
+        field_remapping[i] = result.len() as u32;
         result.push(field.llvm_type(cx));
         offset = target_offset + field.size;
         prev_effective_align = effective_field_align;
     }
+    let padding_used = result.len() > field_count;
     if !layout.is_unsized() && field_count > 0 {
         if offset > layout.size {
             bug!("layout: {:#?} stride: {:?} offset: {:?}", layout, layout.size, offset);
         }
         let padding = layout.size - offset;
-        let padding_align = prev_effective_align;
-        assert_eq!(offset.align_to(padding_align) + padding, layout.size);
-        debug!(
-            "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
-            padding, offset, layout.size
-        );
-        result.push(cx.type_padding_filler(padding, padding_align));
-        assert_eq!(result.len(), 1 + field_count * 2);
+        if padding != Size::ZERO {
+            let padding_align = prev_effective_align;
+            assert_eq!(offset.align_to(padding_align) + padding, layout.size);
+            debug!(
+                "struct_llfields: pad_bytes: {:?} offset: {:?} stride: {:?}",
+                padding, offset, layout.size
+            );
+            result.push(cx.type_padding_filler(padding, padding_align));
+        }
     } else {
         debug!("struct_llfields: offset: {:?} stride: {:?}", offset, layout.size);
     }
-    (result, packed)
+    let field_remapping = if padding_used { Some(field_remapping) } else { None };
+    (result, packed, field_remapping)
 }

 impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
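
Below is a standalone sketch of the bookkeeping `struct_llfields` now performs, assuming the fields are already given in increasing-offset order as `(offset, size)` byte pairs. The real function works on `TyAndLayout` and pushes actual LLVM types; this only mirrors the remapping logic:

```rust
// For each source field, record which LLVM struct field it lands in once
// padding fillers are interleaved only where padding is actually non-zero.
fn remap_fields(fields: &[(u64, u64)]) -> (Vec<String>, Option<Vec<u32>>) {
    let mut llfields = Vec::new();
    let mut remap = vec![0u32; fields.len()];
    let mut offset = 0u64;
    for (i, &(target_offset, size)) in fields.iter().enumerate() {
        let padding = target_offset - offset;
        if padding != 0 {
            // A filler is emitted only when there is real padding before the field.
            llfields.push(format!("[{} x i8]", padding));
        }
        remap[i] = llfields.len() as u32;
        llfields.push(format!("field{}", i));
        offset = target_offset + size;
    }
    let padding_used = llfields.len() > fields.len();
    (llfields, if padding_used { Some(remap) } else { None })
}

fn main() {
    // struct { a: u8, b: u32 }: `a` at offset 0, `b` at offset 4.
    let (llfields, remap) = remap_fields(&[(0, 1), (4, 4)]);
    assert_eq!(llfields, ["field0", "[3 x i8]", "field1"]);
    assert_eq!(remap, Some(vec![0, 2]));
}
```

The remapping is returned only when at least one filler was pushed, so the common unpadded case stores `None` and field lookup falls back to the memory index.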
@@ -177,7 +186,7 @@ pub trait LayoutLlvmExt<'tcx> {
         index: usize,
         immediate: bool,
     ) -> &'a Type;
-    fn llvm_field_index(&self, index: usize) -> u64;
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64;
     fn pointee_info_at<'a>(&self, cx: &CodegenCx<'a, 'tcx>, offset: Size) -> Option<PointeeInfo>;
 }
@@ -234,8 +243,8 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             Variants::Single { index } => Some(index),
             _ => None,
         };
-        if let Some(&llty) = cx.lltypes.borrow().get(&(self.ty, variant_index)) {
-            return llty;
+        if let Some(ref llty) = cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+            return llty.lltype;
         }

         debug!("llvm_type({:#?})", self);
@@ -247,6 +256,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
         let normal_ty = cx.tcx.erase_regions(self.ty);

         let mut defer = None;
+        let mut field_remapping = None;
         let llty = if self.ty != normal_ty {
             let mut layout = cx.layout_of(normal_ty);
             if let Some(v) = variant_index {
@@ -254,17 +264,24 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             }
             layout.llvm_type(cx)
         } else {
-            uncached_llvm_type(cx, *self, &mut defer)
+            uncached_llvm_type(cx, *self, &mut defer, &mut field_remapping)
         };
         debug!("--> mapped {:#?} to llty={:?}", self, llty);

-        cx.lltypes.borrow_mut().insert((self.ty, variant_index), llty);
+        cx.type_lowering.borrow_mut().insert(
+            (self.ty, variant_index),
+            TypeLowering { lltype: llty, field_remapping: field_remapping },
+        );

         if let Some((llty, layout)) = defer {
-            let (llfields, packed) = struct_llfields(cx, layout);
-            cx.set_struct_body(llty, &llfields, packed)
+            let (llfields, packed, new_field_remapping) = struct_llfields(cx, layout);
+            cx.set_struct_body(llty, &llfields, packed);
+            cx.type_lowering
+                .borrow_mut()
+                .get_mut(&(self.ty, variant_index))
+                .unwrap()
+                .field_remapping = new_field_remapping;
         }

         llty
     }
@@ -340,7 +357,7 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
         self.scalar_llvm_type_at(cx, scalar, offset)
     }

-    fn llvm_field_index(&self, index: usize) -> u64 {
+    fn llvm_field_index<'a>(&self, cx: &CodegenCx<'a, 'tcx>, index: usize) -> u64 {
         match self.abi {
             Abi::Scalar(_) | Abi::ScalarPair(..) => {
                 bug!("TyAndLayout::llvm_field_index({:?}): not applicable", self)
@@ -354,7 +371,25 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             FieldsShape::Array { .. } => index as u64,
-            FieldsShape::Arbitrary { .. } => 1 + (self.fields.memory_index(index) as u64) * 2,
+            FieldsShape::Arbitrary { .. } => {
+                let variant_index = match self.variants {
+                    Variants::Single { index } => Some(index),
+                    _ => None,
+                };
+
+                // Look up llvm field if indexes do not match memory order due to padding. If
+                // `field_remapping` is `None` no padding was used and the llvm field index
+                // matches the memory index.
+                match cx.type_lowering.borrow().get(&(self.ty, variant_index)) {
+                    Some(TypeLowering { field_remapping: Some(ref remap), .. }) => {
+                        remap[index] as u64
+                    }
+                    Some(_) => self.fields.memory_index(index) as u64,
+                    None => {
+                        bug!("TyAndLayout::llvm_field_index({:?}): type info not found", self)
+                    }
+                }
+            }
         }
     }


@@ -98,7 +98,8 @@ fn emit_aapcs_va_arg(
     // Implementation of the AAPCS64 calling convention for va_args see
     // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
     let va_list_addr = list.immediate();
-    let va_list_ty = list.deref(bx.cx).layout.llvm_type(bx);
+    let va_list_layout = list.deref(bx.cx).layout;
+    let va_list_ty = va_list_layout.llvm_type(bx);
     let layout = bx.cx.layout_of(target_ty);

     let mut maybe_reg = bx.build_sibling_block("va_arg.maybe_reg");
@@ -110,13 +111,15 @@ fn emit_aapcs_va_arg(
     let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
     let (reg_off, reg_top_index, slot_size) = if gr_type {
-        let gr_offs = bx.struct_gep(va_list_ty, va_list_addr, 7);
+        let gr_offs =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
         let nreg = (layout.size.bytes() + 7) / 8;
-        (gr_offs, 3, nreg * 8)
+        (gr_offs, va_list_layout.llvm_field_index(bx.cx, 1), nreg * 8)
     } else {
-        let vr_off = bx.struct_gep(va_list_ty, va_list_addr, 9);
+        let vr_off =
+            bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 4));
         let nreg = (layout.size.bytes() + 15) / 16;
-        (vr_off, 5, nreg * 16)
+        (vr_off, va_list_layout.llvm_field_index(bx.cx, 2), nreg * 16)
     };

     // if the offset >= 0 then the value will be on the stack
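
Why the hard-coded GEP indices 7 and 9 disappear: with zero-sized fillers, every field was preceded by a padding entry, so Rust field `i` of the AAPCS64 `va_list` (assumed here to be `__stack`, `__gr_top`, `__vr_top`, `__gr_offs`, `__vr_offs`) always landed at LLVM index `1 + 2 * i`, the formula removed from `llvm_field_index` above. The new code asks `llvm_field_index` instead of baking that pattern in. A minimal check of the old assumption:

```rust
// Restates the old 1 + 2*i mapping only; it is not valid once the fillers are gone.
fn old_llvm_index(rust_field: u64) -> u64 {
    1 + 2 * rust_field
}

fn main() {
    assert_eq!(old_llvm_index(3), 7); // __gr_offs: the old hard-coded struct_gep index 7
    assert_eq!(old_llvm_index(4), 9); // __vr_offs: the old hard-coded struct_gep index 9
}
```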


@@ -8,7 +8,7 @@ pub enum Align64 {
     A(u32),
     B(u32),
 }
-// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
+// CHECK: %Align64 = type { i32, [15 x i32] }

 pub struct Nested64 {
     a: u8,


@@ -5,7 +5,7 @@
 #[repr(align(64))]
 pub struct Align64(i32);
-// CHECK: %Align64 = type { [0 x i32], i32, [15 x i32] }
+// CHECK: %Align64 = type { i32, [15 x i32] }

 pub struct Nested64 {
     a: Align64,
@@ -13,20 +13,20 @@ pub struct Nested64 {
     c: i32,
     d: i8,
 }
-// CHECK: %Nested64 = type { [0 x i64], %Align64, [0 x i32], i32, [0 x i32], i32, [0 x i8], i8, [55 x i8] }
+// CHECK: %Nested64 = type { %Align64, i32, i32, i8, [55 x i8] }

 pub enum Enum4 {
     A(i32),
     B(i32),
 }
-// CHECK: %"Enum4::A" = type { [1 x i32], i32, [0 x i32] }
+// CHECK: %"Enum4::A" = type { [1 x i32], i32 }

 pub enum Enum64 {
     A(Align64),
     B(i32),
 }
-// CHECK: %Enum64 = type { [0 x i32], i32, [31 x i32] }
-// CHECK: %"Enum64::A" = type { [8 x i64], %Align64, [0 x i64] }
+// CHECK: %Enum64 = type { i32, [31 x i32] }
+// CHECK: %"Enum64::A" = type { [8 x i64], %Align64 }

 // CHECK-LABEL: @align64
 #[no_mangle]
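
Note that only the zero-sized fillers disappear from the CHECK lines; real padding such as the trailing `[55 x i8]` in `%Nested64` remains. A quick arithmetic check of that tail, assuming the field offsets implied by the CHECK line:

```rust
fn main() {
    let align = 64u64;
    let fields_end = 64 + 4 + 4 + 1; // %Align64, i32, i32, i8 laid out back to back
    let size = (fields_end + align - 1) / align * align; // struct size rounds up to its alignment
    assert_eq!(size - fields_end, 55); // matches the [55 x i8] filler in %Nested64
}
```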


@@ -0,0 +1,14 @@
+// Make sure that no 0-sized padding is inserted in structs and that
+// structs are represented as expected by Neon intrinsics in LLVM.
+// See #87254.
+
+#![crate_type = "lib"]
+#![feature(repr_simd)]
+
+#[derive(Copy, Clone, Debug)]
+#[repr(simd)]
+pub struct int16x4_t(pub i16, pub i16, pub i16, pub i16);
+
+#[derive(Copy, Clone, Debug)]
+pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);
+// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> }