rustc_target: Add alignment to indirectly-passed by-value types, correcting the
alignment of `byval` on x86 in the process.
Commit 88e4d2c291 from five years ago removed
support for alignment on indirectly-passed arguments because of problems with
the `i686-pc-windows-msvc` target. Unfortunately, the `memcpy` optimizations I
recently added to LLVM 16 depend on this to forward `memcpy`s. This commit
attempts to fix the problems with `byval` parameters on that target and now
correctly adds the `align` attribute.
The problem is summarized in [this comment] by @eddyb. Briefly, 32-bit x86 has
special alignment rules for `byval` parameters: for the most part, their
alignment is forced to 4. This is not well-documented anywhere but in the Clang
source. I looked at the logic in Clang `TargetInfo.cpp` and tried to replicate
it here. The relevant methods in that file are
`X86_32ABIInfo::getIndirectResult()` and
`X86_32ABIInfo::getTypeStackAlignInBytes()`. The `align` parameter attribute
for `byval` parameters in LLVM must match the platform ABI, or miscompilations
will occur. Note that this doesn't use the approach suggested by eddyb, because
I felt it was overkill to store the alignment in `on_stack` when special
handling is really only needed for 32-bit x86.
As a side effect, this should fix #80127, because it will make the `align`
parameter attribute for `byval` parameters match the platform ABI on LLVM's
x86-64 target.
[this comment]: https://github.com/rust-lang/rust/pull/80822#issuecomment-829985417
This commit is contained in:
parent
8ca44ef9ca
commit
0becc89d4a
11 changed files with 208 additions and 17 deletions
|
@ -10,7 +10,7 @@ fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
|
|||
|
||||
fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
|
||||
if arg.layout.is_aggregate() {
|
||||
arg.make_indirect_byval();
|
||||
arg.make_indirect_byval(None);
|
||||
} else {
|
||||
arg.extend_integer_width_to(32);
|
||||
}
|
||||
|
|
|
@ -494,9 +494,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
|
|||
.set(ArgAttribute::NonNull)
|
||||
.set(ArgAttribute::NoUndef);
|
||||
attrs.pointee_size = layout.size;
|
||||
// FIXME(eddyb) We should be doing this, but at least on
|
||||
// i686-pc-windows-msvc, it results in wrong stack offsets.
|
||||
// attrs.pointee_align = Some(layout.align.abi);
|
||||
attrs.pointee_align = Some(layout.align.abi);
|
||||
|
||||
let extra_attrs = layout.is_unsized().then_some(ArgAttributes::new());
|
||||
|
||||
|
@ -513,11 +511,19 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
|
|||
self.mode = Self::indirect_pass_mode(&self.layout);
|
||||
}
|
||||
|
||||
pub fn make_indirect_byval(&mut self) {
|
||||
pub fn make_indirect_byval(&mut self, byval_align: Option<Align>) {
|
||||
self.make_indirect();
|
||||
match self.mode {
|
||||
PassMode::Indirect { attrs: _, extra_attrs: _, ref mut on_stack } => {
|
||||
PassMode::Indirect { ref mut attrs, extra_attrs: _, ref mut on_stack } => {
|
||||
*on_stack = true;
|
||||
|
||||
// Some platforms, like 32-bit x86, change the alignment of the type when passing
|
||||
// `byval`. Account for that.
|
||||
if let Some(byval_align) = byval_align {
|
||||
// On all targets with byval align this is currently true, so let's assert it.
|
||||
debug_assert!(byval_align >= Align::from_bytes(4).unwrap());
|
||||
attrs.pointee_align = Some(byval_align);
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
@ -644,7 +650,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
{
|
||||
if abi == spec::abi::Abi::X86Interrupt {
|
||||
if let Some(arg) = self.args.first_mut() {
|
||||
arg.make_indirect_byval();
|
||||
// FIXME(pcwalton): This probably should use the x86 `byval` ABI...
|
||||
arg.make_indirect_byval(None);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ where
|
|||
{
|
||||
arg.extend_integer_width_to(32);
|
||||
if arg.layout.is_aggregate() && !unwrap_trivial_aggregate(cx, arg) {
|
||||
arg.make_indirect_byval();
|
||||
arg.make_indirect_byval(None);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::abi::call::{ArgAttribute, FnAbi, PassMode, Reg, RegKind};
|
||||
use crate::abi::{HasDataLayout, TyAbiInterface};
|
||||
use crate::abi::{Align, HasDataLayout, TyAbiInterface};
|
||||
use crate::spec::HasTargetSpec;
|
||||
|
||||
#[derive(PartialEq)]
|
||||
|
@ -53,11 +53,38 @@ where
|
|||
if arg.is_ignore() {
|
||||
continue;
|
||||
}
|
||||
if arg.layout.is_aggregate() {
|
||||
arg.make_indirect_byval();
|
||||
} else {
|
||||
if !arg.layout.is_aggregate() {
|
||||
arg.extend_integer_width_to(32);
|
||||
continue;
|
||||
}
|
||||
|
||||
// We need to compute the alignment of the `byval` argument. The rules can be found in
|
||||
// `X86_32ABIInfo::getTypeStackAlignInBytes` in Clang's `TargetInfo.cpp`. Summarized here,
|
||||
// they are:
|
||||
//
|
||||
// 1. If the natural alignment of the type is less than or equal to 4, the alignment is 4.
|
||||
//
|
||||
// 2. Otherwise, on Linux, the alignment of any vector type is the natural alignment.
|
||||
// (This doesn't matter here because we ensure we have an aggregate with the check above.)
|
||||
//
|
||||
// 3. Otherwise, on Apple platforms, the alignment of anything that contains a vector type
|
||||
// is 16.
|
||||
//
|
||||
// 4. If none of these conditions are true, the alignment is 4.
|
||||
let t = cx.target_spec();
|
||||
let align_4 = Align::from_bytes(4).unwrap();
|
||||
let align_16 = Align::from_bytes(16).unwrap();
|
||||
let byval_align = if arg.layout.align.abi < align_4 {
|
||||
align_4
|
||||
} else if t.is_like_osx && arg.layout.align.abi >= align_16 {
|
||||
// FIXME(pcwalton): This is dubious--we should actually be looking inside the type to
|
||||
// determine if it contains SIMD vector values--but I think it's fine?
|
||||
align_16
|
||||
} else {
|
||||
align_4
|
||||
};
|
||||
|
||||
arg.make_indirect_byval(Some(byval_align));
|
||||
}
|
||||
|
||||
if flavor == Flavor::FastcallOrVectorcall {
|
||||
|
|
|
@ -213,7 +213,7 @@ where
|
|||
match cls_or_mem {
|
||||
Err(Memory) => {
|
||||
if is_arg {
|
||||
arg.make_indirect_byval();
|
||||
arg.make_indirect_byval(None);
|
||||
} else {
|
||||
// `sret` parameter thus one less integer register available
|
||||
arg.make_indirect();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue