Auto merge of #135408 - RalfJung:x86-sse2, r=workingjubilee
x86: use SSE2 to pass float and SIMD types This builds on the new X86Sse2 ABI landed in https://github.com/rust-lang/rust/pull/137037 to actually make it a separate ABI from the default x86 ABI, and use SSE2 registers. Specifically, we use it in two ways: to return `f64` values in a register rather than by-ptr, and to pass vectors of size up to 128bit in a register (or, well, whatever LLVM does when passing `<4 x float>` by-val, I don't actually know if this ends up in a register). Cc `@workingjubilee` Fixes #133611 try-job: aarch64-apple try-job: aarch64-gnu try-job: aarch64-gnu-debug try-job: test-various try-job: x86_64-gnu-nopt try-job: dist-i586-gnu-i586-i686-musl try-job: x86_64-msvc-1
This commit is contained in:
commit
17c1c329a5
14 changed files with 271 additions and 149 deletions
|
@ -7,7 +7,7 @@ use rustc_abi::{
|
|||
};
|
||||
use rustc_macros::HashStable_Generic;
|
||||
|
||||
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
|
||||
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
|
||||
|
||||
mod aarch64;
|
||||
mod amdgpu;
|
||||
|
@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
|
|||
/// Pass this argument directly instead. Should NOT be used!
|
||||
/// Only exists because of past ABI mistakes that will take time to fix
|
||||
/// (see <https://github.com/rust-lang/rust/issues/115666>).
|
||||
#[track_caller]
|
||||
pub fn make_direct_deprecated(&mut self) {
|
||||
match self.mode {
|
||||
PassMode::Indirect { .. } => {
|
||||
|
@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
|
|||
|
||||
/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
|
||||
/// This is valid for both sized and unsized arguments.
|
||||
#[track_caller]
|
||||
pub fn make_indirect(&mut self) {
|
||||
match self.mode {
|
||||
PassMode::Direct(_) | PassMode::Pair(_, _) => {
|
||||
|
@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
|
|||
|
||||
/// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
|
||||
/// ZSTs indirectly.
|
||||
#[track_caller]
|
||||
pub fn make_indirect_from_ignore(&mut self) {
|
||||
match self.mode {
|
||||
PassMode::Ignore => {
|
||||
|
@ -716,7 +719,7 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
C: HasDataLayout + HasTargetSpec,
|
||||
{
|
||||
let spec = cx.target_spec();
|
||||
match &spec.arch[..] {
|
||||
match &*spec.arch {
|
||||
"x86" => x86::compute_rust_abi_info(cx, self, abi),
|
||||
"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
|
||||
"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
|
||||
|
@ -724,6 +727,22 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
_ => {}
|
||||
};
|
||||
|
||||
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
|
||||
// May only return `true` if the target will always pass those arguments the same way,
|
||||
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
|
||||
// target features are required to pass a SIMD value in registers must be listed in
|
||||
// the `abi_required_features` for the current target and ABI.
|
||||
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
|
||||
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
|
||||
// to 128-bit-sized vectors.
|
||||
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
|
||||
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
|
||||
arg.layout.size.bits() <= 128
|
||||
}
|
||||
// So far, we haven't implemented this logic for any other target.
|
||||
_ => false,
|
||||
};
|
||||
|
||||
for (arg_idx, arg) in self
|
||||
.args
|
||||
.iter_mut()
|
||||
|
@ -731,12 +750,15 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
.map(|(idx, arg)| (Some(idx), arg))
|
||||
.chain(iter::once((None, &mut self.ret)))
|
||||
{
|
||||
if arg.is_ignore() {
|
||||
// If the logic above already picked a specific type to cast the argument to, leave that
|
||||
// in place.
|
||||
if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if arg_idx.is_none()
|
||||
&& arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2
|
||||
&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
|
||||
{
|
||||
// Return values larger than 2 registers using a return area
|
||||
// pointer. LLVM and Cranelift disagree about how to return
|
||||
|
@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
// return value independently and decide to pass it in a
|
||||
// register or not, which would result in the return value
|
||||
// being passed partially in registers and partially through a
|
||||
// return area pointer.
|
||||
// return area pointer. For large IR-level values such as `i128`,
|
||||
// cranelift will even split up the value into smaller chunks.
|
||||
//
|
||||
// While Cranelift may need to be fixed as the LLVM behavior is
|
||||
// generally more correct with respect to the surface language,
|
||||
|
@ -776,13 +799,33 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
// rustc_target already ensure any return value which doesn't
|
||||
// fit in the available amount of return registers is passed in
|
||||
// the right way for the current target.
|
||||
//
|
||||
// The adjustment is not necessary nor desired for types with a vector
|
||||
// representation; those are handled below.
|
||||
arg.make_indirect();
|
||||
continue;
|
||||
}
|
||||
|
||||
match arg.layout.backend_repr {
|
||||
BackendRepr::Memory { .. } => {}
|
||||
BackendRepr::Memory { .. } => {
|
||||
// Compute `Aggregate` ABI.
|
||||
|
||||
let is_indirect_not_on_stack =
|
||||
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
|
||||
assert!(is_indirect_not_on_stack);
|
||||
|
||||
let size = arg.layout.size;
|
||||
if arg.layout.is_sized()
|
||||
&& size <= Primitive::Pointer(AddressSpace::DATA).size(cx)
|
||||
{
|
||||
// We want to pass small aggregates as immediates, but using
|
||||
// an LLVM aggregate type for this leads to bad optimizations,
|
||||
// so we pick an appropriately sized integer type instead.
|
||||
arg.cast_to(Reg { kind: RegKind::Integer, size });
|
||||
}
|
||||
}
|
||||
|
||||
BackendRepr::Vector { .. } => {
|
||||
// This is a fun case! The gist of what this is doing is
|
||||
// that we want callers and callees to always agree on the
|
||||
// ABI of how they pass SIMD arguments. If we were to *not*
|
||||
|
@ -799,30 +842,17 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
// target feature sets. Some more information about this
|
||||
// issue can be found in #44367.
|
||||
//
|
||||
// Note that the intrinsic ABI is exempt here as
|
||||
// that's how we connect up to LLVM and it's unstable
|
||||
// anyway, we control all calls to it in libstd.
|
||||
BackendRepr::Vector { .. }
|
||||
if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
|
||||
// Note that the intrinsic ABI is exempt here as those are not
|
||||
// real functions anyway, and the backend expects very specific types.
|
||||
if abi != ExternAbi::RustIntrinsic
|
||||
&& spec.simd_types_indirect
|
||||
&& !can_pass_simd_directly(arg)
|
||||
{
|
||||
arg.make_indirect();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
_ => continue,
|
||||
}
|
||||
// Compute `Aggregate` ABI.
|
||||
|
||||
let is_indirect_not_on_stack =
|
||||
matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
|
||||
assert!(is_indirect_not_on_stack);
|
||||
|
||||
let size = arg.layout.size;
|
||||
if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
|
||||
// We want to pass small aggregates as immediates, but using
|
||||
// an LLVM aggregate type for this leads to bad optimizations,
|
||||
// so we pick an appropriately sized integer type instead.
|
||||
arg.cast_to(Reg { kind: RegKind::Integer, size });
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use rustc_abi::{
|
|||
};
|
||||
|
||||
use crate::callconv::{ArgAttribute, FnAbi, PassMode};
|
||||
use crate::spec::HasTargetSpec;
|
||||
use crate::spec::{HasTargetSpec, RustcAbi};
|
||||
|
||||
#[derive(PartialEq)]
|
||||
pub(crate) enum Flavor {
|
||||
|
@ -236,8 +236,16 @@ where
|
|||
_ => false, // anyway not passed via registers on x86
|
||||
};
|
||||
if has_float {
|
||||
if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
|
||||
// Same size or smaller than pointer, return in a register.
|
||||
if cx.target_spec().rustc_abi == Some(RustcAbi::X86Sse2)
|
||||
&& fn_abi.ret.layout.backend_repr.is_scalar()
|
||||
&& fn_abi.ret.layout.size.bits() <= 128
|
||||
{
|
||||
// This is a single scalar that fits into an SSE register, and the target uses the
|
||||
// SSE ABI. We prefer this over integer registers as float scalars need to be in SSE
|
||||
// registers for float operations, so that's the best place to pass them around.
|
||||
fn_abi.ret.cast_to(Reg { kind: RegKind::Vector, size: fn_abi.ret.layout.size });
|
||||
} else if fn_abi.ret.layout.size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
|
||||
// Same size or smaller than pointer, return in an integer register.
|
||||
fn_abi.ret.cast_to(Reg { kind: RegKind::Integer, size: fn_abi.ret.layout.size });
|
||||
} else {
|
||||
// Larger than a pointer, return indirectly.
|
||||
|
|
|
@ -8,8 +8,9 @@
|
|||
|
||||
use std::arch::x86_64::{__m128, _mm_blend_ps};
|
||||
|
||||
// Use an explicit return pointer to prevent tail call optimization.
|
||||
#[no_mangle]
|
||||
pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
|
||||
pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128, ret: *mut __m128) {
|
||||
let f = {
|
||||
// check that _mm_blend_ps is not being inlined into the closure
|
||||
// CHECK-LABEL: {{sse41_blend_nofeature.*closure.*:}}
|
||||
|
@ -18,9 +19,9 @@ pub unsafe fn sse41_blend_nofeature(x: __m128, y: __m128) -> __m128 {
|
|||
// CHECK-NOT: blendps
|
||||
// CHECK: ret
|
||||
#[inline(never)]
|
||||
|x, y| _mm_blend_ps(x, y, 0b0101)
|
||||
|x, y, ret: *mut __m128| unsafe { *ret = _mm_blend_ps(x, y, 0b0101) }
|
||||
};
|
||||
f(x, y)
|
||||
f(x, y, ret);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
|
|
@ -33,19 +33,18 @@ use minicore::*;
|
|||
// CHECK-LABEL: return_f32:
|
||||
#[no_mangle]
|
||||
pub fn return_f32(x: f32) -> f32 {
|
||||
// CHECK: movl {{.*}}(%ebp), %eax
|
||||
// CHECK-NOT: ax
|
||||
// CHECK: retl
|
||||
// CHECK: movss {{.*}}(%ebp), %xmm0
|
||||
// CHECK-NEXT: popl %ebp
|
||||
// CHECK-NEXT: retl
|
||||
x
|
||||
}
|
||||
|
||||
// CHECK-LABEL: return_f64:
|
||||
#[no_mangle]
|
||||
pub fn return_f64(x: f64) -> f64 {
|
||||
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
|
||||
// CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
|
||||
// CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
|
||||
// CHECK: retl
|
||||
// CHECK: movsd {{.*}}(%ebp), %xmm0
|
||||
// CHECK-NEXT: popl %ebp
|
||||
// CHECK-NEXT: retl
|
||||
x
|
||||
}
|
||||
|
||||
|
@ -157,7 +156,7 @@ pub unsafe fn call_f32(x: &mut f32) {
|
|||
}
|
||||
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
|
||||
// CHECK: calll {{()|_}}get_f32
|
||||
// CHECK-NEXT: movl %eax, (%[[PTR]])
|
||||
// CHECK-NEXT: movss %xmm0, (%[[PTR]])
|
||||
*x = get_f32();
|
||||
}
|
||||
|
||||
|
@ -169,8 +168,7 @@ pub unsafe fn call_f64(x: &mut f64) {
|
|||
}
|
||||
// CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
|
||||
// CHECK: calll {{()|_}}get_f64
|
||||
// CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
|
||||
// CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
|
||||
// CHECK-NEXT: movlps %xmm0, (%[[PTR]])
|
||||
*x = get_f64();
|
||||
}
|
||||
|
||||
|
@ -315,25 +313,21 @@ pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
|
|||
#[no_mangle]
|
||||
pub fn return_f16(x: f16) -> f16 {
|
||||
// CHECK: pushl %ebp
|
||||
// CHECK: movl %esp, %ebp
|
||||
// CHECK: movzwl 8(%ebp), %eax
|
||||
// CHECK: popl %ebp
|
||||
// CHECK: retl
|
||||
// CHECK-NEXT: movl %esp, %ebp
|
||||
// CHECK-NEXT: pinsrw $0, 8(%ebp), %xmm0
|
||||
// CHECK-NEXT: popl %ebp
|
||||
// CHECK-NEXT: retl
|
||||
x
|
||||
}
|
||||
|
||||
// CHECK-LABEL: return_f128:
|
||||
#[no_mangle]
|
||||
pub fn return_f128(x: f128) -> f128 {
|
||||
// CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
|
||||
// CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
|
||||
// CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
|
||||
// CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
|
||||
// CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
|
||||
// CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
|
||||
// CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
|
||||
// CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
|
||||
// CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
|
||||
// CHECK: retl
|
||||
// CHECK: pushl %ebp
|
||||
// CHECK-NEXT: movl %esp, %ebp
|
||||
// linux-NEXT: movaps 8(%ebp), %xmm0
|
||||
// win-NEXT: movups 8(%ebp), %xmm0
|
||||
// CHECK-NEXT: popl %ebp
|
||||
// CHECK-NEXT: retl
|
||||
x
|
||||
}
|
||||
|
|
36
tests/codegen/abi-x86-sse.rs
Normal file
36
tests/codegen/abi-x86-sse.rs
Normal file
|
@ -0,0 +1,36 @@
|
|||
//@ compile-flags: -Z merge-functions=disabled
|
||||
|
||||
//@ revisions: x86-64
|
||||
//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu
|
||||
//@[x86-64] needs-llvm-components: x86
|
||||
|
||||
//@ revisions: x86-32
|
||||
//@[x86-32] compile-flags: --target i686-unknown-linux-gnu
|
||||
//@[x86-32] needs-llvm-components: x86
|
||||
|
||||
//@ revisions: x86-32-nosse
|
||||
//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu
|
||||
//@[x86-32-nosse] needs-llvm-components: x86
|
||||
|
||||
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
|
||||
#![no_core]
|
||||
#![crate_type = "lib"]
|
||||
|
||||
#[lang = "sized"]
|
||||
trait Sized {}
|
||||
|
||||
#[lang = "copy"]
|
||||
trait Copy {}
|
||||
|
||||
// Ensure this type is passed without ptr indirection on targets that
|
||||
// require SSE2.
|
||||
#[repr(simd)]
|
||||
pub struct Sse([f32; 4]);
|
||||
|
||||
// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
|
||||
// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
|
||||
// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
|
||||
#[no_mangle]
|
||||
pub fn sse_id(x: Sse) -> Sse {
|
||||
x
|
||||
}
|
|
@ -1,8 +1,11 @@
|
|||
// 32-bit x86 returns float types differently to avoid the x87 stack.
|
||||
// 32-bit systems will return 128bit values using a return area pointer.
|
||||
// Emscripten aligns f128 to 8 bytes, not 16.
|
||||
//@ revisions: x86 bit32 bit64 emscripten
|
||||
//@[x86] only-x86
|
||||
//@ revisions: x86-sse x86-nosse bit32 bit64 emscripten
|
||||
//@[x86-sse] only-x86
|
||||
//@[x86-sse] only-rustc_abi-x86-sse2
|
||||
//@[x86-nosse] only-x86
|
||||
//@[x86-nosse] ignore-rustc_abi-x86-sse2
|
||||
//@[bit32] ignore-x86
|
||||
//@[bit32] ignore-emscripten
|
||||
//@[bit32] only-32bit
|
||||
|
@ -60,7 +63,8 @@ pub fn f128_le(a: f128, b: f128) -> bool {
|
|||
a <= b
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_neg({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_neg(fp128
|
||||
// bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_neg(
|
||||
// emscripten-LABEL: void @f128_neg({{.*}}sret([16 x i8])
|
||||
|
@ -70,7 +74,8 @@ pub fn f128_neg(a: f128) -> f128 {
|
|||
-a
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_add({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_add({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_add(fp128
|
||||
// bit32-LABEL: void @f128_add({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_add(
|
||||
// emscripten-LABEL: void @f128_add({{.*}}sret([16 x i8])
|
||||
|
@ -80,7 +85,8 @@ pub fn f128_add(a: f128, b: f128) -> f128 {
|
|||
a + b
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_sub({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_sub(fp128
|
||||
// bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_sub(
|
||||
// emscripten-LABEL: void @f128_sub({{.*}}sret([16 x i8])
|
||||
|
@ -90,7 +96,8 @@ pub fn f128_sub(a: f128, b: f128) -> f128 {
|
|||
a - b
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_mul({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_mul(fp128
|
||||
// bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_mul(
|
||||
// emscripten-LABEL: void @f128_mul({{.*}}sret([16 x i8])
|
||||
|
@ -100,7 +107,8 @@ pub fn f128_mul(a: f128, b: f128) -> f128 {
|
|||
a * b
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_div({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_div({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_div(fp128
|
||||
// bit32-LABEL: void @f128_div({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_div(
|
||||
// emscripten-LABEL: void @f128_div({{.*}}sret([16 x i8])
|
||||
|
@ -110,7 +118,8 @@ pub fn f128_div(a: f128, b: f128) -> f128 {
|
|||
a / b
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_rem({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_rem(fp128
|
||||
// bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_rem(
|
||||
// emscripten-LABEL: void @f128_rem({{.*}}sret([16 x i8])
|
||||
|
@ -162,7 +171,8 @@ pub fn f128_rem_assign(a: &mut f128, b: f128) {
|
|||
|
||||
/* float to float conversions */
|
||||
|
||||
// x86-LABEL: i16 @f128_as_f16(
|
||||
// x86-sse-LABEL: <2 x i8> @f128_as_f16(
|
||||
// x86-nosse-LABEL: i16 @f128_as_f16(
|
||||
// bits32-LABEL: half @f128_as_f16(
|
||||
// bits64-LABEL: half @f128_as_f16(
|
||||
#[no_mangle]
|
||||
|
@ -171,7 +181,8 @@ pub fn f128_as_f16(a: f128) -> f16 {
|
|||
a as f16
|
||||
}
|
||||
|
||||
// x86-LABEL: i32 @f128_as_f32(
|
||||
// x86-sse-LABEL: <4 x i8> @f128_as_f32(
|
||||
// x86-nosse-LABEL: i32 @f128_as_f32(
|
||||
// bit32-LABEL: float @f128_as_f32(
|
||||
// bit64-LABEL: float @f128_as_f32(
|
||||
// emscripten-LABEL: float @f128_as_f32(
|
||||
|
@ -181,7 +192,8 @@ pub fn f128_as_f32(a: f128) -> f32 {
|
|||
a as f32
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_as_f64(
|
||||
// x86-sse-LABEL: <8 x i8> @f128_as_f64(
|
||||
// x86-nosse-LABEL: void @f128_as_f64({{.*}}sret([8 x i8])
|
||||
// bit32-LABEL: double @f128_as_f64(
|
||||
// bit64-LABEL: double @f128_as_f64(
|
||||
// emscripten-LABEL: double @f128_as_f64(
|
||||
|
@ -191,7 +203,8 @@ pub fn f128_as_f64(a: f128) -> f64 {
|
|||
a as f64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f128_as_self(
|
||||
// x86-nosse-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f128_as_self(
|
||||
// emscripten-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
|
||||
|
@ -204,7 +217,8 @@ pub fn f128_as_self(a: f128) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f16_as_f128(
|
||||
// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f16_as_f128(
|
||||
// emscripten-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -214,7 +228,8 @@ pub fn f16_as_f128(a: f16) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f32_as_f128(
|
||||
// x86-nosse-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f32_as_f128(
|
||||
// emscripten-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -224,7 +239,8 @@ pub fn f32_as_f128(a: f32) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f64_as_f128(
|
||||
// x86-nosse-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f64_as_f128(
|
||||
// emscripten-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -263,7 +279,8 @@ pub fn f128_as_u64(a: f128) -> u64 {
|
|||
a as u64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: i128 @f128_as_u128(
|
||||
// emscripten-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
|
||||
|
@ -300,7 +317,8 @@ pub fn f128_as_i64(a: f128) -> i64 {
|
|||
a as i64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: i128 @f128_as_i128(
|
||||
// emscripten-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
|
||||
|
@ -312,7 +330,8 @@ pub fn f128_as_i128(a: f128) -> i128 {
|
|||
|
||||
/* int to float conversions */
|
||||
|
||||
// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @u8_as_f128(
|
||||
// x86-nosse-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @u8_as_f128(
|
||||
// emscripten-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -322,7 +341,8 @@ pub fn u8_as_f128(a: u8) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @u16_as_f128(
|
||||
// x86-nosse-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @u16_as_f128(
|
||||
// emscripten-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -332,7 +352,8 @@ pub fn u16_as_f128(a: u16) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @u32_as_f128(
|
||||
// x86-nosse-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @u32_as_f128(
|
||||
// emscripten-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -342,7 +363,8 @@ pub fn u32_as_f128(a: u32) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @u64_as_f128(
|
||||
// x86-nosse-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @u64_as_f128(
|
||||
// emscripten-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -352,7 +374,8 @@ pub fn u64_as_f128(a: u64) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @u128_as_f128(
|
||||
// x86-nosse-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @u128_as_f128(
|
||||
// emscripten-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -362,7 +385,8 @@ pub fn u128_as_f128(a: u128) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @i8_as_f128(
|
||||
// x86-nosse-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @i8_as_f128(
|
||||
// emscripten-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -372,7 +396,8 @@ pub fn i8_as_f128(a: i8) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @i16_as_f128(
|
||||
// x86-nosse-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @i16_as_f128(
|
||||
// emscripten-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -382,7 +407,8 @@ pub fn i16_as_f128(a: i16) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @i32_as_f128(
|
||||
// x86-nosse-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @i32_as_f128(
|
||||
// emscripten-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -392,7 +418,8 @@ pub fn i32_as_f128(a: i32) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @i64_as_f128(
|
||||
// x86-nosse-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @i64_as_f128(
|
||||
// emscripten-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
|
||||
|
@ -402,7 +429,8 @@ pub fn i64_as_f128(a: i64) -> f128 {
|
|||
a as f128
|
||||
}
|
||||
|
||||
// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @i128_as_f128(
|
||||
// x86-nosse-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @i128_as_f128(
|
||||
// emscripten-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
// 32-bit x86 returns float types differently to avoid the x87 stack.
|
||||
// 32-bit systems will return 128bit values using a return area pointer.
|
||||
//@ revisions: x86 bit32 bit64
|
||||
//@[x86] only-x86
|
||||
//@ revisions: x86-sse x86-nosse bit32 bit64
|
||||
//@[x86-sse] only-x86
|
||||
//@[x86-sse] only-rustc_abi-x86-sse2
|
||||
//@[x86-nosse] only-x86
|
||||
//@[x86-nosse] ignore-rustc_abi-x86-sse2
|
||||
//@[bit32] ignore-x86
|
||||
//@[bit32] only-32bit
|
||||
//@[bit64] ignore-x86
|
||||
|
@ -59,8 +62,10 @@ pub fn f16_le(a: f16, b: f16) -> bool {
|
|||
}
|
||||
|
||||
// This is where we check the argument and return ABI for f16.
|
||||
// other-LABEL: half @f16_neg(half
|
||||
// x86-LABEL: i16 @f16_neg(half
|
||||
// bit32-LABEL: half @f16_neg(half
|
||||
// bit64-LABEL: half @f16_neg(half
|
||||
// x86-sse-LABEL: <2 x i8> @f16_neg(half
|
||||
// x86-nosse-LABEL: i16 @f16_neg(half
|
||||
#[no_mangle]
|
||||
pub fn f16_neg(a: f16) -> f16 {
|
||||
// CHECK: fneg half %{{.+}}
|
||||
|
@ -144,17 +149,23 @@ pub fn f16_rem_assign(a: &mut f16, b: f16) {
|
|||
|
||||
/* float to float conversions */
|
||||
|
||||
// other-LABEL: half @f16_as_self(
|
||||
// x86-LABEL: i16 @f16_as_self(
|
||||
// bit32-LABEL: half @f16_as_self(
|
||||
// bit64-LABEL: half @f16_as_self(
|
||||
// x86-sse-LABEL: <2 x i8> @f16_as_self(
|
||||
// x86-nosse-LABEL: i16 @f16_as_self(
|
||||
#[no_mangle]
|
||||
pub fn f16_as_self(a: f16) -> f16 {
|
||||
// other-CHECK: ret half %{{.+}}
|
||||
// x86-CHECK: bitcast half
|
||||
// x86-CHECK: ret i16
|
||||
// bit32-CHECK: ret half %{{.+}}
|
||||
// bit64-CHECK: ret half %{{.+}}
|
||||
// x86-sse-CHECK: bitcast half
|
||||
// x86-nosse-CHECK: bitcast half
|
||||
// x86-sse-CHECK: ret i16
|
||||
// x86-nosse-CHECK: ret i16
|
||||
a as f16
|
||||
}
|
||||
|
||||
// x86-LABEL: i32 @f16_as_f32(
|
||||
// x86-sse-LABEL: <4 x i8> @f16_as_f32(
|
||||
// x86-nosse-LABEL: i32 @f16_as_f32(
|
||||
// bit32-LABEL: float @f16_as_f32(
|
||||
// bit64-LABEL: float @f16_as_f32(
|
||||
#[no_mangle]
|
||||
|
@ -163,7 +174,8 @@ pub fn f16_as_f32(a: f16) -> f32 {
|
|||
a as f32
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f16_as_f64(
|
||||
// x86-sse-LABEL: <8 x i8> @f16_as_f64(
|
||||
// x86-nosse-LABEL: void @f16_as_f64({{.*}}sret([8 x i8])
|
||||
// bit32-LABEL: double @f16_as_f64(
|
||||
// bit64-LABEL: double @f16_as_f64(
|
||||
#[no_mangle]
|
||||
|
@ -172,7 +184,8 @@ pub fn f16_as_f64(a: f16) -> f64 {
|
|||
a as f64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: <16 x i8> @f16_as_f128(
|
||||
// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: fp128 @f16_as_f128(
|
||||
#[no_mangle]
|
||||
|
@ -231,7 +244,8 @@ pub fn f16_as_u64(a: f16) -> u64 {
|
|||
a as u64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: i128 @f16_as_u128(
|
||||
#[no_mangle]
|
||||
|
@ -267,7 +281,8 @@ pub fn f16_as_i64(a: f16) -> i64 {
|
|||
a as i64
|
||||
}
|
||||
|
||||
// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
|
||||
// x86-sse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
|
||||
// x86-nosse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
|
||||
// bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
|
||||
// bit64-LABEL: i128 @f16_as_i128(
|
||||
#[no_mangle]
|
||||
|
|
|
@ -6,15 +6,6 @@
|
|||
use std::arch::x86_64::{__m128, __m128i, __m256i};
|
||||
use std::mem::transmute;
|
||||
|
||||
// CHECK-LABEL: @check_sse_float_to_int(
|
||||
#[no_mangle]
|
||||
pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: %0 = load <4 x float>, ptr %x, align 16
|
||||
// CHECK: store <4 x float> %0, ptr %_0, align 16
|
||||
transmute(x)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @check_sse_pair_to_avx(
|
||||
#[no_mangle]
|
||||
pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//@ compile-flags: -C no-prepopulate-passes -Copt-level=0
|
||||
// 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
|
||||
//@ revisions: x86 other
|
||||
//@[x86] only-x86
|
||||
//@[x86] only-rustc_abi-x86-sse2
|
||||
//@[other] ignore-x86
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
@ -10,7 +10,7 @@
|
|||
pub struct F32(f32);
|
||||
|
||||
// other: define{{.*}}float @add_newtype_f32(float %a, float %b)
|
||||
// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b)
|
||||
// x86: define{{.*}}<4 x i8> @add_newtype_f32(float %a, float %b)
|
||||
#[inline(never)]
|
||||
#[no_mangle]
|
||||
pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
|
||||
|
@ -21,7 +21,7 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
|
|||
pub struct F64(f64);
|
||||
|
||||
// other: define{{.*}}double @add_newtype_f64(double %a, double %b)
|
||||
// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b)
|
||||
// x86: define{{.*}}<8 x i8> @add_newtype_f64(double %a, double %b)
|
||||
#[inline(never)]
|
||||
#[no_mangle]
|
||||
pub fn add_newtype_f64(a: F64, b: F64) -> F64 {
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
//
|
||||
//@ compile-flags: -C no-prepopulate-passes
|
||||
// LLVM IR isn't very portable and the one tested here depends on the ABI
|
||||
// which is different between x86 (where we use SSE registers) and others.
|
||||
// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support
|
||||
// taking the union of multiple `only` annotations.
|
||||
//@ revisions: x86-64 x86-32-sse2 other
|
||||
//@[x86-64] only-x86_64
|
||||
//@[x86-32-sse2] only-rustc_abi-x86-sse2
|
||||
//@[other] ignore-rustc_abi-x86-sse2
|
||||
//@[other] ignore-x86_64
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
@ -38,7 +47,9 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
|
|||
#[no_mangle]
|
||||
pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
|
||||
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
|
||||
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
// x86-32: ret <4 x float> %[[VAL:.+]]
|
||||
// x86-64: ret <4 x float> %[[VAL:.+]]
|
||||
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
unsafe { std::mem::transmute(x) }
|
||||
}
|
||||
|
||||
|
@ -53,6 +64,8 @@ pub fn build_array_t(x: [f32; 4]) -> T {
|
|||
#[no_mangle]
|
||||
pub fn build_array_transmute_t(x: [f32; 4]) -> T {
|
||||
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
|
||||
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
// x86-32: ret <4 x float> %[[VAL:.+]]
|
||||
// x86-64: ret <4 x float> %[[VAL:.+]]
|
||||
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
unsafe { std::mem::transmute(x) }
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
//@ revisions:opt3 noopt
|
||||
//@ only-x86_64
|
||||
//@[opt3] compile-flags: -Copt-level=3
|
||||
//@[noopt] compile-flags: -Cno-prepopulate-passes
|
||||
|
||||
|
@ -14,14 +15,14 @@ use core::{mem, ptr};
|
|||
|
||||
#[repr(simd, packed)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Simd<T, const N: usize>([T; N]);
|
||||
pub struct PackedSimd<T, const N: usize>([T; N]);
|
||||
|
||||
#[repr(simd)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct FullSimd<T, const N: usize>([T; N]);
|
||||
|
||||
// non-powers-of-two have padding and need to be expanded to full vectors
|
||||
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
|
||||
fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
|
||||
unsafe {
|
||||
let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
|
||||
ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
|
||||
|
@ -29,18 +30,16 @@ fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
|
|||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: square_packed_full
|
||||
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
|
||||
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
|
||||
// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}})
|
||||
#[no_mangle]
|
||||
pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
|
||||
// CHECK-NEXT: start
|
||||
// noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
|
||||
// CHECK: load <3 x float>
|
||||
pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
|
||||
// The unoptimized version of this is not very interesting to check
|
||||
// since `load` does not get inlined.
|
||||
// opt3-NEXT: start:
|
||||
// opt3-NEXT: load <3 x float>
|
||||
let x = load(x);
|
||||
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
|
||||
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
|
||||
// CHECK-NEXT: ret void
|
||||
// opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
|
||||
// opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
|
||||
unsafe { intrinsics::simd_mul(x, x) }
|
||||
}
|
||||
|
||||
|
@ -48,7 +47,7 @@ pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
|
|||
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
|
||||
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
|
||||
#[no_mangle]
|
||||
pub fn square_packed(x: Simd<f32, 3>) -> Simd<f32, 3> {
|
||||
pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> {
|
||||
// CHECK-NEXT: start
|
||||
// CHECK-NEXT: load <3 x float>
|
||||
// noopt-NEXT: load <3 x float>
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
|
||||
// 32-bit x86 returns `f32` differently to avoid the x87 stack.
|
||||
// 32-bit systems will return 128bit values using a return area pointer.
|
||||
//@ revisions: x86 bit32 bit64
|
||||
//@[x86] only-x86
|
||||
//@ revisions: x86-sse x86-nosse bit32 bit64
|
||||
//@[x86-sse] only-x86
|
||||
//@[x86-sse] only-rustc_abi-x86-sse2
|
||||
//@[x86-nosse] only-x86
|
||||
//@[x86-nosse] ignore-rustc_abi-x86-sse2
|
||||
//@[bit32] ignore-x86
|
||||
//@[bit32] only-32bit
|
||||
//@[bit64] ignore-x86
|
||||
|
@ -75,7 +78,8 @@ pub union UnionF32 {
|
|||
a: f32,
|
||||
}
|
||||
|
||||
// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
|
||||
// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32(float %_1)
|
||||
// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
|
||||
// bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1)
|
||||
// bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1)
|
||||
#[no_mangle]
|
||||
|
@ -88,7 +92,8 @@ pub union UnionF32F32 {
|
|||
b: f32,
|
||||
}
|
||||
|
||||
// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
|
||||
// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32F32(float %_1)
|
||||
// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
|
||||
// bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
|
||||
// bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
|
||||
#[no_mangle]
|
||||
|
@ -110,7 +115,8 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 {
|
|||
pub union UnionU128 {
|
||||
a: u128,
|
||||
}
|
||||
// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
|
||||
// x86-sse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
|
||||
// x86-nosse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
|
||||
// bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
|
||||
// bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1)
|
||||
#[no_mangle]
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
//! Ensure we trigger abi_unsupported_vector_types for target features that are usually enabled
|
||||
//! on a target, but disabled in this file via a `-C` flag.
|
||||
//! on a target via the base CPU, but disabled in this file via a `-C` flag.
|
||||
//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu
|
||||
//@ compile-flags: -Ctarget-cpu=pentium4 -C target-feature=-sse,-sse2
|
||||
//@ add-core-stubs
|
||||
//@ compile-flags: --crate-type=rlib --target=i586-unknown-linux-gnu -C target-feature=-sse,-sse2
|
||||
//@ build-pass
|
||||
//@ ignore-pass (test emits codegen-time warnings)
|
||||
//@ needs-llvm-components: x86
|
|
@ -1,5 +1,5 @@
|
|||
warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled
|
||||
--> $DIR/sse-abi-checks.rs:19:1
|
||||
--> $DIR/sse-simd-abi-checks.rs:20:1
|
||||
|
|
||||
LL | pub unsafe extern "C" fn f(_: SseVector) {
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here
|
||||
|
@ -13,7 +13,7 @@ warning: 1 warning emitted
|
|||
|
||||
Future incompatibility report: Future breakage diagnostic:
|
||||
warning: this function definition uses SIMD vector type `SseVector` which (with the chosen ABI) requires the `sse` target feature, which is not enabled
|
||||
--> $DIR/sse-abi-checks.rs:19:1
|
||||
--> $DIR/sse-simd-abi-checks.rs:20:1
|
||||
|
|
||||
LL | pub unsafe extern "C" fn f(_: SseVector) {
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ function defined here
|
Loading…
Add table
Add a link
Reference in a new issue