Auto merge of #80652 - calebzulawski:simd-lanes, r=nagisa
Improve SIMD type element count validation Resolves rust-lang/stdsimd#53. These changes are motivated by `stdsimd` moving in the direction of const generic vectors, e.g.: ```rust #[repr(simd)] struct SimdF32<const N: usize>([f32; N]); ``` This makes a few changes: * Establishes a maximum SIMD lane count of 2^16 (65536). This value is arbitrary, but attempts to validate lane count before hitting potential errors in the backend. It's not clear what LLVM's maximum lane count is, but cranelift's appears to be much less than `usize::MAX`, at least. * Expands some SIMD intrinsics to support arbitrary lane counts. This resolves the ICE in the linked issue. * Attempts to catch invalid-sized vectors during typeck when possible. Unresolved questions: * Generic-length vectors can't be validated in typeck and are only validated after monomorphization while computing layout. This "works", but the errors simply bail out with no context beyond the name of the type. Should these errors instead return `LayoutError` or otherwise provide context in some way? As it stands, users of `stdsimd` could trivially produce monomorphization errors by making zero-length vectors. cc `@bjorn3`
This commit is contained in:
commit
bb587b1a17
28 changed files with 325 additions and 399 deletions
|
@ -380,7 +380,7 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
|
|||
"rust_eh_personality"
|
||||
};
|
||||
let fty = self.type_variadic_func(&[], self.type_i32());
|
||||
self.declare_cfn(name, fty)
|
||||
self.declare_cfn(name, llvm::UnnamedAddr::Global, fty)
|
||||
}
|
||||
};
|
||||
attributes::apply_target_cpu_attr(self, llfn);
|
||||
|
@ -429,7 +429,7 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
|
|||
|
||||
fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> {
|
||||
if self.get_declared_value("main").is_none() {
|
||||
Some(self.declare_cfn("main", fn_type))
|
||||
Some(self.declare_cfn("main", llvm::UnnamedAddr::Global, fn_type))
|
||||
} else {
|
||||
// If the symbol already exists, it is an error: for example, the user wrote
|
||||
// #[no_mangle] extern "C" fn main(..) {..}
|
||||
|
@ -459,8 +459,7 @@ impl CodegenCx<'b, 'tcx> {
|
|||
} else {
|
||||
self.type_variadic_func(&[], ret)
|
||||
};
|
||||
let f = self.declare_cfn(name, fn_ty);
|
||||
llvm::SetUnnamedAddress(f, llvm::UnnamedAddr::No);
|
||||
let f = self.declare_cfn(name, llvm::UnnamedAddr::No, fn_ty);
|
||||
self.intrinsics.borrow_mut().insert(name, f);
|
||||
f
|
||||
}
|
||||
|
@ -498,25 +497,6 @@ impl CodegenCx<'b, 'tcx> {
|
|||
let t_f32 = self.type_f32();
|
||||
let t_f64 = self.type_f64();
|
||||
|
||||
macro_rules! vector_types {
|
||||
($id_out:ident: $elem_ty:ident, $len:expr) => {
|
||||
let $id_out = self.type_vector($elem_ty, $len);
|
||||
};
|
||||
($($id_out:ident: $elem_ty:ident, $len:expr;)*) => {
|
||||
$(vector_types!($id_out: $elem_ty, $len);)*
|
||||
}
|
||||
}
|
||||
vector_types! {
|
||||
t_v2f32: t_f32, 2;
|
||||
t_v4f32: t_f32, 4;
|
||||
t_v8f32: t_f32, 8;
|
||||
t_v16f32: t_f32, 16;
|
||||
|
||||
t_v2f64: t_f64, 2;
|
||||
t_v4f64: t_f64, 4;
|
||||
t_v8f64: t_f64, 8;
|
||||
}
|
||||
|
||||
ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f32", fn(t_f32) -> t_i32);
|
||||
ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f64", fn(t_f64) -> t_i32);
|
||||
ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f32", fn(t_f32) -> t_i64);
|
||||
|
@ -540,124 +520,40 @@ impl CodegenCx<'b, 'tcx> {
|
|||
ifn!("llvm.sideeffect", fn() -> void);
|
||||
|
||||
ifn!("llvm.powi.f32", fn(t_f32, t_i32) -> t_f32);
|
||||
ifn!("llvm.powi.v2f32", fn(t_v2f32, t_i32) -> t_v2f32);
|
||||
ifn!("llvm.powi.v4f32", fn(t_v4f32, t_i32) -> t_v4f32);
|
||||
ifn!("llvm.powi.v8f32", fn(t_v8f32, t_i32) -> t_v8f32);
|
||||
ifn!("llvm.powi.v16f32", fn(t_v16f32, t_i32) -> t_v16f32);
|
||||
ifn!("llvm.powi.f64", fn(t_f64, t_i32) -> t_f64);
|
||||
ifn!("llvm.powi.v2f64", fn(t_v2f64, t_i32) -> t_v2f64);
|
||||
ifn!("llvm.powi.v4f64", fn(t_v4f64, t_i32) -> t_v4f64);
|
||||
ifn!("llvm.powi.v8f64", fn(t_v8f64, t_i32) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.pow.f32", fn(t_f32, t_f32) -> t_f32);
|
||||
ifn!("llvm.pow.v2f32", fn(t_v2f32, t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.pow.v4f32", fn(t_v4f32, t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.pow.v8f32", fn(t_v8f32, t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.pow.v16f32", fn(t_v16f32, t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.pow.f64", fn(t_f64, t_f64) -> t_f64);
|
||||
ifn!("llvm.pow.v2f64", fn(t_v2f64, t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.pow.v4f64", fn(t_v4f64, t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.pow.v8f64", fn(t_v8f64, t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.sqrt.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.sqrt.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.sqrt.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.sqrt.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.sqrt.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.sqrt.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.sqrt.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.sqrt.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.sqrt.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.sin.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.sin.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.sin.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.sin.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.sin.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.sin.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.sin.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.sin.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.sin.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.cos.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.cos.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.cos.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.cos.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.cos.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.cos.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.cos.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.cos.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.cos.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.exp.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.exp.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.exp.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.exp.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.exp.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.exp.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.exp.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.exp.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.exp.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.exp2.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.exp2.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.exp2.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.exp2.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.exp2.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.exp2.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.exp2.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.exp2.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.exp2.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.log.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.log.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.log.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.log.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.log.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.log.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.log.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.log.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.log.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.log10.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.log10.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.log10.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.log10.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.log10.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.log10.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.log10.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.log10.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.log10.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.log2.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.log2.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.log2.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.log2.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.log2.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.log2.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.log2.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.log2.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.log2.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.fma.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
|
||||
ifn!("llvm.fma.v2f32", fn(t_v2f32, t_v2f32, t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.fma.v4f32", fn(t_v4f32, t_v4f32, t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.fma.v8f32", fn(t_v8f32, t_v8f32, t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.fma.v16f32", fn(t_v16f32, t_v16f32, t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.fma.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
|
||||
ifn!("llvm.fma.v2f64", fn(t_v2f64, t_v2f64, t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.fma.v4f64", fn(t_v4f64, t_v4f64, t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.fma.v8f64", fn(t_v8f64, t_v8f64, t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.fabs.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.fabs.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.fabs.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.fabs.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.fabs.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.fabs.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.fabs.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.fabs.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.fabs.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.minnum.f32", fn(t_f32, t_f32) -> t_f32);
|
||||
ifn!("llvm.minnum.f64", fn(t_f64, t_f64) -> t_f64);
|
||||
|
@ -665,24 +561,10 @@ impl CodegenCx<'b, 'tcx> {
|
|||
ifn!("llvm.maxnum.f64", fn(t_f64, t_f64) -> t_f64);
|
||||
|
||||
ifn!("llvm.floor.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.floor.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.floor.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.floor.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.floor.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.floor.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.floor.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.floor.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.floor.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.ceil.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.ceil.v2f32", fn(t_v2f32) -> t_v2f32);
|
||||
ifn!("llvm.ceil.v4f32", fn(t_v4f32) -> t_v4f32);
|
||||
ifn!("llvm.ceil.v8f32", fn(t_v8f32) -> t_v8f32);
|
||||
ifn!("llvm.ceil.v16f32", fn(t_v16f32) -> t_v16f32);
|
||||
ifn!("llvm.ceil.f64", fn(t_f64) -> t_f64);
|
||||
ifn!("llvm.ceil.v2f64", fn(t_v2f64) -> t_v2f64);
|
||||
ifn!("llvm.ceil.v4f64", fn(t_v4f64) -> t_v4f64);
|
||||
ifn!("llvm.ceil.v8f64", fn(t_v8f64) -> t_v8f64);
|
||||
|
||||
ifn!("llvm.trunc.f32", fn(t_f32) -> t_f32);
|
||||
ifn!("llvm.trunc.f64", fn(t_f64) -> t_f64);
|
||||
|
|
|
@ -30,6 +30,7 @@ fn declare_raw_fn(
|
|||
cx: &CodegenCx<'ll, '_>,
|
||||
name: &str,
|
||||
callconv: llvm::CallConv,
|
||||
unnamed: llvm::UnnamedAddr,
|
||||
ty: &'ll Type,
|
||||
) -> &'ll Value {
|
||||
debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
|
||||
|
@ -38,9 +39,7 @@ fn declare_raw_fn(
|
|||
};
|
||||
|
||||
llvm::SetFunctionCallConv(llfn, callconv);
|
||||
// Function addresses in Rust are never significant, allowing functions to
|
||||
// be merged.
|
||||
llvm::SetUnnamedAddress(llfn, llvm::UnnamedAddr::Global);
|
||||
llvm::SetUnnamedAddress(llfn, unnamed);
|
||||
|
||||
if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) {
|
||||
llvm::Attribute::NoRedZone.apply_llfn(Function, llfn);
|
||||
|
@ -68,8 +67,13 @@ impl CodegenCx<'ll, 'tcx> {
|
|||
///
|
||||
/// If there’s a value with the same name already declared, the function will
|
||||
/// update the declaration and return existing Value instead.
|
||||
pub fn declare_cfn(&self, name: &str, fn_type: &'ll Type) -> &'ll Value {
|
||||
declare_raw_fn(self, name, llvm::CCallConv, fn_type)
|
||||
pub fn declare_cfn(
|
||||
&self,
|
||||
name: &str,
|
||||
unnamed: llvm::UnnamedAddr,
|
||||
fn_type: &'ll Type,
|
||||
) -> &'ll Value {
|
||||
declare_raw_fn(self, name, llvm::CCallConv, unnamed, fn_type)
|
||||
}
|
||||
|
||||
/// Declare a Rust function.
|
||||
|
@ -79,7 +83,15 @@ impl CodegenCx<'ll, 'tcx> {
|
|||
pub fn declare_fn(&self, name: &str, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> &'ll Value {
|
||||
debug!("declare_rust_fn(name={:?}, fn_abi={:?})", name, fn_abi);
|
||||
|
||||
let llfn = declare_raw_fn(self, name, fn_abi.llvm_cconv(), fn_abi.llvm_type(self));
|
||||
// Function addresses in Rust are never significant, allowing functions to
|
||||
// be merged.
|
||||
let llfn = declare_raw_fn(
|
||||
self,
|
||||
name,
|
||||
fn_abi.llvm_cconv(),
|
||||
llvm::UnnamedAddr::Global,
|
||||
fn_abi.llvm_type(self),
|
||||
);
|
||||
fn_abi.apply_attrs_llfn(self, llfn);
|
||||
llfn
|
||||
}
|
||||
|
|
|
@ -1009,7 +1009,7 @@ fn generic_simd_intrinsic(
|
|||
}
|
||||
|
||||
fn simd_simple_float_intrinsic(
|
||||
name: &str,
|
||||
name: Symbol,
|
||||
in_elem: &::rustc_middle::ty::TyS<'_>,
|
||||
in_ty: &::rustc_middle::ty::TyS<'_>,
|
||||
in_len: u64,
|
||||
|
@ -1036,93 +1036,69 @@ fn generic_simd_intrinsic(
|
|||
}
|
||||
}
|
||||
}
|
||||
let ety = match in_elem.kind() {
|
||||
ty::Float(f) if f.bit_width() == 32 => {
|
||||
if in_len < 2 || in_len > 16 {
|
||||
|
||||
let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
|
||||
let elem_ty = bx.cx.type_float_from_ty(*f);
|
||||
match f.bit_width() {
|
||||
32 => ("f32", elem_ty),
|
||||
64 => ("f64", elem_ty),
|
||||
_ => {
|
||||
return_error!(
|
||||
"unsupported floating-point vector `{}` with length `{}` \
|
||||
out-of-range [2, 16]",
|
||||
in_ty,
|
||||
in_len
|
||||
"unsupported element type `{}` of floating-point vector `{}`",
|
||||
f.name_str(),
|
||||
in_ty
|
||||
);
|
||||
}
|
||||
"f32"
|
||||
}
|
||||
ty::Float(f) if f.bit_width() == 64 => {
|
||||
if in_len < 2 || in_len > 8 {
|
||||
return_error!(
|
||||
"unsupported floating-point vector `{}` with length `{}` \
|
||||
out-of-range [2, 8]",
|
||||
in_ty,
|
||||
in_len
|
||||
);
|
||||
}
|
||||
"f64"
|
||||
}
|
||||
ty::Float(f) => {
|
||||
return_error!(
|
||||
"unsupported element type `{}` of floating-point vector `{}`",
|
||||
f.name_str(),
|
||||
in_ty
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
return_error!("`{}` is not a floating-point type", in_ty);
|
||||
}
|
||||
} else {
|
||||
return_error!("`{}` is not a floating-point type", in_ty);
|
||||
};
|
||||
|
||||
let llvm_name = &format!("llvm.{0}.v{1}{2}", name, in_len, ety);
|
||||
let intrinsic = bx.get_intrinsic(&llvm_name);
|
||||
let c =
|
||||
bx.call(intrinsic, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
|
||||
let vec_ty = bx.type_vector(elem_ty, in_len);
|
||||
|
||||
let (intr_name, fn_ty) = match name {
|
||||
sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
|
||||
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
|
||||
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
||||
_ => return_error!("unrecognized intrinsic `{}`", name),
|
||||
};
|
||||
|
||||
let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
|
||||
let f = bx.declare_cfn(&llvm_name, llvm::UnnamedAddr::No, fn_ty);
|
||||
let c = bx.call(f, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
|
||||
unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(c) };
|
||||
Ok(c)
|
||||
}
|
||||
|
||||
match name {
|
||||
sym::simd_fsqrt => {
|
||||
return simd_simple_float_intrinsic("sqrt", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fsin => {
|
||||
return simd_simple_float_intrinsic("sin", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fcos => {
|
||||
return simd_simple_float_intrinsic("cos", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fabs => {
|
||||
return simd_simple_float_intrinsic("fabs", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_floor => {
|
||||
return simd_simple_float_intrinsic("floor", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_ceil => {
|
||||
return simd_simple_float_intrinsic("ceil", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fexp => {
|
||||
return simd_simple_float_intrinsic("exp", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fexp2 => {
|
||||
return simd_simple_float_intrinsic("exp2", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_flog10 => {
|
||||
return simd_simple_float_intrinsic("log10", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_flog2 => {
|
||||
return simd_simple_float_intrinsic("log2", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_flog => {
|
||||
return simd_simple_float_intrinsic("log", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fpowi => {
|
||||
return simd_simple_float_intrinsic("powi", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fpow => {
|
||||
return simd_simple_float_intrinsic("pow", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
sym::simd_fma => {
|
||||
return simd_simple_float_intrinsic("fma", in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
_ => { /* fallthrough */ }
|
||||
if std::matches!(
|
||||
name,
|
||||
sym::simd_fsqrt
|
||||
| sym::simd_fsin
|
||||
| sym::simd_fcos
|
||||
| sym::simd_fabs
|
||||
| sym::simd_floor
|
||||
| sym::simd_ceil
|
||||
| sym::simd_fexp
|
||||
| sym::simd_fexp2
|
||||
| sym::simd_flog10
|
||||
| sym::simd_flog2
|
||||
| sym::simd_flog
|
||||
| sym::simd_fpowi
|
||||
| sym::simd_fpow
|
||||
| sym::simd_fma
|
||||
) {
|
||||
return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
|
||||
// FIXME: use:
|
||||
|
@ -1278,12 +1254,12 @@ fn generic_simd_intrinsic(
|
|||
format!("llvm.masked.gather.{}.{}", llvm_elem_vec_str, llvm_pointer_vec_str);
|
||||
let f = bx.declare_cfn(
|
||||
&llvm_intrinsic,
|
||||
llvm::UnnamedAddr::No,
|
||||
bx.type_func(
|
||||
&[llvm_pointer_vec_ty, alignment_ty, mask_ty, llvm_elem_vec_ty],
|
||||
llvm_elem_vec_ty,
|
||||
),
|
||||
);
|
||||
llvm::SetUnnamedAddress(f, llvm::UnnamedAddr::No);
|
||||
let v = bx.call(f, &[args[1].immediate(), alignment, mask, args[0].immediate()], None);
|
||||
return Ok(v);
|
||||
}
|
||||
|
@ -1408,9 +1384,9 @@ fn generic_simd_intrinsic(
|
|||
format!("llvm.masked.scatter.{}.{}", llvm_elem_vec_str, llvm_pointer_vec_str);
|
||||
let f = bx.declare_cfn(
|
||||
&llvm_intrinsic,
|
||||
llvm::UnnamedAddr::No,
|
||||
bx.type_func(&[llvm_elem_vec_ty, llvm_pointer_vec_ty, alignment_ty, mask_ty], ret_t),
|
||||
);
|
||||
llvm::SetUnnamedAddress(f, llvm::UnnamedAddr::No);
|
||||
let v = bx.call(f, &[args[0].immediate(), args[1].immediate(), alignment, mask], None);
|
||||
return Ok(v);
|
||||
}
|
||||
|
@ -1714,8 +1690,11 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
|
|||
);
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
|
||||
|
||||
let f = bx.declare_cfn(&llvm_intrinsic, bx.type_func(&[vec_ty, vec_ty], vec_ty));
|
||||
llvm::SetUnnamedAddress(f, llvm::UnnamedAddr::No);
|
||||
let f = bx.declare_cfn(
|
||||
&llvm_intrinsic,
|
||||
llvm::UnnamedAddr::No,
|
||||
bx.type_func(&[vec_ty, vec_ty], vec_ty),
|
||||
);
|
||||
let v = bx.call(f, &[lhs, rhs], None);
|
||||
return Ok(v);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue