Merge commit 'e8dca3e87d' into sync_from_cg_gcc
This commit is contained in:
commit
3fac982e07
41 changed files with 7933 additions and 592 deletions
5722
compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
Normal file
5722
compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,22 +1,250 @@
|
|||
use gccjit::Function;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use crate::context::CodegenCx;
|
||||
use gccjit::{Function, FunctionPtrType, RValue, ToRValue};
|
||||
|
||||
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
|
||||
let _gcc_name =
|
||||
match name {
|
||||
"llvm.x86.xgetbv" => {
|
||||
let gcc_name = "__builtin_trap";
|
||||
let func = cx.context.get_builtin_function(gcc_name);
|
||||
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
|
||||
return func;
|
||||
},
|
||||
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
|
||||
"llvm.x86.sse2.cmp.pd" => "__builtin_ia32_cmppd",
|
||||
"llvm.x86.sse2.movmsk.pd" => "__builtin_ia32_movmskpd",
|
||||
"llvm.x86.sse2.pmovmskb.128" => "__builtin_ia32_pmovmskb128",
|
||||
_ => unimplemented!("unsupported LLVM intrinsic {}", name)
|
||||
};
|
||||
use crate::{context::CodegenCx, builder::Builder};
|
||||
|
||||
unimplemented!();
|
||||
/// Pads `args` with the extra arguments a GCC builtin expects but the LLVM intrinsic lacks.
///
/// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics. When the number of parameters
/// of `gcc_func` differs from `args.len()` and `func_name` is one of the builtins handled
/// below, a new argument list is built with the missing values filled in. In every other
/// case `args` is returned untouched.
///
/// * `builder` - used to create the constants and the placeholder locals.
/// * `gcc_func` - function pointer type of the GCC builtin; source of the parameter types.
/// * `args` - arguments as provided for the LLVM intrinsic.
/// * `func_name` - name of the GCC builtin being called.
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
    // Nothing to add when the caller already provides one argument per parameter.
    if gcc_func.get_param_count() == args.len() {
        return args;
    }

    // Builds the constant `-1` typed like the builtin's parameter at `param_index`.
    let minus_one_for = |param_index| {
        let param_type = gcc_func.get_param_type(param_index);
        builder.context.new_rvalue_from_int(param_type, -1)
    };
    // Builds a fresh, never-assigned local typed like the builtin's parameter at `param_index`.
    let undefined_for = |param_index| {
        let param_type = gcc_func.get_param_type(param_index);
        builder.current_func().new_local(None, param_type, "undefined_for_intrinsic").to_rvalue()
    };

    match func_name {
        "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
        // FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
        | "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
        | "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
        | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
        | "__builtin_ia32_pmaxuq128_mask"
        | "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
        | "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
        | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
        | "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
        => {
            // TODO: refactor by separating those intrinsics outside of this branch.
            let is_sqrt = matches!(func_name, "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask");
            // For these builtins the two new values go right before the last provided argument.
            let insert_before_last = is_sqrt
                || matches!(
                    func_name,
                    "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
                    | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
                );
            // For these builtins the first new value is a zero vector instead of an undefined local.
            let filler_is_zero = matches!(
                func_name,
                "__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
                | "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask"
            );
            // Positions, in the builtin's parameter list, of the two values being added.
            let (filler_index, minus_one_index) = if is_sqrt { (1, 2) } else { (2, 3) };

            let place = |list: &mut Vec<RValue<'gcc>>, value: RValue<'gcc>| {
                if insert_before_last {
                    let slot = list.len() - 1;
                    list.insert(slot, value);
                }
                else {
                    list.push(value);
                }
            };

            let mut padded = args.to_vec();
            let filler_type = gcc_func.get_param_type(filler_index);
            let filler =
                if filler_is_zero {
                    let vector_type = filler_type.dyncast_vector().expect("vector type");
                    let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
                    let num_units = vector_type.get_num_units();
                    builder.context.new_rvalue_from_vector(None, filler_type, &vec![zero; num_units])
                }
                else {
                    builder.current_func().new_local(None, filler_type, "undefined_for_intrinsic").to_rvalue()
                };
            place(&mut padded, filler);

            let minus_one = minus_one_for(minus_one_index);
            place(&mut padded, minus_one);

            args = Cow::Owned(padded);
        },
        "__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
        | "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
        | "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
            // Only the fifth parameter is missing: append `-1` for it.
            let mut padded = args.to_vec();
            padded.push(minus_one_for(4));
            args = Cow::Owned(padded);
        },
        "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
            let provided = args.len();
            let mut padded = args.to_vec();

            // With 4 provided arguments, the last one has to stay last: set it aside.
            let trailing = if provided == 4 { padded.pop() } else { None };

            padded.push(minus_one_for(3));

            if provided == 3 {
                // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 map to
                // the same GCC intrinsic, but the former has 3 parameters and the
                // latter has 4 so it doesn't require this additional argument.
                let last_param_type = gcc_func.get_param_type(4);
                padded.push(builder.context.new_rvalue_from_int(last_param_type, 4));
            }

            if let Some(trailing) = trailing {
                padded.push(trailing);
            }

            args = Cow::Owned(padded);
        },
        "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
        | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
        | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
        | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" => {
            // An undefined value and `-1` are slotted in before the last provided argument.
            let mut padded = args.to_vec();
            let trailing = padded.pop().expect("last arg");
            padded.push(undefined_for(2));
            padded.push(minus_one_for(3));
            padded.push(trailing);
            args = Cow::Owned(padded);
        },
        "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
            // `-1` is slotted in before the last provided argument.
            let mut padded = args.to_vec();
            let trailing = padded.pop().expect("last arg");
            padded.push(minus_one_for(3));
            padded.push(trailing);
            args = Cow::Owned(padded);
        },
        _ => (),
    }

    args
}
|
||||
|
||||
/// Returns `true` when the type check of the argument at `index` must be skipped for the
/// GCC builtin `func_name`.
///
/// * `func_name` - name of the GCC builtin being called.
/// * `index` - position of the argument under consideration.
/// * `args_len` - number of arguments provided by the caller.
///
/// An empty argument list (`args_len == 0`) never matches and yields `false`.
pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
    // `checked_sub` avoids the `usize` underflow of `args_len - 1` when there are no arguments.
    let is_last_arg = args_len.checked_sub(1) == Some(index);

    match func_name {
        // NOTE: these intrinsics have missing parameters before the last one, so ignore the
        // last argument type check.
        // FIXME(antoyo): find a way to refactor in order to avoid this hack.
        "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
        | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
        | "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
        | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
        | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
        | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
        | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => is_last_arg,
        // Since there are two LLVM intrinsics that map to each of these GCC builtins and only
        // one of them has a missing parameter before the last one, we check the number of
        // arguments to distinguish those cases.
        "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => args_len == 4 && is_last_arg,
        _ => false,
    }
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
|
||||
match name {
|
||||
"llvm.x86.xgetbv" => {
|
||||
let gcc_name = "__builtin_trap";
|
||||
let func = cx.context.get_builtin_function(gcc_name);
|
||||
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
|
||||
return func;
|
||||
},
|
||||
_ => unimplemented!("unsupported LLVM intrinsic {}", name),
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves the LLVM intrinsic `name` to the matching GCC target builtin when the `master`
/// feature is on.
///
/// The name is first looked up in the hand-written table below; names not listed there fall
/// through to the table pulled in from `archs.rs`. The resulting builtin is fetched with
/// `get_target_builtin_function`, recorded in `cx.functions` under its GCC name and returned.
#[cfg(feature="master")]
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
    let gcc_name = match name {
        "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
        // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
        "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
        "llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
        "llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
        "llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
        "llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
        "llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
        "llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
        "llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
        "llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
        "llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
        "llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
        "llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
        "llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
        "llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
        "llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
        "llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
        "llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
        "llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask",
        "llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddsubpd512_mask",
        "llvm.x86.avx512.pternlog.d.512" => "__builtin_ia32_pternlogd512_mask",
        "llvm.x86.avx512.pternlog.d.256" => "__builtin_ia32_pternlogd256_mask",
        "llvm.x86.avx512.pternlog.d.128" => "__builtin_ia32_pternlogd128_mask",
        "llvm.x86.avx512.pternlog.q.512" => "__builtin_ia32_pternlogq512_mask",
        "llvm.x86.avx512.pternlog.q.256" => "__builtin_ia32_pternlogq256_mask",
        "llvm.x86.avx512.pternlog.q.128" => "__builtin_ia32_pternlogq128_mask",
        "llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512_mask",
        "llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512_mask",
        "llvm.x86.avx512.sub.ps.512" => "__builtin_ia32_subps512_mask",
        "llvm.x86.avx512.sub.pd.512" => "__builtin_ia32_subpd512_mask",
        "llvm.x86.avx512.mul.ps.512" => "__builtin_ia32_mulps512_mask",
        "llvm.x86.avx512.mul.pd.512" => "__builtin_ia32_mulpd512_mask",
        "llvm.x86.avx512.div.ps.512" => "__builtin_ia32_divps512_mask",
        "llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
        "llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
        "llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",

        // The above doc points to unknown builtins for the following, so override them:
        "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
        "llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
        "llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
        "llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
        "llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
        "llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
        "llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
        "llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
        "llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
        "llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
        "llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
        "llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
        "llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
        "llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
        "llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
        "llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
        // NOTE(review): an empty intrinsic name maps to an empty builtin name; the reason is
        // not visible from this function — confirm whether this arm is still needed.
        "" => "",
        // NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
        _ => include!("archs.rs"),
    };

    // Fetch the builtin and remember it under its GCC name before handing it back.
    let func = cx.context.get_target_builtin_function(gcc_name);
    cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
    func
}
|
||||
|
|
|
@ -967,34 +967,55 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
|
||||
fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
|
||||
let func = self.current_func.borrow().expect("func");
|
||||
|
||||
let result_type = lhs.get_type();
|
||||
if signed {
|
||||
// Algorithm from: https://stackoverflow.com/a/56531252/389119
|
||||
let after_block = func.new_block("after");
|
||||
let func_name =
|
||||
match width {
|
||||
8 => "__builtin_add_overflow",
|
||||
16 => "__builtin_add_overflow",
|
||||
32 => "__builtin_sadd_overflow",
|
||||
64 => "__builtin_saddll_overflow",
|
||||
128 => "__builtin_add_overflow",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let overflow_func = self.context.get_builtin_function(func_name);
|
||||
let result_type = lhs.get_type();
|
||||
// Based on algorithm from: https://stackoverflow.com/a/56531252/389119
|
||||
let func = self.current_func.borrow().expect("func");
|
||||
let res = func.new_local(None, result_type, "saturating_sum");
|
||||
let overflow = self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None);
|
||||
let supports_native_type = self.is_native_int_type(result_type);
|
||||
let overflow =
|
||||
if supports_native_type {
|
||||
let func_name =
|
||||
match width {
|
||||
8 => "__builtin_add_overflow",
|
||||
16 => "__builtin_add_overflow",
|
||||
32 => "__builtin_sadd_overflow",
|
||||
64 => "__builtin_saddll_overflow",
|
||||
128 => "__builtin_add_overflow",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let overflow_func = self.context.get_builtin_function(func_name);
|
||||
self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
|
||||
}
|
||||
else {
|
||||
let func_name =
|
||||
match width {
|
||||
128 => "__rust_i128_addo",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let param_a = self.context.new_parameter(None, result_type, "a");
|
||||
let param_b = self.context.new_parameter(None, result_type, "b");
|
||||
let result_field = self.context.new_field(None, result_type, "result");
|
||||
let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
|
||||
let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
|
||||
let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
|
||||
let result = self.context.new_call(None, func, &[lhs, rhs]);
|
||||
let overflow = result.access_field(None, overflow_field);
|
||||
let int_result = result.access_field(None, result_field);
|
||||
self.llbb().add_assignment(None, res, int_result);
|
||||
overflow
|
||||
};
|
||||
|
||||
let then_block = func.new_block("then");
|
||||
let after_block = func.new_block("after");
|
||||
|
||||
let unsigned_type = self.context.new_int_type(width as i32 / 8, false);
|
||||
let shifted = self.context.new_cast(None, lhs, unsigned_type) >> self.context.new_rvalue_from_int(unsigned_type, width as i32 - 1);
|
||||
let uint_max = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, unsigned_type,
|
||||
self.context.new_rvalue_from_int(unsigned_type, 0)
|
||||
);
|
||||
let int_max = uint_max >> self.context.new_rvalue_one(unsigned_type);
|
||||
then_block.add_assignment(None, res, self.context.new_cast(None, shifted + int_max, result_type));
|
||||
// Return `result_type`'s maximum or minimum value on overflow
|
||||
// NOTE: convert the type to unsigned to have an unsigned shift.
|
||||
let unsigned_type = result_type.to_unsigned(&self.cx);
|
||||
let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
|
||||
let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
|
||||
let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
|
||||
then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
|
||||
then_block.end_with_jump(None, after_block);
|
||||
|
||||
self.llbb().end_with_conditional(None, overflow, then_block, after_block);
|
||||
|
@ -1007,19 +1028,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
else {
|
||||
// Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
|
||||
let res = lhs + rhs;
|
||||
let res_type = res.get_type();
|
||||
let cond = self.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
|
||||
let value = self.context.new_unary_op(None, UnaryOp::Minus, res_type, self.context.new_cast(None, cond, res_type));
|
||||
res | value
|
||||
let res = self.gcc_add(lhs, rhs);
|
||||
let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs);
|
||||
let value = self.gcc_neg(self.gcc_int_cast(cond, result_type));
|
||||
self.gcc_or(res, value)
|
||||
}
|
||||
}
|
||||
|
||||
// Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
|
||||
fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
|
||||
let result_type = lhs.get_type();
|
||||
if signed {
|
||||
// Also based on algorithm from: https://stackoverflow.com/a/56531252/389119
|
||||
let result_type = lhs.get_type();
|
||||
// Based on algorithm from: https://stackoverflow.com/a/56531252/389119
|
||||
let func = self.current_func.borrow().expect("func");
|
||||
let res = func.new_local(None, result_type, "saturating_diff");
|
||||
let supports_native_type = self.is_native_int_type(result_type);
|
||||
|
@ -1059,6 +1079,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
let then_block = func.new_block("then");
|
||||
let after_block = func.new_block("after");
|
||||
|
||||
// Return `result_type`'s maximum or minimum value on overflow
|
||||
// NOTE: convert the type to unsigned to have an unsigned shift.
|
||||
let unsigned_type = result_type.to_unsigned(&self.cx);
|
||||
let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
|
||||
|
@ -1076,11 +1097,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
res.to_rvalue()
|
||||
}
|
||||
else {
|
||||
let res = lhs - rhs;
|
||||
let comparison = self.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
|
||||
let comparison = self.context.new_cast(None, comparison, lhs.get_type());
|
||||
let unary_op = self.context.new_unary_op(None, UnaryOp::Minus, comparison.get_type(), comparison);
|
||||
self.and(res, unary_op)
|
||||
let res = self.gcc_sub(lhs, rhs);
|
||||
let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs);
|
||||
let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type));
|
||||
self.gcc_and(res, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,20 @@
|
|||
use gccjit::{RValue, Type};
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use gccjit::{BinaryOp, RValue, Type, ToRValue};
|
||||
use rustc_codegen_ssa::base::compare_simd_types;
|
||||
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
|
||||
use rustc_codegen_ssa::mir::operand::OperandRef;
|
||||
use rustc_codegen_ssa::mir::place::PlaceRef;
|
||||
use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
|
||||
use rustc_hir as hir;
|
||||
use rustc_middle::span_bug;
|
||||
use rustc_middle::ty::layout::HasTyCtxt;
|
||||
use rustc_middle::ty::{self, Ty};
|
||||
use rustc_span::{Span, Symbol, sym};
|
||||
use rustc_target::abi::Align;
|
||||
|
||||
use crate::builder::Builder;
|
||||
use crate::intrinsic;
|
||||
|
||||
pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, name: Symbol, callee_ty: Ty<'tcx>, args: &[OperandRef<'tcx, RValue<'gcc>>], ret_ty: Ty<'tcx>, llret_ty: Type<'gcc>, span: Span) -> Result<RValue<'gcc>, ()> {
|
||||
// macros for error handling:
|
||||
|
@ -53,7 +58,53 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
let sig =
|
||||
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
|
||||
let arg_tys = sig.inputs();
|
||||
let name_str = name.as_str();
|
||||
|
||||
if name == sym::simd_select_bitmask {
|
||||
require_simd!(arg_tys[1], "argument");
|
||||
let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
|
||||
let expected_int_bits = (len.max(8) - 1).next_power_of_two();
|
||||
let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
|
||||
|
||||
let mask_ty = arg_tys[0];
|
||||
let mut mask = match mask_ty.kind() {
|
||||
ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
|
||||
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
|
||||
ty::Array(elem, len)
|
||||
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
|
||||
&& len.try_eval_usize(bx.tcx, ty::ParamEnv::reveal_all())
|
||||
== Some(expected_bytes) =>
|
||||
{
|
||||
let place = PlaceRef::alloca(bx, args[0].layout);
|
||||
args[0].val.store(bx, place);
|
||||
let int_ty = bx.type_ix(expected_bytes * 8);
|
||||
let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty));
|
||||
bx.load(int_ty, ptr, Align::ONE)
|
||||
}
|
||||
_ => return_error!(
|
||||
"invalid bitmask `{}`, expected `u{}` or `[u8; {}]`",
|
||||
mask_ty,
|
||||
expected_int_bits,
|
||||
expected_bytes
|
||||
),
|
||||
};
|
||||
|
||||
let arg1 = args[1].immediate();
|
||||
let arg1_type = arg1.get_type();
|
||||
let arg1_vector_type = arg1_type.unqualified().dyncast_vector().expect("vector type");
|
||||
let arg1_element_type = arg1_vector_type.get_element_type();
|
||||
|
||||
let mut elements = vec![];
|
||||
let one = bx.context.new_rvalue_one(mask.get_type());
|
||||
for _ in 0..len {
|
||||
let element = bx.context.new_cast(None, mask & one, arg1_element_type);
|
||||
elements.push(element);
|
||||
mask = mask >> one;
|
||||
}
|
||||
let vector_mask = bx.context.new_rvalue_from_vector(None, arg1_type, &elements);
|
||||
|
||||
return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
|
||||
}
|
||||
|
||||
// every intrinsic below takes a SIMD vector as its first argument
|
||||
require_simd!(arg_tys[0], "input");
|
||||
|
@ -100,10 +151,28 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
));
|
||||
}
|
||||
|
||||
if let Some(stripped) = name_str.strip_prefix("simd_shuffle") {
|
||||
let n: u64 = stripped.parse().unwrap_or_else(|_| {
|
||||
span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
|
||||
});
|
||||
if let Some(stripped) = name.as_str().strip_prefix("simd_shuffle") {
|
||||
let n: u64 =
|
||||
if stripped.is_empty() {
|
||||
// Make sure this is actually an array, since typeck only checks the length-suffixed
|
||||
// version of this intrinsic.
|
||||
match args[2].layout.ty.kind() {
|
||||
ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
|
||||
len.try_eval_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(|| {
|
||||
span_bug!(span, "could not evaluate shuffle index array length")
|
||||
})
|
||||
}
|
||||
_ => return_error!(
|
||||
"simd_shuffle index must be an array of `u32`, got `{}`",
|
||||
args[2].layout.ty
|
||||
),
|
||||
}
|
||||
}
|
||||
else {
|
||||
stripped.parse().unwrap_or_else(|_| {
|
||||
span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
|
||||
})
|
||||
};
|
||||
|
||||
require_simd!(ret_ty, "return");
|
||||
|
||||
|
@ -134,6 +203,225 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
));
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_insert {
|
||||
require!(
|
||||
in_elem == arg_tys[2],
|
||||
"expected inserted type `{}` (element of input `{}`), found `{}`",
|
||||
in_elem,
|
||||
in_ty,
|
||||
arg_tys[2]
|
||||
);
|
||||
let vector = args[0].immediate();
|
||||
let index = args[1].immediate();
|
||||
let value = args[2].immediate();
|
||||
// TODO(antoyo): use a recursive unqualified() here.
|
||||
let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
|
||||
let element_type = vector_type.get_element_type();
|
||||
// NOTE: we cannot cast to an array and assign to its element here because the value might
|
||||
// not be an l-value. So, call a builtin to set the element.
|
||||
// TODO(antoyo): perhaps we could create a new vector or maybe there's a GIMPLE instruction for that?
|
||||
// TODO(antoyo): don't use target specific builtins here.
|
||||
let func_name =
|
||||
match in_len {
|
||||
2 => {
|
||||
if element_type == bx.i64_type {
|
||||
"__builtin_ia32_vec_set_v2di"
|
||||
}
|
||||
else {
|
||||
unimplemented!();
|
||||
}
|
||||
},
|
||||
4 => {
|
||||
if element_type == bx.i32_type {
|
||||
"__builtin_ia32_vec_set_v4si"
|
||||
}
|
||||
else {
|
||||
unimplemented!();
|
||||
}
|
||||
},
|
||||
8 => {
|
||||
if element_type == bx.i16_type {
|
||||
"__builtin_ia32_vec_set_v8hi"
|
||||
}
|
||||
else {
|
||||
unimplemented!();
|
||||
}
|
||||
},
|
||||
_ => unimplemented!("Len: {}", in_len),
|
||||
};
|
||||
let builtin = bx.context.get_target_builtin_function(func_name);
|
||||
let param1_type = builtin.get_param(0).to_rvalue().get_type();
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
|
||||
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_extract {
|
||||
require!(
|
||||
ret_ty == in_elem,
|
||||
"expected return type `{}` (element of input `{}`), found `{}`",
|
||||
in_elem,
|
||||
in_ty,
|
||||
ret_ty
|
||||
);
|
||||
let vector = args[0].immediate();
|
||||
return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
|
||||
}
|
||||
|
||||
if name == sym::simd_select {
|
||||
let m_elem_ty = in_elem;
|
||||
let m_len = in_len;
|
||||
require_simd!(arg_tys[1], "argument");
|
||||
let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
m_len == v_len,
|
||||
"mismatched lengths: mask length `{}` != other vector length `{}`",
|
||||
m_len,
|
||||
v_len
|
||||
);
|
||||
match m_elem_ty.kind() {
|
||||
ty::Int(_) => {}
|
||||
_ => return_error!("mask element type is `{}`, expected `i_`", m_elem_ty),
|
||||
}
|
||||
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
|
||||
}
|
||||
|
||||
if name == sym::simd_cast {
|
||||
require_simd!(ret_ty, "return");
|
||||
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
in_len == out_len,
|
||||
"expected return type with length {} (same as input type `{}`), \
|
||||
found `{}` with length {}",
|
||||
in_len,
|
||||
in_ty,
|
||||
ret_ty,
|
||||
out_len
|
||||
);
|
||||
// casting cares about nominal type, not just structural type
|
||||
if in_elem == out_elem {
|
||||
return Ok(args[0].immediate());
|
||||
}
|
||||
|
||||
enum Style {
|
||||
Float,
|
||||
Int(/* is signed? */ bool),
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
let (in_style, in_width) = match in_elem.kind() {
|
||||
// vectors of pointer-sized integers should've been
|
||||
// disallowed before here, so this unwrap is safe.
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
let (out_style, out_width) = match out_elem.kind() {
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
|
||||
let extend = |in_type, out_type| {
|
||||
let vector_type = bx.context.new_vector_type(out_type, 8);
|
||||
let vector = args[0].immediate();
|
||||
let array_type = bx.context.new_array_type(None, in_type, 8);
|
||||
// TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting.
|
||||
let array = bx.context.new_bitcast(None, vector, array_type);
|
||||
|
||||
let cast_vec_element = |index| {
|
||||
let index = bx.context.new_rvalue_from_int(bx.int_type, index);
|
||||
bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type)
|
||||
};
|
||||
|
||||
bx.context.new_rvalue_from_vector(None, vector_type, &[
|
||||
cast_vec_element(0),
|
||||
cast_vec_element(1),
|
||||
cast_vec_element(2),
|
||||
cast_vec_element(3),
|
||||
cast_vec_element(4),
|
||||
cast_vec_element(5),
|
||||
cast_vec_element(6),
|
||||
cast_vec_element(7),
|
||||
])
|
||||
};
|
||||
|
||||
match (in_style, out_style) {
|
||||
(Style::Int(in_is_signed), Style::Int(_)) => {
|
||||
return Ok(match in_width.cmp(&out_width) {
|
||||
Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
|
||||
Ordering::Equal => args[0].immediate(),
|
||||
Ordering::Less => {
|
||||
if in_is_signed {
|
||||
match (in_width, out_width) {
|
||||
// FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly
|
||||
// call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that
|
||||
// we can generate a call to it.
|
||||
(8, 16) => extend(bx.i8_type, bx.i16_type),
|
||||
(8, 32) => extend(bx.i8_type, bx.i32_type),
|
||||
(8, 64) => extend(bx.i8_type, bx.i64_type),
|
||||
(16, 32) => extend(bx.i16_type, bx.i32_type),
|
||||
(32, 64) => extend(bx.i32_type, bx.i64_type),
|
||||
(16, 64) => extend(bx.i16_type, bx.i64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
} else {
|
||||
match (in_width, out_width) {
|
||||
(8, 16) => extend(bx.u8_type, bx.u16_type),
|
||||
(8, 32) => extend(bx.u8_type, bx.u32_type),
|
||||
(8, 64) => extend(bx.u8_type, bx.u64_type),
|
||||
(16, 32) => extend(bx.u16_type, bx.u32_type),
|
||||
(16, 64) => extend(bx.u16_type, bx.u64_type),
|
||||
(32, 64) => extend(bx.u32_type, bx.u64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
(Style::Int(_), Style::Float) => {
|
||||
// TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is
|
||||
// doing like __builtin_convertvector?
|
||||
// Or maybe provide convert_vector as an API since it might not easy to get the
|
||||
// types of internal functions.
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Int(_)) => {
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Float) => {
|
||||
unimplemented!();
|
||||
}
|
||||
_ => { /* Unsupported. Fallthrough. */ }
|
||||
}
|
||||
require!(
|
||||
false,
|
||||
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty,
|
||||
out_elem
|
||||
);
|
||||
}
|
||||
|
||||
macro_rules! arith_binary {
|
||||
($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
|
||||
$(if name == sym::$name {
|
||||
|
@ -151,6 +439,105 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
}
|
||||
}
|
||||
|
||||
fn simd_simple_float_intrinsic<'gcc, 'tcx>(
|
||||
name: Symbol,
|
||||
in_elem: Ty<'_>,
|
||||
in_ty: Ty<'_>,
|
||||
in_len: u64,
|
||||
bx: &mut Builder<'_, 'gcc, 'tcx>,
|
||||
span: Span,
|
||||
args: &[OperandRef<'tcx, RValue<'gcc>>],
|
||||
) -> Result<RValue<'gcc>, ()> {
|
||||
macro_rules! emit_error {
|
||||
($msg: tt) => {
|
||||
emit_error!($msg, )
|
||||
};
|
||||
($msg: tt, $($fmt: tt)*) => {
|
||||
span_invalid_monomorphization_error(
|
||||
bx.sess(), span,
|
||||
&format!(concat!("invalid monomorphization of `{}` intrinsic: ", $msg),
|
||||
name, $($fmt)*));
|
||||
}
|
||||
}
|
||||
macro_rules! return_error {
|
||||
($($fmt: tt)*) => {
|
||||
{
|
||||
emit_error!($($fmt)*);
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (elem_ty_str, elem_ty) =
|
||||
if let ty::Float(f) = in_elem.kind() {
|
||||
let elem_ty = bx.cx.type_float_from_ty(*f);
|
||||
match f.bit_width() {
|
||||
32 => ("f32", elem_ty),
|
||||
64 => ("f64", elem_ty),
|
||||
_ => {
|
||||
return_error!(
|
||||
"unsupported element type `{}` of floating-point vector `{}`",
|
||||
f.name_str(),
|
||||
in_ty
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return_error!("`{}` is not a floating-point type", in_ty);
|
||||
};
|
||||
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, in_len);
|
||||
|
||||
let (intr_name, fn_ty) =
|
||||
match name {
|
||||
sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)), // TODO(antoyo): pand with 170141183420855150465331762880109871103
|
||||
sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
||||
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
|
||||
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
|
||||
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
|
||||
_ => return_error!("unrecognized intrinsic `{}`", name),
|
||||
};
|
||||
let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
|
||||
let function = intrinsic::llvm::intrinsic(llvm_name, &bx.cx);
|
||||
let function: RValue<'gcc> = unsafe { std::mem::transmute(function) };
|
||||
let c = bx.call(fn_ty, function, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
|
||||
Ok(c)
|
||||
}
|
||||
|
||||
if std::matches!(
|
||||
name,
|
||||
sym::simd_ceil
|
||||
| sym::simd_fabs
|
||||
| sym::simd_fcos
|
||||
| sym::simd_fexp2
|
||||
| sym::simd_fexp
|
||||
| sym::simd_flog10
|
||||
| sym::simd_flog2
|
||||
| sym::simd_flog
|
||||
| sym::simd_floor
|
||||
| sym::simd_fma
|
||||
| sym::simd_fpow
|
||||
| sym::simd_fpowi
|
||||
| sym::simd_fsin
|
||||
| sym::simd_fsqrt
|
||||
| sym::simd_round
|
||||
| sym::simd_trunc
|
||||
) {
|
||||
return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
|
||||
arith_binary! {
|
||||
simd_add: Uint, Int => add, Float => fadd;
|
||||
simd_sub: Uint, Int => sub, Float => fsub;
|
||||
|
@ -185,5 +572,183 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
simd_neg: Int => neg, Float => fneg;
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_saturating_add || name == sym::simd_saturating_sub {
|
||||
let lhs = args[0].immediate();
|
||||
let rhs = args[1].immediate();
|
||||
let is_add = name == sym::simd_saturating_add;
|
||||
let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
|
||||
let (signed, elem_width, elem_ty) = match *in_elem.kind() {
|
||||
ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)),
|
||||
ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)),
|
||||
_ => {
|
||||
return_error!(
|
||||
"expected element type `{}` of vector type `{}` \
|
||||
to be a signed or unsigned integer type",
|
||||
arg_tys[0].simd_size_and_type(bx.tcx()).1,
|
||||
arg_tys[0]
|
||||
);
|
||||
}
|
||||
};
|
||||
let builtin_name =
|
||||
match (signed, is_add, in_len, elem_width) {
|
||||
(true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.
|
||||
(false, true, 32, 8) => "__builtin_ia32_paddusb256",
|
||||
(true, true, 16, 16) => "__builtin_ia32_paddsw256",
|
||||
(false, true, 16, 16) => "__builtin_ia32_paddusw256",
|
||||
(true, false, 16, 16) => "__builtin_ia32_psubsw256",
|
||||
(false, false, 16, 16) => "__builtin_ia32_psubusw256",
|
||||
(true, false, 32, 8) => "__builtin_ia32_psubsb256",
|
||||
(false, false, 32, 8) => "__builtin_ia32_psubusb256",
|
||||
_ => unimplemented!("signed: {}, is_add: {}, in_len: {}, elem_width: {}", signed, is_add, in_len, elem_width),
|
||||
};
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
|
||||
|
||||
let func = bx.context.get_target_builtin_function(builtin_name);
|
||||
let param1_type = func.get_param(0).to_rvalue().get_type();
|
||||
let param2_type = func.get_param(1).to_rvalue().get_type();
|
||||
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
|
||||
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
|
||||
let result = bx.context.new_call(None, func, &[lhs, rhs]);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vec_ty));
|
||||
}
|
||||
|
||||
// Generates the handler for one arithmetic reduction intrinsic. The expansion is an
// `if name == sym::$name { ... }` statement that returns from the enclosing function.
//
//   $name         - intrinsic symbol compared against `name`
//   $vec_op       - binary operation handed to `vector_reduce_op`
//   $float_reduce - builder method used for ordered float reductions
//   $ordered      - whether the reduction takes an accumulator in `args[1]`
//   $op           - builder method that folds the accumulator into an integer reduction
//   $identity     - not used by the expansion yet
macro_rules! arith_red {
    ($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident,
     $identity:expr) => {
        if name == sym::$name {
            // A reduction yields a single element, so the return type must be the lane type.
            require!(
                ret_ty == in_elem,
                "expected return type `{}` (element of input `{}`), found `{}`",
                in_elem,
                in_ty,
                ret_ty
            );
            return match in_elem.kind() {
                ty::Int(_) | ty::Uint(_) => {
                    let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
                    if $ordered {
                        // if overflow occurs, the result is the
                        // mathematical result modulo 2^n:
                        Ok(bx.$op(args[1].immediate(), r))
                    }
                    else {
                        // Reuse the reduction built above instead of building it a second time.
                        Ok(r)
                    }
                }
                ty::Float(_) => {
                    if $ordered {
                        // ordered arithmetic reductions take an accumulator
                        let acc = args[1].immediate();
                        Ok(bx.$float_reduce(acc, args[0].immediate()))
                    }
                    else {
                        Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
                    }
                }
                _ => return_error!(
                    "unsupported {} from `{}` with element `{}` to `{}`",
                    sym::$name,
                    in_ty,
                    in_elem,
                    ret_ty
                ),
            };
        }
    };
}
|
||||
|
||||
arith_red!(
|
||||
simd_reduce_add_unordered: BinaryOp::Plus,
|
||||
vector_reduce_fadd_fast,
|
||||
false,
|
||||
add,
|
||||
0.0 // TODO: Use this argument.
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_mul_unordered: BinaryOp::Mult,
|
||||
vector_reduce_fmul_fast,
|
||||
false,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
|
||||
// Generates the handler for a min/max reduction intrinsic: when `name` is `sym::$name`,
// the input vector is reduced with the builder method `$reduction` and the result is
// returned from the enclosing function.
macro_rules! minmax_red {
    ($name:ident: $reduction:ident) => {
        if name == sym::$name {
            // The reduction yields one element, so the return type must be the lane type.
            require!(
                ret_ty == in_elem,
                "expected return type `{}` (element of input `{}`), found `{}`",
                in_elem,
                in_ty,
                ret_ty
            );
            // Only integer and float lanes can be reduced.
            let reducible =
                std::matches!(in_elem.kind(), ty::Int(_) | ty::Uint(_) | ty::Float(_));
            if !reducible {
                return_error!(
                    "unsupported {} from `{}` with element `{}` to `{}`",
                    sym::$name,
                    in_ty,
                    in_elem,
                    ret_ty
                );
            }
            return Ok(bx.$reduction(args[0].immediate()));
        }
    };
}
|
||||
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max);
|
||||
|
||||
// Generates the handler for a bitwise reduction intrinsic.
//
//   $name    - intrinsic symbol compared against `name`
//   $op      - binary operation handed to `vector_reduce_op`
//   $boolean - when true, the lanes are truncated to `i1` before reducing and the result is
//              zero-extended to the boolean type; when false, the lanes are reduced as they
//              are and the return type must equal the lane type
macro_rules! bitwise_red {
    ($name:ident : $op:expr, $boolean:expr) => {
        if name == sym::$name {
            let input = if $boolean {
                if !std::matches!(in_elem.kind(), ty::Int(_) | ty::Uint(_)) {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` to `{}`",
                        sym::$name,
                        in_ty,
                        in_elem,
                        ret_ty
                    );
                }

                // boolean reductions operate on vectors of i1s:
                let lane_ty = bx.type_i1();
                let mask_ty = bx.type_vector(lane_ty, in_len as u64);
                bx.trunc(args[0].immediate(), mask_ty)
            } else {
                require!(
                    ret_ty == in_elem,
                    "expected return type `{}` (element of input `{}`), found `{}`",
                    in_elem,
                    in_ty,
                    ret_ty
                );
                args[0].immediate()
            };

            // Non-integer lanes are rejected for the non-boolean flavour as well.
            if !std::matches!(in_elem.kind(), ty::Int(_) | ty::Uint(_)) {
                return_error!(
                    "unsupported {} from `{}` with element `{}` to `{}`",
                    sym::$name,
                    in_ty,
                    in_elem,
                    ret_ty
                );
            }
            let reduced = bx.vector_reduce_op(input, $op);
            return Ok(if $boolean { bx.zext(reduced, bx.type_bool()) } else { reduced });
        }
    };
}
|
||||
|
||||
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
|
||||
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
|
||||
|
||||
unimplemented!("simd {}", name);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue