Rollup merge of #108783 - antoyo:sync-cg_gcc-2023-03-04, r=cjgillot
Sync rustc_codegen_gcc 2023/03/04 Hi. This syncs all the changes from rustc_codegen_gcc. Thanks for the review.
This commit is contained in:
commit
c21a640c5a
61 changed files with 5745 additions and 1125 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,159 +1,387 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
use gccjit::{Function, FunctionPtrType, RValue, ToRValue};
|
||||
use gccjit::{Function, FunctionPtrType, RValue, ToRValue, UnaryOp};
|
||||
use rustc_codegen_ssa::traits::BuilderMethods;
|
||||
|
||||
use crate::{context::CodegenCx, builder::Builder};
|
||||
|
||||
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
|
||||
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str, original_function_name: Option<&String>) -> Cow<'b, [RValue<'gcc>]> {
|
||||
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
|
||||
// arguments here.
|
||||
if gcc_func.get_param_count() != args.len() {
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
|
||||
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
|
||||
// NOTE: the following intrinsics have a different number of parameters in LLVM and GCC.
|
||||
"__builtin_ia32_prold512_mask" | "__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
|
||||
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
|
||||
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
|
||||
| "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask"
|
||||
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
|
||||
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
|
||||
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
|
||||
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask"
|
||||
| "__builtin_ia32_prolq512_mask" | "__builtin_ia32_prorq512_mask" | "__builtin_ia32_pslldi512_mask"
|
||||
| "__builtin_ia32_psrldi512_mask" | "__builtin_ia32_psllqi512_mask" | "__builtin_ia32_psrlqi512_mask"
|
||||
| "__builtin_ia32_pslld512_mask" | "__builtin_ia32_psrld512_mask" | "__builtin_ia32_psllq512_mask"
|
||||
| "__builtin_ia32_psrlq512_mask" | "__builtin_ia32_psrad512_mask" | "__builtin_ia32_psraq512_mask"
|
||||
| "__builtin_ia32_psradi512_mask" | "__builtin_ia32_psraqi512_mask" | "__builtin_ia32_psrav16si_mask"
|
||||
| "__builtin_ia32_psrav8di_mask" | "__builtin_ia32_prolvd512_mask" | "__builtin_ia32_prorvd512_mask"
|
||||
| "__builtin_ia32_prolvq512_mask" | "__builtin_ia32_prorvq512_mask" | "__builtin_ia32_psllv16si_mask"
|
||||
| "__builtin_ia32_psrlv16si_mask" | "__builtin_ia32_psllv8di_mask" | "__builtin_ia32_psrlv8di_mask"
|
||||
| "__builtin_ia32_permvarsi512_mask" | "__builtin_ia32_vpermilvarps512_mask"
|
||||
| "__builtin_ia32_vpermilvarpd512_mask" | "__builtin_ia32_permvardi512_mask"
|
||||
| "__builtin_ia32_permvarsf512_mask" | "__builtin_ia32_permvarqi512_mask"
|
||||
| "__builtin_ia32_permvarqi256_mask" | "__builtin_ia32_permvarqi128_mask"
|
||||
| "__builtin_ia32_vpmultishiftqb512_mask" | "__builtin_ia32_vpmultishiftqb256_mask"
|
||||
| "__builtin_ia32_vpmultishiftqb128_mask"
|
||||
=> {
|
||||
// TODO: refactor by separating those intrinsics outside of this branch.
|
||||
let add_before_last_arg =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
|
||||
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
|
||||
_ => false,
|
||||
};
|
||||
let new_first_arg_is_zero =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
|
||||
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
|
||||
_ => false
|
||||
};
|
||||
let arg3_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
|
||||
_ => 2,
|
||||
};
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(arg3_index);
|
||||
let first_arg =
|
||||
if new_first_arg_is_zero {
|
||||
let vector_type = arg3_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
|
||||
}
|
||||
else {
|
||||
builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
|
||||
};
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, first_arg);
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let first_arg = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(first_arg);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask" | "__builtin_ia32_pminuq256_mask"
|
||||
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_prold256_mask" | "__builtin_ia32_prold128_mask"
|
||||
| "__builtin_ia32_prord512_mask" | "__builtin_ia32_prord256_mask" | "__builtin_ia32_prord128_mask"
|
||||
| "__builtin_ia32_prolq256_mask" | "__builtin_ia32_prolq128_mask" | "__builtin_ia32_prorq256_mask"
|
||||
| "__builtin_ia32_prorq128_mask" | "__builtin_ia32_psraq256_mask" | "__builtin_ia32_psraq128_mask"
|
||||
| "__builtin_ia32_psraqi256_mask" | "__builtin_ia32_psraqi128_mask" | "__builtin_ia32_psravq256_mask"
|
||||
| "__builtin_ia32_psravq128_mask" | "__builtin_ia32_prolvd256_mask" | "__builtin_ia32_prolvd128_mask"
|
||||
| "__builtin_ia32_prorvd256_mask" | "__builtin_ia32_prorvd128_mask" | "__builtin_ia32_prolvq256_mask"
|
||||
| "__builtin_ia32_prolvq128_mask" | "__builtin_ia32_prorvq256_mask" | "__builtin_ia32_prorvq128_mask"
|
||||
| "__builtin_ia32_permvardi256_mask" | "__builtin_ia32_permvardf512_mask" | "__builtin_ia32_permvardf256_mask"
|
||||
| "__builtin_ia32_pmulhuw512_mask" | "__builtin_ia32_pmulhw512_mask" | "__builtin_ia32_pmulhrsw512_mask"
|
||||
| "__builtin_ia32_pmaxuw512_mask" | "__builtin_ia32_pmaxub512_mask" | "__builtin_ia32_pmaxsw512_mask"
|
||||
| "__builtin_ia32_pmaxsb512_mask" | "__builtin_ia32_pminuw512_mask" | "__builtin_ia32_pminub512_mask"
|
||||
| "__builtin_ia32_pminsw512_mask" | "__builtin_ia32_pminsb512_mask"
|
||||
| "__builtin_ia32_pmaddwd512_mask" | "__builtin_ia32_pmaddubsw512_mask" | "__builtin_ia32_packssdw512_mask"
|
||||
| "__builtin_ia32_packsswb512_mask" | "__builtin_ia32_packusdw512_mask" | "__builtin_ia32_packuswb512_mask"
|
||||
| "__builtin_ia32_pavgw512_mask" | "__builtin_ia32_pavgb512_mask" | "__builtin_ia32_psllw512_mask"
|
||||
| "__builtin_ia32_psllwi512_mask" | "__builtin_ia32_psllv32hi_mask" | "__builtin_ia32_psrlw512_mask"
|
||||
| "__builtin_ia32_psrlwi512_mask" | "__builtin_ia32_psllv16hi_mask" | "__builtin_ia32_psllv8hi_mask"
|
||||
| "__builtin_ia32_psrlv32hi_mask" | "__builtin_ia32_psraw512_mask" | "__builtin_ia32_psrawi512_mask"
|
||||
| "__builtin_ia32_psrlv16hi_mask" | "__builtin_ia32_psrlv8hi_mask" | "__builtin_ia32_psrav32hi_mask"
|
||||
| "__builtin_ia32_permvarhi512_mask" | "__builtin_ia32_pshufb512_mask" | "__builtin_ia32_psrav16hi_mask"
|
||||
| "__builtin_ia32_psrav8hi_mask" | "__builtin_ia32_permvarhi256_mask" | "__builtin_ia32_permvarhi128_mask"
|
||||
=> {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let vector_type = arg3_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
let first_arg = builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units]);
|
||||
new_args.push(first_arg);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_dbpsadbw512_mask" | "__builtin_ia32_dbpsadbw256_mask" | "__builtin_ia32_dbpsadbw128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let vector_type = arg4_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
let first_arg = builder.context.new_rvalue_from_vector(None, arg4_type, &vec![zero; num_units]);
|
||||
new_args.push(first_arg);
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask"
|
||||
| "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
// Remove last arg as it doesn't seem to be used in GCC and is always false.
|
||||
new_args.pop();
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let vector_type = arg2_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]);
|
||||
new_args.push(first_arg);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vpconflictsi_512_mask" | "__builtin_ia32_vpconflictsi_256_mask"
|
||||
| "__builtin_ia32_vpconflictsi_128_mask" | "__builtin_ia32_vpconflictdi_512_mask"
|
||||
| "__builtin_ia32_vpconflictdi_256_mask" | "__builtin_ia32_vpconflictdi_128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let vector_type = arg2_type.dyncast_vector().expect("vector type");
|
||||
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
|
||||
let num_units = vector_type.get_num_units();
|
||||
let first_arg = builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]);
|
||||
new_args.push(first_arg);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
|
||||
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
|
||||
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
|
||||
let mut last_arg = None;
|
||||
if args.len() == 4 {
|
||||
last_arg = new_args.pop();
|
||||
}
|
||||
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
|
||||
}
|
||||
|
||||
if let Some(last_arg) = last_arg {
|
||||
new_args.push(last_arg);
|
||||
}
|
||||
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
|
||||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
|
||||
| "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(undefined);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vpermi2vard512_mask" | "__builtin_ia32_vpermi2vard256_mask"
|
||||
| "__builtin_ia32_vpermi2vard128_mask" | "__builtin_ia32_vpermi2varq512_mask"
|
||||
| "__builtin_ia32_vpermi2varq256_mask" | "__builtin_ia32_vpermi2varq128_mask"
|
||||
| "__builtin_ia32_vpermi2varps512_mask" | "__builtin_ia32_vpermi2varps256_mask"
|
||||
| "__builtin_ia32_vpermi2varps128_mask" | "__builtin_ia32_vpermi2varpd512_mask"
|
||||
| "__builtin_ia32_vpermi2varpd256_mask" | "__builtin_ia32_vpermi2varpd128_mask" | "__builtin_ia32_vpmadd52huq512_mask"
|
||||
| "__builtin_ia32_vpmadd52luq512_mask" | "__builtin_ia32_vpmadd52huq256_mask" | "__builtin_ia32_vpmadd52luq256_mask"
|
||||
| "__builtin_ia32_vpmadd52huq128_mask"
|
||||
=> {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask"
|
||||
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(undefined);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_stmxcsr" => {
|
||||
args = vec![].into();
|
||||
},
|
||||
"__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let variable = builder.current_func().new_local(None, arg2_type, "addcarryResult");
|
||||
new_args.push(variable.get_address(None));
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vpermt2varqi512_mask" | "__builtin_ia32_vpermt2varqi256_mask"
|
||||
| "__builtin_ia32_vpermt2varqi128_mask" | "__builtin_ia32_vpermt2varhi512_mask"
|
||||
| "__builtin_ia32_vpermt2varhi256_mask" | "__builtin_ia32_vpermt2varhi128_mask"
|
||||
=> {
|
||||
let new_args = args.to_vec();
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
args = vec![new_args[1], new_args[0], new_args[2], minus_one].into();
|
||||
},
|
||||
"__builtin_ia32_xrstor" | "__builtin_ia32_xsavec" => {
|
||||
let new_args = args.to_vec();
|
||||
let thirty_two = builder.context.new_rvalue_from_int(new_args[1].get_type(), 32);
|
||||
let arg2 = new_args[1] << thirty_two | new_args[2];
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let arg2 = builder.context.new_cast(None, arg2, arg2_type);
|
||||
args = vec![new_args[0], arg2].into();
|
||||
},
|
||||
"__builtin_prefetch" => {
|
||||
let mut new_args = args.to_vec();
|
||||
new_args.pop();
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
else {
|
||||
match &*func_name {
|
||||
"__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
|
||||
let new_args = args.to_vec();
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let arg3 = builder.context.new_cast(None, new_args[4], arg3_type);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let arg4 = builder.context.new_bitcast(None, new_args[2], arg4_type);
|
||||
args = vec![new_args[0], new_args[1], arg3, arg4, new_args[3], new_args[5]].into();
|
||||
},
|
||||
// NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
|
||||
// FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC
|
||||
// intrinsic to avoid this.
|
||||
"__builtin_ia32_vfmaddss3_round" => {
|
||||
let new_args = args.to_vec();
|
||||
let arg1_type = gcc_func.get_param_type(0);
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 4]);
|
||||
let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 4]);
|
||||
let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 4]);
|
||||
args = vec![a, b, c, new_args[3]].into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddsd3_round" => {
|
||||
let new_args = args.to_vec();
|
||||
let arg1_type = gcc_func.get_param_type(0);
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 2]);
|
||||
let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 2]);
|
||||
let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]);
|
||||
args = vec![a, b, c, new_args[3]].into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256"
|
||||
| "__builtin_ia32_vfmaddsubpd" => {
|
||||
if let Some(original_function_name) = original_function_name {
|
||||
match &**original_function_name {
|
||||
"llvm.x86.fma.vfmsubadd.pd.256" | "llvm.x86.fma.vfmsubadd.ps" | "llvm.x86.fma.vfmsubadd.ps.256"
|
||||
| "llvm.x86.fma.vfmsubadd.pd" => {
|
||||
// NOTE: since both llvm.x86.fma.vfmsubadd.ps and llvm.x86.fma.vfmaddsub.ps map to
|
||||
// __builtin_ia32_vfmaddsubps, only add minus if this comes from a
|
||||
// subadd LLVM intrinsic, e.g. _mm256_fmsubadd_pd.
|
||||
let mut new_args = args.to_vec();
|
||||
let arg3 = &mut new_args[2];
|
||||
*arg3 = builder.context.new_unary_op(None, UnaryOp::Minus, arg3.get_type(), *arg3);
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
else {
|
||||
new_args.push(first_arg);
|
||||
}
|
||||
let arg4_index =
|
||||
match &*func_name {
|
||||
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
|
||||
_ => 3,
|
||||
};
|
||||
let arg4_type = gcc_func.get_param_type(arg4_index);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
if add_before_last_arg {
|
||||
new_args.insert(new_args.len() - 1, minus_one);
|
||||
}
|
||||
else {
|
||||
new_args.push(minus_one);
|
||||
}
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
|
||||
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
|
||||
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
|
||||
let mut last_arg = None;
|
||||
if args.len() == 4 {
|
||||
last_arg = new_args.pop();
|
||||
}
|
||||
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
|
||||
}
|
||||
|
||||
if let Some(last_arg) = last_arg {
|
||||
new_args.push(last_arg);
|
||||
}
|
||||
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
|
||||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(undefined);
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_ldmxcsr" => {
|
||||
// The builtin __builtin_ia32_ldmxcsr takes an integer value while llvm.x86.sse.ldmxcsr takes a pointer,
|
||||
// so dereference the pointer.
|
||||
let mut new_args = args.to_vec();
|
||||
let uint_ptr_type = builder.uint_type.make_pointer();
|
||||
let arg1 = builder.context.new_cast(None, args[0], uint_ptr_type);
|
||||
new_args[0] = arg1.dereference(None).to_rvalue();
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_rcp14sd_mask" | "__builtin_ia32_rcp14ss_mask" | "__builtin_ia32_rsqrt14sd_mask"
|
||||
| "__builtin_ia32_rsqrt14ss_mask" => {
|
||||
let new_args = args.to_vec();
|
||||
args = vec![new_args[1], new_args[0], new_args[2], new_args[3]].into();
|
||||
},
|
||||
"__builtin_ia32_sqrtsd_mask_round" | "__builtin_ia32_sqrtss_mask_round" => {
|
||||
let new_args = args.to_vec();
|
||||
args = vec![new_args[1], new_args[0], new_args[2], new_args[3], new_args[4]].into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
|
||||
pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool, orig_args: &[RValue<'gcc>]) -> RValue<'gcc> {
|
||||
match func_name {
|
||||
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
|
||||
#[cfg(feature="master")]
|
||||
{
|
||||
let zero = builder.context.new_rvalue_zero(builder.int_type);
|
||||
return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
|
||||
// Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 point to the same GCC builtin,
|
||||
// but only the former requires adjusting the return value.
|
||||
// Those 2 LLVM intrinsics differ by their argument count, that's why we check if the
|
||||
// arguments were adjusted.
|
||||
if args_adjusted {
|
||||
let last_arg = args.last().expect("last arg");
|
||||
let field1 = builder.context.new_field(None, builder.u8_type, "carryFlag");
|
||||
let field2 = builder.context.new_field(None, args[1].get_type(), "carryResult");
|
||||
let struct_type = builder.context.new_struct_type(None, "addcarryResult", &[field1, field2]);
|
||||
return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[return_value, last_arg.dereference(None).to_rvalue()]);
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_stmxcsr" => {
|
||||
// The builtin __builtin_ia32_stmxcsr returns a value while llvm.x86.sse.stmxcsr writes
|
||||
// the result in its pointer argument.
|
||||
// We removed the argument since __builtin_ia32_stmxcsr takes no arguments, so we need
|
||||
// to get back the original argument to get the pointer we need to write the result to.
|
||||
let uint_ptr_type = builder.uint_type.make_pointer();
|
||||
let ptr = builder.context.new_cast(None, orig_args[0], uint_ptr_type);
|
||||
builder.llbb().add_assignment(None, ptr.dereference(None), return_value);
|
||||
// The return value was assigned to the result pointer above. In order to not call the
|
||||
// builtin twice, we overwrite the return value with a dummy value.
|
||||
return_value = builder.context.new_rvalue_zero(builder.int_type);
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
return_value
|
||||
}
|
||||
|
||||
pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
|
||||
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
|
||||
// last argument type check.
|
||||
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
|
||||
match func_name {
|
||||
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
|
||||
// last argument type check.
|
||||
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
|
||||
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
|
||||
| "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
|
||||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask"
|
||||
| "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => {
|
||||
if index == args_len - 1 {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => {
|
||||
if index == 2 || index == 3 {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
// Since there are two LLVM intrinsics that map to each of these GCC builtins and only
|
||||
// one of them has a missing parameter before the last one, we check the number of
|
||||
|
@ -162,6 +390,14 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
|
|||
return true;
|
||||
}
|
||||
},
|
||||
// NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors.
|
||||
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => return true,
|
||||
"__builtin_ia32_vplzcntd_512_mask" | "__builtin_ia32_vplzcntd_256_mask" | "__builtin_ia32_vplzcntd_128_mask"
|
||||
| "__builtin_ia32_vplzcntq_512_mask" | "__builtin_ia32_vplzcntq_256_mask" | "__builtin_ia32_vplzcntq_128_mask" => {
|
||||
if index == args_len - 1 {
|
||||
return true;
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
|
@ -171,7 +407,7 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
|
|||
#[cfg(not(feature="master"))]
|
||||
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
|
||||
match name {
|
||||
"llvm.x86.xgetbv" => {
|
||||
"llvm.x86.xgetbv" | "llvm.x86.sse2.pause" => {
|
||||
let gcc_name = "__builtin_trap";
|
||||
let func = cx.context.get_builtin_function(gcc_name);
|
||||
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
|
||||
|
@ -183,24 +419,26 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
|
||||
#[cfg(feature="master")]
|
||||
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
|
||||
match name {
|
||||
"llvm.prefetch" => {
|
||||
let gcc_name = "__builtin_prefetch";
|
||||
let func = cx.context.get_builtin_function(gcc_name);
|
||||
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
|
||||
return func
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let gcc_name = match name {
|
||||
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
|
||||
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
|
||||
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
|
||||
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
|
||||
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
|
||||
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
|
||||
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
|
||||
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
|
||||
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
|
||||
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
|
||||
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
|
||||
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
|
||||
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
|
||||
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
|
||||
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
|
||||
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
|
||||
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
|
||||
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask",
|
||||
|
@ -221,6 +459,153 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
|
||||
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.sitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtdq2ps512_mask",
|
||||
"llvm.x86.avx512.uitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtudq2ps512_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.d.512" => "__builtin_ia32_ucmpd512_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.d.256" => "__builtin_ia32_ucmpd256_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.d.128" => "__builtin_ia32_ucmpd128_mask",
|
||||
"llvm.x86.avx512.mask.cmp.d.512" => "__builtin_ia32_cmpd512_mask",
|
||||
"llvm.x86.avx512.mask.cmp.d.256" => "__builtin_ia32_cmpd256_mask",
|
||||
"llvm.x86.avx512.mask.cmp.d.128" => "__builtin_ia32_cmpd128_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.q.512" => "__builtin_ia32_ucmpq512_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.q.256" => "__builtin_ia32_ucmpq256_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.q.128" => "__builtin_ia32_ucmpq128_mask",
|
||||
"llvm.x86.avx512.mask.cmp.q.512" => "__builtin_ia32_cmpq512_mask",
|
||||
"llvm.x86.avx512.mask.cmp.q.256" => "__builtin_ia32_cmpq256_mask",
|
||||
"llvm.x86.avx512.mask.cmp.q.128" => "__builtin_ia32_cmpq128_mask",
|
||||
"llvm.x86.avx512.mask.max.ss.round" => "__builtin_ia32_maxss_mask_round",
|
||||
"llvm.x86.avx512.mask.max.sd.round" => "__builtin_ia32_maxsd_mask_round",
|
||||
"llvm.x86.avx512.mask.min.ss.round" => "__builtin_ia32_minss_mask_round",
|
||||
"llvm.x86.avx512.mask.min.sd.round" => "__builtin_ia32_minsd_mask_round",
|
||||
"llvm.x86.avx512.mask.sqrt.ss" => "__builtin_ia32_sqrtss_mask_round",
|
||||
"llvm.x86.avx512.mask.sqrt.sd" => "__builtin_ia32_sqrtsd_mask_round",
|
||||
"llvm.x86.avx512.mask.getexp.ss" => "__builtin_ia32_getexpss_mask_round",
|
||||
"llvm.x86.avx512.mask.getexp.sd" => "__builtin_ia32_getexpsd_mask_round",
|
||||
"llvm.x86.avx512.mask.getmant.ss" => "__builtin_ia32_getmantss_mask_round",
|
||||
"llvm.x86.avx512.mask.getmant.sd" => "__builtin_ia32_getmantsd_mask_round",
|
||||
"llvm.x86.avx512.mask.rndscale.ss" => "__builtin_ia32_rndscaless_mask_round",
|
||||
"llvm.x86.avx512.mask.rndscale.sd" => "__builtin_ia32_rndscalesd_mask_round",
|
||||
"llvm.x86.avx512.mask.scalef.ss" => "__builtin_ia32_scalefss_mask_round",
|
||||
"llvm.x86.avx512.mask.scalef.sd" => "__builtin_ia32_scalefsd_mask_round",
|
||||
"llvm.x86.avx512.vfmadd.f32" => "__builtin_ia32_vfmaddss3_round",
|
||||
"llvm.x86.avx512.vfmadd.f64" => "__builtin_ia32_vfmaddsd3_round",
|
||||
"llvm.ceil.v4f64" => "__builtin_ia32_ceilpd256",
|
||||
"llvm.ceil.v8f32" => "__builtin_ia32_ceilps256",
|
||||
"llvm.floor.v4f64" => "__builtin_ia32_floorpd256",
|
||||
"llvm.floor.v8f32" => "__builtin_ia32_floorps256",
|
||||
"llvm.sqrt.v4f64" => "__builtin_ia32_sqrtpd256",
|
||||
"llvm.x86.sse.stmxcsr" => "__builtin_ia32_stmxcsr",
|
||||
"llvm.x86.sse.ldmxcsr" => "__builtin_ia32_ldmxcsr",
|
||||
"llvm.ctpop.v16i32" => "__builtin_ia32_vpopcountd_v16si",
|
||||
"llvm.ctpop.v8i32" => "__builtin_ia32_vpopcountd_v8si",
|
||||
"llvm.ctpop.v4i32" => "__builtin_ia32_vpopcountd_v4si",
|
||||
"llvm.ctpop.v8i64" => "__builtin_ia32_vpopcountq_v8di",
|
||||
"llvm.ctpop.v4i64" => "__builtin_ia32_vpopcountq_v4di",
|
||||
"llvm.ctpop.v2i64" => "__builtin_ia32_vpopcountq_v2di",
|
||||
"llvm.x86.addcarry.64" => "__builtin_ia32_addcarryx_u64",
|
||||
"llvm.x86.subborrow.64" => "__builtin_ia32_sbb_u64",
|
||||
"llvm.floor.v2f64" => "__builtin_ia32_floorpd",
|
||||
"llvm.floor.v4f32" => "__builtin_ia32_floorps",
|
||||
"llvm.ceil.v2f64" => "__builtin_ia32_ceilpd",
|
||||
"llvm.ceil.v4f32" => "__builtin_ia32_ceilps",
|
||||
"llvm.fma.v2f64" => "__builtin_ia32_vfmaddpd",
|
||||
"llvm.fma.v4f64" => "__builtin_ia32_vfmaddpd256",
|
||||
"llvm.fma.v4f32" => "__builtin_ia32_vfmaddps",
|
||||
"llvm.fma.v8f32" => "__builtin_ia32_vfmaddps256",
|
||||
"llvm.ctlz.v16i32" => "__builtin_ia32_vplzcntd_512_mask",
|
||||
"llvm.ctlz.v8i32" => "__builtin_ia32_vplzcntd_256_mask",
|
||||
"llvm.ctlz.v4i32" => "__builtin_ia32_vplzcntd_128_mask",
|
||||
"llvm.ctlz.v8i64" => "__builtin_ia32_vplzcntq_512_mask",
|
||||
"llvm.ctlz.v4i64" => "__builtin_ia32_vplzcntq_256_mask",
|
||||
"llvm.ctlz.v2i64" => "__builtin_ia32_vplzcntq_128_mask",
|
||||
"llvm.ctpop.v32i16" => "__builtin_ia32_vpopcountw_v32hi",
|
||||
"llvm.x86.fma.vfmsub.sd" => "__builtin_ia32_vfmsubsd3",
|
||||
"llvm.x86.fma.vfmsub.ss" => "__builtin_ia32_vfmsubss3",
|
||||
"llvm.x86.fma.vfmsubadd.pd" => "__builtin_ia32_vfmaddsubpd",
|
||||
"llvm.x86.fma.vfmsubadd.pd.256" => "__builtin_ia32_vfmaddsubpd256",
|
||||
"llvm.x86.fma.vfmsubadd.ps" => "__builtin_ia32_vfmaddsubps",
|
||||
"llvm.x86.fma.vfmsubadd.ps.256" => "__builtin_ia32_vfmaddsubps256",
|
||||
"llvm.x86.fma.vfnmadd.sd" => "__builtin_ia32_vfnmaddsd3",
|
||||
"llvm.x86.fma.vfnmadd.ss" => "__builtin_ia32_vfnmaddss3",
|
||||
"llvm.x86.fma.vfnmsub.sd" => "__builtin_ia32_vfnmsubsd3",
|
||||
"llvm.x86.fma.vfnmsub.ss" => "__builtin_ia32_vfnmsubss3",
|
||||
"llvm.x86.avx512.conflict.d.512" => "__builtin_ia32_vpconflictsi_512_mask",
|
||||
"llvm.x86.avx512.conflict.d.256" => "__builtin_ia32_vpconflictsi_256_mask",
|
||||
"llvm.x86.avx512.conflict.d.128" => "__builtin_ia32_vpconflictsi_128_mask",
|
||||
"llvm.x86.avx512.conflict.q.512" => "__builtin_ia32_vpconflictdi_512_mask",
|
||||
"llvm.x86.avx512.conflict.q.256" => "__builtin_ia32_vpconflictdi_256_mask",
|
||||
"llvm.x86.avx512.conflict.q.128" => "__builtin_ia32_vpconflictdi_128_mask",
|
||||
"llvm.x86.avx512.vpermi2var.qi.512" => "__builtin_ia32_vpermt2varqi512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.qi.256" => "__builtin_ia32_vpermt2varqi256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.qi.128" => "__builtin_ia32_vpermt2varqi128_mask",
|
||||
"llvm.x86.avx512.permvar.qi.512" => "__builtin_ia32_permvarqi512_mask",
|
||||
"llvm.x86.avx512.permvar.qi.256" => "__builtin_ia32_permvarqi256_mask",
|
||||
"llvm.x86.avx512.permvar.qi.128" => "__builtin_ia32_permvarqi128_mask",
|
||||
"llvm.x86.avx512.pmultishift.qb.512" => "__builtin_ia32_vpmultishiftqb512_mask",
|
||||
"llvm.x86.avx512.pmultishift.qb.256" => "__builtin_ia32_vpmultishiftqb256_mask",
|
||||
"llvm.x86.avx512.pmultishift.qb.128" => "__builtin_ia32_vpmultishiftqb128_mask",
|
||||
"llvm.ctpop.v16i16" => "__builtin_ia32_vpopcountw_v16hi",
|
||||
"llvm.ctpop.v8i16" => "__builtin_ia32_vpopcountw_v8hi",
|
||||
"llvm.ctpop.v64i8" => "__builtin_ia32_vpopcountb_v64qi",
|
||||
"llvm.ctpop.v32i8" => "__builtin_ia32_vpopcountb_v32qi",
|
||||
"llvm.ctpop.v16i8" => "__builtin_ia32_vpopcountb_v16qi",
|
||||
"llvm.x86.avx512.mask.vpshufbitqmb.512" => "__builtin_ia32_vpshufbitqmb512_mask",
|
||||
"llvm.x86.avx512.mask.vpshufbitqmb.256" => "__builtin_ia32_vpshufbitqmb256_mask",
|
||||
"llvm.x86.avx512.mask.vpshufbitqmb.128" => "__builtin_ia32_vpshufbitqmb128_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.w.512" => "__builtin_ia32_ucmpw512_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.w.256" => "__builtin_ia32_ucmpw256_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.w.128" => "__builtin_ia32_ucmpw128_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.b.512" => "__builtin_ia32_ucmpb512_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.b.256" => "__builtin_ia32_ucmpb256_mask",
|
||||
"llvm.x86.avx512.mask.ucmp.b.128" => "__builtin_ia32_ucmpb128_mask",
|
||||
"llvm.x86.avx512.mask.cmp.w.512" => "__builtin_ia32_cmpw512_mask",
|
||||
"llvm.x86.avx512.mask.cmp.w.256" => "__builtin_ia32_cmpw256_mask",
|
||||
"llvm.x86.avx512.mask.cmp.w.128" => "__builtin_ia32_cmpw128_mask",
|
||||
"llvm.x86.avx512.mask.cmp.b.512" => "__builtin_ia32_cmpb512_mask",
|
||||
"llvm.x86.avx512.mask.cmp.b.256" => "__builtin_ia32_cmpb256_mask",
|
||||
"llvm.x86.avx512.mask.cmp.b.128" => "__builtin_ia32_cmpb128_mask",
|
||||
"llvm.x86.xrstor" => "__builtin_ia32_xrstor",
|
||||
"llvm.x86.xsavec" => "__builtin_ia32_xsavec",
|
||||
"llvm.x86.addcarry.32" => "__builtin_ia32_addcarryx_u32",
|
||||
"llvm.x86.subborrow.32" => "__builtin_ia32_sbb_u32",
|
||||
"llvm.x86.avx512.mask.compress.store.w.512" => "__builtin_ia32_compressstoreuhi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.w.256" => "__builtin_ia32_compressstoreuhi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.w.128" => "__builtin_ia32_compressstoreuhi128_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.b.512" => "__builtin_ia32_compressstoreuqi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.b.256" => "__builtin_ia32_compressstoreuqi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.b.128" => "__builtin_ia32_compressstoreuqi128_mask",
|
||||
"llvm.x86.avx512.mask.compress.w.512" => "__builtin_ia32_compresshi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.w.256" => "__builtin_ia32_compresshi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.w.128" => "__builtin_ia32_compresshi128_mask",
|
||||
"llvm.x86.avx512.mask.compress.b.512" => "__builtin_ia32_compressqi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.b.256" => "__builtin_ia32_compressqi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.b.128" => "__builtin_ia32_compressqi128_mask",
|
||||
"llvm.x86.avx512.mask.expand.w.512" => "__builtin_ia32_expandhi512_mask",
|
||||
"llvm.x86.avx512.mask.expand.w.256" => "__builtin_ia32_expandhi256_mask",
|
||||
"llvm.x86.avx512.mask.expand.w.128" => "__builtin_ia32_expandhi128_mask",
|
||||
"llvm.x86.avx512.mask.expand.b.512" => "__builtin_ia32_expandqi512_mask",
|
||||
"llvm.x86.avx512.mask.expand.b.256" => "__builtin_ia32_expandqi256_mask",
|
||||
"llvm.x86.avx512.mask.expand.b.128" => "__builtin_ia32_expandqi128_mask",
|
||||
"llvm.fshl.v8i64" => "__builtin_ia32_vpshldv_v8di",
|
||||
"llvm.fshl.v4i64" => "__builtin_ia32_vpshldv_v4di",
|
||||
"llvm.fshl.v2i64" => "__builtin_ia32_vpshldv_v2di",
|
||||
"llvm.fshl.v16i32" => "__builtin_ia32_vpshldv_v16si",
|
||||
"llvm.fshl.v8i32" => "__builtin_ia32_vpshldv_v8si",
|
||||
"llvm.fshl.v4i32" => "__builtin_ia32_vpshldv_v4si",
|
||||
"llvm.fshl.v32i16" => "__builtin_ia32_vpshldv_v32hi",
|
||||
"llvm.fshl.v16i16" => "__builtin_ia32_vpshldv_v16hi",
|
||||
"llvm.fshl.v8i16" => "__builtin_ia32_vpshldv_v8hi",
|
||||
"llvm.fshr.v8i64" => "__builtin_ia32_vpshrdv_v8di",
|
||||
"llvm.fshr.v4i64" => "__builtin_ia32_vpshrdv_v4di",
|
||||
"llvm.fshr.v2i64" => "__builtin_ia32_vpshrdv_v2di",
|
||||
"llvm.fshr.v16i32" => "__builtin_ia32_vpshrdv_v16si",
|
||||
"llvm.fshr.v8i32" => "__builtin_ia32_vpshrdv_v8si",
|
||||
"llvm.fshr.v4i32" => "__builtin_ia32_vpshrdv_v4si",
|
||||
"llvm.fshr.v32i16" => "__builtin_ia32_vpshrdv_v32hi",
|
||||
"llvm.fshr.v16i16" => "__builtin_ia32_vpshrdv_v16hi",
|
||||
"llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi",
|
||||
"llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3",
|
||||
"llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3",
|
||||
|
||||
// The above doc points to unknown builtins for the following, so override them:
|
||||
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
|
||||
|
@ -239,7 +624,151 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
|
||||
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
|
||||
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
|
||||
"" => "",
|
||||
"llvm.x86.avx512.pslli.d.512" => "__builtin_ia32_pslldi512_mask",
|
||||
"llvm.x86.avx512.psrli.d.512" => "__builtin_ia32_psrldi512_mask",
|
||||
"llvm.x86.avx512.pslli.q.512" => "__builtin_ia32_psllqi512_mask",
|
||||
"llvm.x86.avx512.psrli.q.512" => "__builtin_ia32_psrlqi512_mask",
|
||||
"llvm.x86.avx512.psll.d.512" => "__builtin_ia32_pslld512_mask",
|
||||
"llvm.x86.avx512.psrl.d.512" => "__builtin_ia32_psrld512_mask",
|
||||
"llvm.x86.avx512.psll.q.512" => "__builtin_ia32_psllq512_mask",
|
||||
"llvm.x86.avx512.psrl.q.512" => "__builtin_ia32_psrlq512_mask",
|
||||
"llvm.x86.avx512.psra.d.512" => "__builtin_ia32_psrad512_mask",
|
||||
"llvm.x86.avx512.psra.q.512" => "__builtin_ia32_psraq512_mask",
|
||||
"llvm.x86.avx512.psra.q.256" => "__builtin_ia32_psraq256_mask",
|
||||
"llvm.x86.avx512.psra.q.128" => "__builtin_ia32_psraq128_mask",
|
||||
"llvm.x86.avx512.psrai.d.512" => "__builtin_ia32_psradi512_mask",
|
||||
"llvm.x86.avx512.psrai.q.512" => "__builtin_ia32_psraqi512_mask",
|
||||
"llvm.x86.avx512.psrai.q.256" => "__builtin_ia32_psraqi256_mask",
|
||||
"llvm.x86.avx512.psrai.q.128" => "__builtin_ia32_psraqi128_mask",
|
||||
"llvm.x86.avx512.psrav.d.512" => "__builtin_ia32_psrav16si_mask",
|
||||
"llvm.x86.avx512.psrav.q.512" => "__builtin_ia32_psrav8di_mask",
|
||||
"llvm.x86.avx512.psrav.q.256" => "__builtin_ia32_psravq256_mask",
|
||||
"llvm.x86.avx512.psrav.q.128" => "__builtin_ia32_psravq128_mask",
|
||||
"llvm.x86.avx512.psllv.d.512" => "__builtin_ia32_psllv16si_mask",
|
||||
"llvm.x86.avx512.psrlv.d.512" => "__builtin_ia32_psrlv16si_mask",
|
||||
"llvm.x86.avx512.psllv.q.512" => "__builtin_ia32_psllv8di_mask",
|
||||
"llvm.x86.avx512.psrlv.q.512" => "__builtin_ia32_psrlv8di_mask",
|
||||
"llvm.x86.avx512.permvar.si.512" => "__builtin_ia32_permvarsi512_mask",
|
||||
"llvm.x86.avx512.vpermilvar.ps.512" => "__builtin_ia32_vpermilvarps512_mask",
|
||||
"llvm.x86.avx512.vpermilvar.pd.512" => "__builtin_ia32_vpermilvarpd512_mask",
|
||||
"llvm.x86.avx512.permvar.di.512" => "__builtin_ia32_permvardi512_mask",
|
||||
"llvm.x86.avx512.permvar.di.256" => "__builtin_ia32_permvardi256_mask",
|
||||
"llvm.x86.avx512.permvar.sf.512" => "__builtin_ia32_permvarsf512_mask",
|
||||
"llvm.x86.avx512.permvar.df.512" => "__builtin_ia32_permvardf512_mask",
|
||||
"llvm.x86.avx512.permvar.df.256" => "__builtin_ia32_permvardf256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.d.512" => "__builtin_ia32_vpermi2vard512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.d.256" => "__builtin_ia32_vpermi2vard256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.d.128" => "__builtin_ia32_vpermi2vard128_mask",
|
||||
"llvm.x86.avx512.vpermi2var.q.512" => "__builtin_ia32_vpermi2varq512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.q.256" => "__builtin_ia32_vpermi2varq256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.q.128" => "__builtin_ia32_vpermi2varq128_mask",
|
||||
"llvm.x86.avx512.vpermi2var.ps.512" => "__builtin_ia32_vpermi2varps512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.ps.256" => "__builtin_ia32_vpermi2varps256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.ps.128" => "__builtin_ia32_vpermi2varps128_mask",
|
||||
"llvm.x86.avx512.vpermi2var.pd.512" => "__builtin_ia32_vpermi2varpd512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.pd.256" => "__builtin_ia32_vpermi2varpd256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.pd.128" => "__builtin_ia32_vpermi2varpd128_mask",
|
||||
"llvm.x86.avx512.mask.add.ss.round" => "__builtin_ia32_addss_mask_round",
|
||||
"llvm.x86.avx512.mask.add.sd.round" => "__builtin_ia32_addsd_mask_round",
|
||||
"llvm.x86.avx512.mask.sub.ss.round" => "__builtin_ia32_subss_mask_round",
|
||||
"llvm.x86.avx512.mask.sub.sd.round" => "__builtin_ia32_subsd_mask_round",
|
||||
"llvm.x86.avx512.mask.mul.ss.round" => "__builtin_ia32_mulss_mask_round",
|
||||
"llvm.x86.avx512.mask.mul.sd.round" => "__builtin_ia32_mulsd_mask_round",
|
||||
"llvm.x86.avx512.mask.div.ss.round" => "__builtin_ia32_divss_mask_round",
|
||||
"llvm.x86.avx512.mask.div.sd.round" => "__builtin_ia32_divsd_mask_round",
|
||||
"llvm.x86.avx512.mask.cvtss2sd.round" => "__builtin_ia32_cvtss2sd_mask_round",
|
||||
"llvm.x86.avx512.mask.cvtsd2ss.round" => "__builtin_ia32_cvtsd2ss_mask_round",
|
||||
"llvm.x86.avx512.mask.range.ss" => "__builtin_ia32_rangess128_mask_round",
|
||||
"llvm.x86.avx512.mask.range.sd" => "__builtin_ia32_rangesd128_mask_round",
|
||||
"llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_mask_round",
|
||||
"llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_mask_round",
|
||||
"llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_mask_round",
|
||||
"llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.add.sh.round" => "__builtin_ia32_addsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.div.sh.round" => "__builtin_ia32_divsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.getmant.sh" => "__builtin_ia32_getmantsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.max.sh.round" => "__builtin_ia32_maxsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.min.sh.round" => "__builtin_ia32_minsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.mul.sh.round" => "__builtin_ia32_mulsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.rndscale.sh" => "__builtin_ia32_rndscalesh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.scalef.sh" => "__builtin_ia32_scalefsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.sub.sh.round" => "__builtin_ia32_subsh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.vcvtsd2sh.round" => "__builtin_ia32_vcvtsd2sh_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.vcvtsh2sd.round" => "__builtin_ia32_vcvtsh2sd_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.vcvtsh2ss.round" => "__builtin_ia32_vcvtsh2ss_mask_round",
|
||||
"llvm.x86.avx512fp16.mask.vcvtss2sh.round" => "__builtin_ia32_vcvtss2sh_mask_round",
|
||||
"llvm.x86.aesni.aesenc.256" => "__builtin_ia32_vaesenc_v32qi",
|
||||
"llvm.x86.aesni.aesenclast.256" => "__builtin_ia32_vaesenclast_v32qi",
|
||||
"llvm.x86.aesni.aesdec.256" => "__builtin_ia32_vaesdec_v32qi",
|
||||
"llvm.x86.aesni.aesdeclast.256" => "__builtin_ia32_vaesdeclast_v32qi",
|
||||
"llvm.x86.aesni.aesenc.512" => "__builtin_ia32_vaesenc_v64qi",
|
||||
"llvm.x86.aesni.aesenclast.512" => "__builtin_ia32_vaesenclast_v64qi",
|
||||
"llvm.x86.aesni.aesdec.512" => "__builtin_ia32_vaesdec_v64qi",
|
||||
"llvm.x86.aesni.aesdeclast.512" => "__builtin_ia32_vaesdeclast_v64qi",
|
||||
"llvm.x86.avx512bf16.cvtne2ps2bf16.128" => "__builtin_ia32_cvtne2ps2bf16_v8bf",
|
||||
"llvm.x86.avx512bf16.cvtne2ps2bf16.256" => "__builtin_ia32_cvtne2ps2bf16_v16bf",
|
||||
"llvm.x86.avx512bf16.cvtne2ps2bf16.512" => "__builtin_ia32_cvtne2ps2bf16_v32bf",
|
||||
"llvm.x86.avx512bf16.cvtneps2bf16.256" => "__builtin_ia32_cvtneps2bf16_v8sf",
|
||||
"llvm.x86.avx512bf16.cvtneps2bf16.512" => "__builtin_ia32_cvtneps2bf16_v16sf",
|
||||
"llvm.x86.avx512bf16.dpbf16ps.128" => "__builtin_ia32_dpbf16ps_v4sf",
|
||||
"llvm.x86.avx512bf16.dpbf16ps.256" => "__builtin_ia32_dpbf16ps_v8sf",
|
||||
"llvm.x86.avx512bf16.dpbf16ps.512" => "__builtin_ia32_dpbf16ps_v16sf",
|
||||
"llvm.x86.pclmulqdq.512" => "__builtin_ia32_vpclmulqdq_v8di",
|
||||
"llvm.x86.pclmulqdq.256" => "__builtin_ia32_vpclmulqdq_v4di",
|
||||
"llvm.x86.avx512.pmulhu.w.512" => "__builtin_ia32_pmulhuw512_mask",
|
||||
"llvm.x86.avx512.pmulh.w.512" => "__builtin_ia32_pmulhw512_mask",
|
||||
"llvm.x86.avx512.pmul.hr.sw.512" => "__builtin_ia32_pmulhrsw512_mask",
|
||||
"llvm.x86.avx512.pmaddw.d.512" => "__builtin_ia32_pmaddwd512_mask",
|
||||
"llvm.x86.avx512.pmaddubs.w.512" => "__builtin_ia32_pmaddubsw512_mask",
|
||||
"llvm.x86.avx512.packssdw.512" => "__builtin_ia32_packssdw512_mask",
|
||||
"llvm.x86.avx512.packsswb.512" => "__builtin_ia32_packsswb512_mask",
|
||||
"llvm.x86.avx512.packusdw.512" => "__builtin_ia32_packusdw512_mask",
|
||||
"llvm.x86.avx512.packuswb.512" => "__builtin_ia32_packuswb512_mask",
|
||||
"llvm.x86.avx512.pavg.w.512" => "__builtin_ia32_pavgw512_mask",
|
||||
"llvm.x86.avx512.pavg.b.512" => "__builtin_ia32_pavgb512_mask",
|
||||
"llvm.x86.avx512.psll.w.512" => "__builtin_ia32_psllw512_mask",
|
||||
"llvm.x86.avx512.pslli.w.512" => "__builtin_ia32_psllwi512_mask",
|
||||
"llvm.x86.avx512.psllv.w.512" => "__builtin_ia32_psllv32hi_mask",
|
||||
"llvm.x86.avx512.psllv.w.256" => "__builtin_ia32_psllv16hi_mask",
|
||||
"llvm.x86.avx512.psllv.w.128" => "__builtin_ia32_psllv8hi_mask",
|
||||
"llvm.x86.avx512.psrl.w.512" => "__builtin_ia32_psrlw512_mask",
|
||||
"llvm.x86.avx512.psrli.w.512" => "__builtin_ia32_psrlwi512_mask",
|
||||
"llvm.x86.avx512.psrlv.w.512" => "__builtin_ia32_psrlv32hi_mask",
|
||||
"llvm.x86.avx512.psrlv.w.256" => "__builtin_ia32_psrlv16hi_mask",
|
||||
"llvm.x86.avx512.psrlv.w.128" => "__builtin_ia32_psrlv8hi_mask",
|
||||
"llvm.x86.avx512.psra.w.512" => "__builtin_ia32_psraw512_mask",
|
||||
"llvm.x86.avx512.psrai.w.512" => "__builtin_ia32_psrawi512_mask",
|
||||
"llvm.x86.avx512.psrav.w.512" => "__builtin_ia32_psrav32hi_mask",
|
||||
"llvm.x86.avx512.psrav.w.256" => "__builtin_ia32_psrav16hi_mask",
|
||||
"llvm.x86.avx512.psrav.w.128" => "__builtin_ia32_psrav8hi_mask",
|
||||
"llvm.x86.avx512.vpermi2var.hi.512" => "__builtin_ia32_vpermt2varhi512_mask",
|
||||
"llvm.x86.avx512.vpermi2var.hi.256" => "__builtin_ia32_vpermt2varhi256_mask",
|
||||
"llvm.x86.avx512.vpermi2var.hi.128" => "__builtin_ia32_vpermt2varhi128_mask",
|
||||
"llvm.x86.avx512.permvar.hi.512" => "__builtin_ia32_permvarhi512_mask",
|
||||
"llvm.x86.avx512.permvar.hi.256" => "__builtin_ia32_permvarhi256_mask",
|
||||
"llvm.x86.avx512.permvar.hi.128" => "__builtin_ia32_permvarhi128_mask",
|
||||
"llvm.x86.avx512.pshuf.b.512" => "__builtin_ia32_pshufb512_mask",
|
||||
"llvm.x86.avx512.dbpsadbw.512" => "__builtin_ia32_dbpsadbw512_mask",
|
||||
"llvm.x86.avx512.dbpsadbw.256" => "__builtin_ia32_dbpsadbw256_mask",
|
||||
"llvm.x86.avx512.dbpsadbw.128" => "__builtin_ia32_dbpsadbw128_mask",
|
||||
"llvm.x86.avx512.vpmadd52h.uq.512" => "__builtin_ia32_vpmadd52huq512_mask",
|
||||
"llvm.x86.avx512.vpmadd52l.uq.512" => "__builtin_ia32_vpmadd52luq512_mask",
|
||||
"llvm.x86.avx512.vpmadd52h.uq.256" => "__builtin_ia32_vpmadd52huq256_mask",
|
||||
"llvm.x86.avx512.vpmadd52l.uq.256" => "__builtin_ia32_vpmadd52luq256_mask",
|
||||
"llvm.x86.avx512.vpmadd52h.uq.128" => "__builtin_ia32_vpmadd52huq128_mask",
|
||||
"llvm.x86.avx512.vpdpwssd.512" => "__builtin_ia32_vpdpwssd_v16si",
|
||||
"llvm.x86.avx512.vpdpwssd.256" => "__builtin_ia32_vpdpwssd_v8si",
|
||||
"llvm.x86.avx512.vpdpwssd.128" => "__builtin_ia32_vpdpwssd_v4si",
|
||||
"llvm.x86.avx512.vpdpwssds.512" => "__builtin_ia32_vpdpwssds_v16si",
|
||||
"llvm.x86.avx512.vpdpwssds.256" => "__builtin_ia32_vpdpwssds_v8si",
|
||||
"llvm.x86.avx512.vpdpwssds.128" => "__builtin_ia32_vpdpwssds_v4si",
|
||||
"llvm.x86.avx512.vpdpbusd.512" => "__builtin_ia32_vpdpbusd_v16si",
|
||||
"llvm.x86.avx512.vpdpbusd.256" => "__builtin_ia32_vpdpbusd_v8si",
|
||||
"llvm.x86.avx512.vpdpbusd.128" => "__builtin_ia32_vpdpbusd_v4si",
|
||||
"llvm.x86.avx512.vpdpbusds.512" => "__builtin_ia32_vpdpbusds_v16si",
|
||||
"llvm.x86.avx512.vpdpbusds.256" => "__builtin_ia32_vpdpbusds_v8si",
|
||||
"llvm.x86.avx512.vpdpbusds.128" => "__builtin_ia32_vpdpbusds_v4si",
|
||||
|
||||
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
|
||||
_ => include!("archs.rs"),
|
||||
};
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
pub mod llvm;
|
||||
mod simd;
|
||||
|
||||
#[cfg(feature="master")]
|
||||
use std::iter;
|
||||
|
||||
use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp, FunctionType};
|
||||
use rustc_codegen_ssa::MemFlags;
|
||||
use rustc_codegen_ssa::base::wants_msvc_seh;
|
||||
|
@ -8,15 +11,23 @@ use rustc_codegen_ssa::common::IntPredicate;
|
|||
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
|
||||
use rustc_codegen_ssa::mir::place::PlaceRef;
|
||||
use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
|
||||
#[cfg(feature="master")]
|
||||
use rustc_codegen_ssa::traits::{DerivedTypeMethods, MiscMethods};
|
||||
use rustc_middle::bug;
|
||||
use rustc_middle::ty::{self, Instance, Ty};
|
||||
use rustc_middle::ty::layout::LayoutOf;
|
||||
#[cfg(feature="master")]
|
||||
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
|
||||
use rustc_span::{Span, Symbol, symbol::kw, sym};
|
||||
use rustc_target::abi::HasDataLayout;
|
||||
use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
|
||||
use rustc_target::spec::PanicStrategy;
|
||||
#[cfg(feature="master")]
|
||||
use rustc_target::spec::abi::Abi;
|
||||
|
||||
use crate::abi::GccType;
|
||||
#[cfg(feature="master")]
|
||||
use crate::abi::FnAbiGccExt;
|
||||
use crate::builder::Builder;
|
||||
use crate::common::{SignType, TypeReflection};
|
||||
use crate::context::CodegenCx;
|
||||
|
@ -93,7 +104,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
|
|||
let name = tcx.item_name(def_id);
|
||||
let name_str = name.as_str();
|
||||
|
||||
let llret_ty = self.layout_of(ret_ty).gcc_type(self, true);
|
||||
let llret_ty = self.layout_of(ret_ty).gcc_type(self);
|
||||
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
|
||||
|
||||
let simple = get_simple_intrinsic(self, name);
|
||||
|
@ -406,7 +417,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
|
|||
/// Gets the LLVM type for a place of the original Rust type of
|
||||
/// this argument/return, i.e., the result of `type_of::type_of`.
|
||||
fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
|
||||
self.layout.gcc_type(cx, true)
|
||||
self.layout.gcc_type(cx)
|
||||
}
|
||||
|
||||
/// Stores a direct/indirect value described by this ArgAbi into a
|
||||
|
@ -1122,10 +1133,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
}
|
||||
|
||||
fn try_intrinsic<'gcc, 'tcx>(bx: &mut Builder<'_, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
|
||||
// NOTE: the `|| true` here is to use the panic=abort strategy with panic=unwind too
|
||||
if bx.sess().panic_strategy() == PanicStrategy::Abort || true {
|
||||
// TODO(bjorn3): Properly implement unwinding and remove the `|| true` once this is done.
|
||||
fn try_intrinsic<'a, 'b, 'gcc, 'tcx>(bx: &'b mut Builder<'a, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
|
||||
if bx.sess().panic_strategy() == PanicStrategy::Abort {
|
||||
bx.call(bx.type_void(), None, try_func, &[data], None);
|
||||
// Return 0 unconditionally from the intrinsic call;
|
||||
// we can never unwind.
|
||||
|
@ -1136,6 +1145,141 @@ fn try_intrinsic<'gcc, 'tcx>(bx: &mut Builder<'_, 'gcc, 'tcx>, try_func: RValue<
|
|||
unimplemented!();
|
||||
}
|
||||
else {
|
||||
#[cfg(feature="master")]
|
||||
codegen_gnu_try(bx, try_func, data, _catch_func, dest);
|
||||
#[cfg(not(feature="master"))]
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
// Implements the standard Rust `try` function on top of the GNU-like
// exception model (i.e., the usual semantics of LLVM's `landingpad` and
// `invoke` instructions).
//
// Rather than emitting the `invoke` inline at the call site, this always goes
// through a shim function. The reason is that in LLVM a function definition
// may only have one personality: the `try` intrinsic gets inlined into its
// caller, and that caller may already use a different personality function.
// Routing through a shim guarantees the shim carries the right personality.
//
// The shim's `i32` result is stored into `dest`.
#[cfg(feature="master")]
fn codegen_gnu_try<'gcc>(bx: &mut Builder<'_, 'gcc, '_>, try_func: RValue<'gcc>, data: RValue<'gcc>, catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
    let cx: &CodegenCx<'gcc, '_> = bx.cx;
    let (shim_ty, shim) = get_rust_try_fn(cx, &mut |mut shim_bx| {
        // Body of the shim described above:
        //
        //   entry:
        //      invoke %try_func(%data) normal %normal unwind %catch
        //
        //   normal:
        //      ret 0
        //
        //   catch:
        //      (%ptr, _) = landingpad
        //      call %catch_func(%data, %ptr)
        //      ret 1
        let then = shim_bx.append_sibling_block("then");
        let catch = shim_bx.append_sibling_block("catch");

        // Inside the shim, the three arguments are read back from the shim's
        // own parameters rather than captured from the enclosing function.
        let shim_fn = shim_bx.current_func();
        let shim_try_func = shim_fn.get_param(0).to_rvalue();
        let shim_data = shim_fn.get_param(1).to_rvalue();
        let shim_catch_func = shim_fn.get_param(2).to_rvalue();
        let try_func_ty = shim_bx.type_func(&[shim_bx.type_i8p()], shim_bx.type_void());

        // Remember the block we started in so the invoke can be added to it last.
        let entry_block = shim_bx.block.clone();

        // Normal path: the call returned without unwinding.
        shim_bx.switch_to_block(then);
        shim_bx.ret(shim_bx.const_i32(0));

        // Type indicator for the exception being thrown.
        //
        // The value is a pointer to the exception object
        // being thrown.
        shim_bx.switch_to_block(catch);
        shim_bx.set_personality_fn(shim_bx.eh_personality());

        let eh_pointer_builtin = shim_bx.cx.context.get_target_builtin_function("__builtin_eh_pointer");
        let zero = shim_bx.cx.context.new_rvalue_zero(shim_bx.int_type);
        let ptr = shim_bx.cx.context.new_call(None, eh_pointer_builtin, &[zero]);
        let catch_ty = shim_bx.type_func(&[shim_bx.type_i8p(), shim_bx.type_i8p()], shim_bx.type_void());
        shim_bx.call(catch_ty, None, shim_catch_func, &[shim_data, ptr], None);
        shim_bx.ret(shim_bx.const_i32(1));

        // NOTE: the blocks must be filled before adding the try/catch, otherwise gcc will not
        // generate a try/catch.
        // FIXME(antoyo): add a check in the libgccjit API to prevent this.
        shim_bx.switch_to_block(entry_block);
        shim_bx.invoke(try_func_ty, None, shim_try_func, &[shim_data], then, catch, None);
    });

    // NOTE(review): the target type of this transmute is inferred from the
    // `bx.call` below; presumably it reinterprets the shim's function handle
    // as the callee value `call` expects — confirm the two types are
    // layout-compatible.
    let shim = unsafe { std::mem::transmute(shim) };

    // A plain call (no invoke) is enough here: by definition the shim
    // can't panic, since catching the panic is precisely its job.
    let ret = bx.call(shim_ty, None, shim, &[try_func, data, catch_func], None);
    let i32_align = bx.tcx().data_layout.i32_align.abi;
    bx.store(ret, dest, i32_align);
}
|
||||
|
||||
|
||||
// Returns a handle to the `__rust_try` function used to catch exceptions,
// together with its GCC type.
//
// The function is generated at most once: the result is stored in
// `cx.rust_try_fn` and returned directly on later calls, in which case
// `codegen` is not invoked.
#[cfg(feature="master")]
fn get_rust_try_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) {
    if let Some(cached) = cx.rust_try_fn.get() {
        return cached;
    }

    // Build the types making up the signature of `__rust_try` up front.
    let tcx = cx.tcx;
    let i8_ptr = tcx.mk_mut_ptr(tcx.types.i8);

    // `unsafe fn(*mut i8) -> ()`
    let try_sig = tcx.mk_fn_sig(
        iter::once(i8_ptr),
        tcx.mk_unit(),
        false,
        rustc_hir::Unsafety::Unsafe,
        Abi::Rust,
    );
    let try_fn_ptr_ty = tcx.mk_fn_ptr(ty::Binder::dummy(try_sig));

    // `unsafe fn(*mut i8, *mut i8) -> ()`
    let catch_sig = tcx.mk_fn_sig(
        [i8_ptr, i8_ptr].iter().copied(),
        tcx.mk_unit(),
        false,
        rustc_hir::Unsafety::Unsafe,
        Abi::Rust,
    );
    let catch_fn_ptr_ty = tcx.mk_fn_ptr(ty::Binder::dummy(catch_sig));

    // `unsafe fn(unsafe fn(*mut i8) -> (), *mut i8, unsafe fn(*mut i8, *mut i8) -> ()) -> i32`
    let rust_try_sig = ty::Binder::dummy(tcx.mk_fn_sig(
        [try_fn_ptr_ty, i8_ptr, catch_fn_ptr_ty],
        tcx.types.i32,
        false,
        rustc_hir::Unsafety::Unsafe,
        Abi::Rust,
    ));

    // Generate the shim, then cache it so later calls return early above.
    let generated = gen_fn(cx, "__rust_try", rust_try_sig, codegen);
    cx.rust_try_fn.set(Some(generated));
    generated
}
|
||||
|
||||
// Declares a shim function named `name` with signature `rust_fn_sig` and
// hands a `Builder` positioned on its entry block to `codegen`, which is
// expected to emit the body. Returns the function's GCC type and the
// declared function.
//
// Currently this is mainly used by the `try` intrinsic helpers above.
#[cfg(feature="master")]
fn gen_fn<'a, 'gcc, 'tcx>(cx: &'a CodegenCx<'gcc, 'tcx>, name: &str, rust_fn_sig: ty::PolyFnSig<'tcx>, codegen: &mut dyn FnMut(Builder<'a, 'gcc, 'tcx>)) -> (Type<'gcc>, Function<'gcc>) {
    let abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
    // Only the first component (the function's type) is needed here.
    let (fn_type, _, _, _) = abi.gcc_type(cx);

    // FIXME(eddyb) find a nicer way to do this.
    // The linkage is set before the declaration below.
    cx.linkage.set(FunctionType::Internal);
    let declared = cx.declare_fn(name, abi);

    // NOTE(review): the target type of this transmute is inferred from the
    // helper calls that follow; presumably it reinterprets the declared
    // function handle as the value type those helpers take — confirm the
    // two types are layout-compatible.
    let declared_val = unsafe { std::mem::transmute(declared) };
    cx.set_frame_pointer_type(declared_val);
    cx.apply_target_cpu_attr(declared_val);

    // Create the entry block and let the caller fill in the body.
    let entry = Builder::append_block(cx, declared_val, "entry-block");
    codegen(Builder::build(cx, entry));

    (fn_type, declared)
}
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
use std::cmp::Ordering;
|
||||
#[cfg(feature="master")]
|
||||
use gccjit::{ComparisonOp, UnaryOp};
|
||||
use gccjit::ToRValue;
|
||||
use gccjit::{BinaryOp, RValue, Type};
|
||||
|
||||
use gccjit::{BinaryOp, RValue, ToRValue, Type};
|
||||
use rustc_codegen_ssa::base::compare_simd_types;
|
||||
use rustc_codegen_ssa::common::TypeKind;
|
||||
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
|
||||
#[cfg(feature="master")]
|
||||
use rustc_codegen_ssa::errors::ExpectedPointerMutability;
|
||||
use rustc_codegen_ssa::errors::InvalidMonomorphization;
|
||||
use rustc_codegen_ssa::mir::operand::OperandRef;
|
||||
use rustc_codegen_ssa::mir::place::PlaceRef;
|
||||
use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
|
||||
|
@ -14,18 +19,21 @@ use rustc_span::{sym, Span, Symbol};
|
|||
use rustc_target::abi::Align;
|
||||
|
||||
use crate::builder::Builder;
|
||||
#[cfg(feature="master")]
|
||||
use crate::context::CodegenCx;
|
||||
#[cfg(feature="master")]
|
||||
use crate::errors::{InvalidMonomorphizationExpectedSignedUnsigned, InvalidMonomorphizationInsertedType};
|
||||
use crate::errors::{
|
||||
InvalidMonomorphizationExpectedSignedUnsigned, InvalidMonomorphizationExpectedSimd,
|
||||
InvalidMonomorphizationInsertedType, InvalidMonomorphizationInvalidBitmask,
|
||||
InvalidMonomorphizationExpectedSimd,
|
||||
InvalidMonomorphizationInvalidBitmask,
|
||||
InvalidMonomorphizationInvalidFloatVector, InvalidMonomorphizationMaskType,
|
||||
InvalidMonomorphizationMismatchedLengths, InvalidMonomorphizationNotFloat,
|
||||
InvalidMonomorphizationReturnElement, InvalidMonomorphizationReturnIntegerType,
|
||||
InvalidMonomorphizationReturnLength, InvalidMonomorphizationReturnLengthInputType,
|
||||
InvalidMonomorphizationReturnType, InvalidMonomorphizationSimdShuffle,
|
||||
InvalidMonomorphizationUnrecognized, InvalidMonomorphizationUnsupportedCast,
|
||||
InvalidMonomorphizationUnsupportedElement, InvalidMonomorphizationUnsupportedOperation,
|
||||
InvalidMonomorphizationUnrecognized, InvalidMonomorphizationUnsupportedElement,
|
||||
InvalidMonomorphizationUnsupportedOperation,
|
||||
};
|
||||
use crate::intrinsic;
|
||||
|
||||
pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
||||
bx: &mut Builder<'a, 'gcc, 'tcx>,
|
||||
|
@ -105,14 +113,19 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
let arg1_vector_type = arg1_type.unqualified().dyncast_vector().expect("vector type");
|
||||
let arg1_element_type = arg1_vector_type.get_element_type();
|
||||
|
||||
// NOTE: since the arguments can be vectors of floats, make sure the mask is a vector of
|
||||
// integer.
|
||||
let mask_element_type = bx.type_ix(arg1_element_type.get_size() as u64 * 8);
|
||||
let vector_mask_type = bx.context.new_vector_type(mask_element_type, arg1_vector_type.get_num_units() as u64);
|
||||
|
||||
let mut elements = vec![];
|
||||
let one = bx.context.new_rvalue_one(mask.get_type());
|
||||
for _ in 0..len {
|
||||
let element = bx.context.new_cast(None, mask & one, arg1_element_type);
|
||||
let element = bx.context.new_cast(None, mask & one, mask_element_type);
|
||||
elements.push(element);
|
||||
mask = mask >> one;
|
||||
}
|
||||
let vector_mask = bx.context.new_rvalue_from_vector(None, arg1_type, &elements);
|
||||
let vector_mask = bx.context.new_rvalue_from_vector(None, vector_mask_type, &elements);
|
||||
|
||||
return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
|
||||
}
|
||||
|
@ -210,48 +223,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
let vector = args[0].immediate();
|
||||
let index = args[1].immediate();
|
||||
let value = args[2].immediate();
|
||||
// TODO(antoyo): use a recursive unqualified() here.
|
||||
let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
|
||||
let element_type = vector_type.get_element_type();
|
||||
// NOTE: we cannot cast to an array and assign to its element here because the value might
|
||||
// not be an l-value. So, call a builtin to set the element.
|
||||
// TODO(antoyo): perhaps we could create a new vector or maybe there's a GIMPLE instruction for that?
|
||||
// TODO(antoyo): don't use target specific builtins here.
|
||||
let func_name = match in_len {
|
||||
2 => {
|
||||
if element_type == bx.i64_type {
|
||||
"__builtin_ia32_vec_set_v2di"
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
4 => {
|
||||
if element_type == bx.i32_type {
|
||||
"__builtin_ia32_vec_set_v4si"
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
8 => {
|
||||
if element_type == bx.i16_type {
|
||||
"__builtin_ia32_vec_set_v8hi"
|
||||
} else {
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
_ => unimplemented!("Len: {}", in_len),
|
||||
};
|
||||
let builtin = bx.context.get_target_builtin_function(func_name);
|
||||
let param1_type = builtin.get_param(0).to_rvalue().get_type();
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
|
||||
let result = bx.context.new_call(
|
||||
None,
|
||||
builtin,
|
||||
&[vector, value, bx.context.new_cast(None, index, bx.int_type)],
|
||||
);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
|
||||
let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
|
||||
bx.llbb().add_assignment(None, variable, vector);
|
||||
let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
|
||||
// TODO(antoyo): if simd_insert is constant, use BIT_REF.
|
||||
bx.llbb().add_assignment(None, lvalue, value);
|
||||
return Ok(variable.to_rvalue());
|
||||
}
|
||||
|
||||
#[cfg(feature = "master")]
|
||||
|
@ -280,7 +257,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
|
||||
}
|
||||
|
||||
if name == sym::simd_cast {
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_cast || name == sym::simd_as {
|
||||
require_simd!(ret_ty, "return");
|
||||
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
|
@ -301,125 +279,40 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
|
||||
enum Style {
|
||||
Float,
|
||||
Int(/* is signed? */ bool),
|
||||
Int,
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
let (in_style, in_width) = match in_elem.kind() {
|
||||
// vectors of pointer-sized integers should've been
|
||||
// disallowed before here, so this unwrap is safe.
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
let (out_style, out_width) = match out_elem.kind() {
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
|
||||
let extend = |in_type, out_type| {
|
||||
let vector_type = bx.context.new_vector_type(out_type, 8);
|
||||
let vector = args[0].immediate();
|
||||
let array_type = bx.context.new_array_type(None, in_type, 8);
|
||||
// TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting.
|
||||
let array = bx.context.new_bitcast(None, vector, array_type);
|
||||
|
||||
let cast_vec_element = |index| {
|
||||
let index = bx.context.new_rvalue_from_int(bx.int_type, index);
|
||||
bx.context.new_cast(
|
||||
None,
|
||||
bx.context.new_array_access(None, array, index).to_rvalue(),
|
||||
out_type,
|
||||
)
|
||||
let in_style =
|
||||
match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => Style::Int,
|
||||
ty::Float(_) => Style::Float,
|
||||
_ => Style::Unsupported,
|
||||
};
|
||||
|
||||
bx.context.new_rvalue_from_vector(
|
||||
None,
|
||||
vector_type,
|
||||
&[
|
||||
cast_vec_element(0),
|
||||
cast_vec_element(1),
|
||||
cast_vec_element(2),
|
||||
cast_vec_element(3),
|
||||
cast_vec_element(4),
|
||||
cast_vec_element(5),
|
||||
cast_vec_element(6),
|
||||
cast_vec_element(7),
|
||||
],
|
||||
)
|
||||
};
|
||||
let out_style =
|
||||
match out_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => Style::Int,
|
||||
ty::Float(_) => Style::Float,
|
||||
_ => Style::Unsupported,
|
||||
};
|
||||
|
||||
match (in_style, out_style) {
|
||||
(Style::Int(in_is_signed), Style::Int(_)) => {
|
||||
return Ok(match in_width.cmp(&out_width) {
|
||||
Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
|
||||
Ordering::Equal => args[0].immediate(),
|
||||
Ordering::Less => {
|
||||
if in_is_signed {
|
||||
match (in_width, out_width) {
|
||||
// FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly
|
||||
// call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that
|
||||
// we can generate a call to it.
|
||||
(8, 16) => extend(bx.i8_type, bx.i16_type),
|
||||
(8, 32) => extend(bx.i8_type, bx.i32_type),
|
||||
(8, 64) => extend(bx.i8_type, bx.i64_type),
|
||||
(16, 32) => extend(bx.i16_type, bx.i32_type),
|
||||
(32, 64) => extend(bx.i32_type, bx.i64_type),
|
||||
(16, 64) => extend(bx.i16_type, bx.i64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
} else {
|
||||
match (in_width, out_width) {
|
||||
(8, 16) => extend(bx.u8_type, bx.u16_type),
|
||||
(8, 32) => extend(bx.u8_type, bx.u32_type),
|
||||
(8, 64) => extend(bx.u8_type, bx.u64_type),
|
||||
(16, 32) => extend(bx.u16_type, bx.u32_type),
|
||||
(16, 64) => extend(bx.u16_type, bx.u64_type),
|
||||
(32, 64) => extend(bx.u32_type, bx.u64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
}
|
||||
(Style::Unsupported, Style::Unsupported) => {
|
||||
require!(
|
||||
false,
|
||||
InvalidMonomorphization::UnsupportedCast {
|
||||
span,
|
||||
name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty,
|
||||
out_elem
|
||||
}
|
||||
});
|
||||
}
|
||||
(Style::Int(_), Style::Float) => {
|
||||
// TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is
|
||||
// doing like __builtin_convertvector?
|
||||
// Or maybe provide convert_vector as an API since it might not easy to get the
|
||||
// types of internal functions.
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Int(_)) => {
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Float) => {
|
||||
unimplemented!();
|
||||
}
|
||||
_ => { /* Unsupported. Fallthrough. */ }
|
||||
);
|
||||
},
|
||||
_ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
|
||||
}
|
||||
return_error!(InvalidMonomorphizationUnsupportedCast {
|
||||
span,
|
||||
name,
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty,
|
||||
out_elem
|
||||
});
|
||||
}
|
||||
|
||||
macro_rules! arith_binary {
|
||||
|
@ -436,6 +329,71 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
}
|
||||
}
|
||||
|
||||
if name == sym::simd_bitmask {
|
||||
// The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
|
||||
// vector mask and returns the most significant bit (MSB) of each lane in the form
|
||||
// of either:
|
||||
// * an unsigned integer
|
||||
// * an array of `u8`
|
||||
// If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits.
|
||||
//
|
||||
// The bit order of the result depends on the byte endianness, LSB-first for little
|
||||
// endian and MSB-first for big endian.
|
||||
|
||||
let vector = args[0].immediate();
|
||||
let vector_type = vector.get_type().dyncast_vector().expect("vector type");
|
||||
let elem_type = vector_type.get_element_type();
|
||||
|
||||
let expected_int_bits = in_len.max(8);
|
||||
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
|
||||
|
||||
// FIXME(antoyo): that's not going to work for masks bigger than 128 bits.
|
||||
let result_type = bx.type_ix(expected_int_bits);
|
||||
let mut result = bx.context.new_rvalue_zero(result_type);
|
||||
|
||||
let elem_size = elem_type.get_size() * 8;
|
||||
let sign_shift = bx.context.new_rvalue_from_int(elem_type, elem_size as i32 - 1);
|
||||
let one = bx.context.new_rvalue_one(elem_type);
|
||||
|
||||
let mut shift = 0;
|
||||
for i in 0..in_len {
|
||||
let elem = bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32));
|
||||
let shifted = elem >> sign_shift;
|
||||
let masked = shifted & one;
|
||||
result = result | (bx.context.new_cast(None, masked, result_type) << bx.context.new_rvalue_from_int(result_type, shift));
|
||||
shift += 1;
|
||||
}
|
||||
|
||||
match ret_ty.kind() {
|
||||
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {
|
||||
// Zero-extend iN to the bitmask type:
|
||||
return Ok(result);
|
||||
}
|
||||
ty::Array(elem, len)
|
||||
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
|
||||
&& len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all())
|
||||
== Some(expected_bytes) =>
|
||||
{
|
||||
// Zero-extend iN to the array length:
|
||||
let ze = bx.zext(result, bx.type_ix(expected_bytes * 8));
|
||||
|
||||
// Convert the integer to a byte array
|
||||
let ptr = bx.alloca(bx.type_ix(expected_bytes * 8), Align::ONE);
|
||||
bx.store(ze, ptr, Align::ONE);
|
||||
let array_ty = bx.type_array(bx.type_i8(), expected_bytes);
|
||||
let ptr = bx.pointercast(ptr, bx.cx.type_ptr_to(array_ty));
|
||||
return Ok(bx.load(array_ty, ptr, Align::ONE));
|
||||
}
|
||||
_ => return_error!(InvalidMonomorphization::CannotReturn {
|
||||
span,
|
||||
name,
|
||||
ret_ty,
|
||||
expected_int_bits,
|
||||
expected_bytes
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn simd_simple_float_intrinsic<'gcc, 'tcx>(
|
||||
name: Symbol,
|
||||
in_elem: Ty<'_>,
|
||||
|
@ -451,55 +409,66 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
return Err(());
|
||||
}};
|
||||
}
|
||||
let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
|
||||
let elem_ty = bx.cx.type_float_from_ty(*f);
|
||||
match f.bit_width() {
|
||||
32 => ("f32", elem_ty),
|
||||
64 => ("f64", elem_ty),
|
||||
_ => {
|
||||
return_error!(InvalidMonomorphizationInvalidFloatVector {
|
||||
span,
|
||||
name,
|
||||
elem_ty: f.name_str(),
|
||||
vec_ty: in_ty
|
||||
});
|
||||
let (elem_ty_str, elem_ty) =
|
||||
if let ty::Float(f) = in_elem.kind() {
|
||||
let elem_ty = bx.cx.type_float_from_ty(*f);
|
||||
match f.bit_width() {
|
||||
32 => ("f", elem_ty),
|
||||
64 => ("", elem_ty),
|
||||
_ => {
|
||||
return_error!(InvalidMonomorphizationInvalidFloatVector { span, name, elem_ty: f.name_str(), vec_ty: in_ty });
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return_error!(InvalidMonomorphizationNotFloat { span, name, ty: in_ty });
|
||||
};
|
||||
else {
|
||||
return_error!(InvalidMonomorphizationNotFloat { span, name, ty: in_ty });
|
||||
};
|
||||
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, in_len);
|
||||
|
||||
let (intr_name, fn_ty) = match name {
|
||||
sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)), // TODO(antoyo): pand with 170141183420855150465331762880109871103
|
||||
sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
|
||||
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
|
||||
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
|
||||
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
|
||||
sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
|
||||
_ => return_error!(InvalidMonomorphizationUnrecognized { span, name }),
|
||||
};
|
||||
let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
|
||||
let function = intrinsic::llvm::intrinsic(llvm_name, &bx.cx);
|
||||
let function: RValue<'gcc> = unsafe { std::mem::transmute(function) };
|
||||
let c = bx.call(
|
||||
fn_ty,
|
||||
None,
|
||||
function,
|
||||
&args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
|
||||
None,
|
||||
);
|
||||
let intr_name =
|
||||
match name {
|
||||
sym::simd_ceil => "ceil",
|
||||
sym::simd_fabs => "fabs", // TODO(antoyo): pand with 170141183420855150465331762880109871103
|
||||
sym::simd_fcos => "cos",
|
||||
sym::simd_fexp2 => "exp2",
|
||||
sym::simd_fexp => "exp",
|
||||
sym::simd_flog10 => "log10",
|
||||
sym::simd_flog2 => "log2",
|
||||
sym::simd_flog => "log",
|
||||
sym::simd_floor => "floor",
|
||||
sym::simd_fma => "fma",
|
||||
sym::simd_fpowi => "__builtin_powi",
|
||||
sym::simd_fpow => "pow",
|
||||
sym::simd_fsin => "sin",
|
||||
sym::simd_fsqrt => "sqrt",
|
||||
sym::simd_round => "round",
|
||||
sym::simd_trunc => "trunc",
|
||||
_ => return_error!(InvalidMonomorphizationUnrecognized { span, name })
|
||||
};
|
||||
let builtin_name = format!("{}{}", intr_name, elem_ty_str);
|
||||
let funcs = bx.cx.functions.borrow();
|
||||
let function = funcs.get(&builtin_name).unwrap_or_else(|| panic!("unable to find builtin function {}", builtin_name));
|
||||
|
||||
// TODO(antoyo): add platform-specific behavior here for architectures that have these
|
||||
// intrinsics as instructions (for instance, gpus)
|
||||
let mut vector_elements = vec![];
|
||||
for i in 0..in_len {
|
||||
let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
|
||||
// we have to treat fpowi specially, since fpowi's second argument is always an i32
|
||||
let arguments = if name == sym::simd_fpowi {
|
||||
vec![
|
||||
bx.extract_element(args[0].immediate(), index).to_rvalue(),
|
||||
args[1].immediate(),
|
||||
]
|
||||
} else {
|
||||
args.iter()
|
||||
.map(|arg| bx.extract_element(arg.immediate(), index).to_rvalue())
|
||||
.collect()
|
||||
};
|
||||
vector_elements.push(bx.context.new_call(None, *function, &arguments));
|
||||
}
|
||||
let c = bx.context.new_rvalue_from_vector(None, vec_ty, &vector_elements);
|
||||
Ok(c)
|
||||
}
|
||||
|
||||
|
@ -525,6 +494,297 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
fn vector_ty<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, elem_ty: Ty<'tcx>, vec_len: u64) -> Type<'gcc> {
|
||||
// FIXME: use cx.layout_of(ty).llvm_type() ?
|
||||
let elem_ty = match *elem_ty.kind() {
|
||||
ty::Int(v) => cx.type_int_from_ty(v),
|
||||
ty::Uint(v) => cx.type_uint_from_ty(v),
|
||||
ty::Float(v) => cx.type_float_from_ty(v),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
cx.type_vector(elem_ty, vec_len)
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
fn gather<'a, 'gcc, 'tcx>(default: RValue<'gcc>, pointers: RValue<'gcc>, mask: RValue<'gcc>, pointer_count: usize, bx: &mut Builder<'a, 'gcc, 'tcx>, in_len: u64, underlying_ty: Ty<'tcx>, invert: bool) -> RValue<'gcc> {
|
||||
let vector_type =
|
||||
if pointer_count > 1 {
|
||||
bx.context.new_vector_type(bx.usize_type, in_len)
|
||||
}
|
||||
else {
|
||||
vector_ty(bx, underlying_ty, in_len)
|
||||
};
|
||||
let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
|
||||
|
||||
let mut values = vec![];
|
||||
for i in 0..in_len {
|
||||
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
|
||||
let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();
|
||||
|
||||
let ptr_type = elem_type.make_pointer();
|
||||
let ptr = bx.context.new_bitcast(None, int, ptr_type);
|
||||
let value = ptr.dereference(None).to_rvalue();
|
||||
values.push(value);
|
||||
}
|
||||
|
||||
let vector = bx.context.new_rvalue_from_vector(None, vector_type, &values);
|
||||
|
||||
let mut mask_types = vec![];
|
||||
let mut mask_values = vec![];
|
||||
for i in 0..in_len {
|
||||
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
|
||||
mask_types.push(bx.context.new_field(None, bx.i32_type, "m"));
|
||||
let mask_value = bx.context.new_vector_access(None, mask, index).to_rvalue();
|
||||
let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value;
|
||||
let value = index + masked;
|
||||
mask_values.push(value);
|
||||
}
|
||||
let mask_type = bx.context.new_struct_type(None, "mask_type", &mask_types);
|
||||
let mask = bx.context.new_struct_constructor(None, mask_type.as_type(), None, &mask_values);
|
||||
|
||||
if invert {
|
||||
bx.shuffle_vector(vector, default, mask)
|
||||
}
|
||||
else {
|
||||
bx.shuffle_vector(default, vector, mask)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_gather {
|
||||
// simd_gather(values: <N x T>, pointers: <N x *_ T>,
|
||||
// mask: <N x i{M}>) -> <N x T>
|
||||
// * N: number of elements in the input vectors
|
||||
// * T: type of the element to load
|
||||
// * M: any integer width is supported, will be truncated to i1
|
||||
|
||||
// All types must be simd vector types
|
||||
require_simd!(in_ty, "first");
|
||||
require_simd!(arg_tys[1], "second");
|
||||
require_simd!(arg_tys[2], "third");
|
||||
require_simd!(ret_ty, "return");
|
||||
|
||||
// Of the same length:
|
||||
let (out_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
let (out_len2, _) = arg_tys[2].simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
in_len == out_len,
|
||||
InvalidMonomorphization::SecondArgumentLength {
|
||||
span,
|
||||
name,
|
||||
in_len,
|
||||
in_ty,
|
||||
arg_ty: arg_tys[1],
|
||||
out_len
|
||||
}
|
||||
);
|
||||
require!(
|
||||
in_len == out_len2,
|
||||
InvalidMonomorphization::ThirdArgumentLength {
|
||||
span,
|
||||
name,
|
||||
in_len,
|
||||
in_ty,
|
||||
arg_ty: arg_tys[2],
|
||||
out_len: out_len2
|
||||
}
|
||||
);
|
||||
|
||||
// The return type must match the first argument type
|
||||
require!(
|
||||
ret_ty == in_ty,
|
||||
InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
|
||||
);
|
||||
|
||||
// This counts how many pointers
|
||||
fn ptr_count(t: Ty<'_>) -> usize {
|
||||
match t.kind() {
|
||||
ty::RawPtr(p) => 1 + ptr_count(p.ty),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
// Non-ptr type
|
||||
fn non_ptr(t: Ty<'_>) -> Ty<'_> {
|
||||
match t.kind() {
|
||||
ty::RawPtr(p) => non_ptr(p.ty),
|
||||
_ => t,
|
||||
}
|
||||
}
|
||||
|
||||
// The second argument must be a simd vector with an element type that's a pointer
|
||||
// to the element type of the first argument
|
||||
let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
|
||||
let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
let (pointer_count, underlying_ty) = match element_ty1.kind() {
|
||||
ty::RawPtr(p) if p.ty == in_elem => (ptr_count(element_ty1), non_ptr(element_ty1)),
|
||||
_ => {
|
||||
require!(
|
||||
false,
|
||||
InvalidMonomorphization::ExpectedElementType {
|
||||
span,
|
||||
name,
|
||||
expected_element: element_ty1,
|
||||
second_arg: arg_tys[1],
|
||||
in_elem,
|
||||
in_ty,
|
||||
mutability: ExpectedPointerMutability::Not,
|
||||
}
|
||||
);
|
||||
unreachable!();
|
||||
}
|
||||
};
|
||||
assert!(pointer_count > 0);
|
||||
assert_eq!(pointer_count - 1, ptr_count(element_ty0));
|
||||
assert_eq!(underlying_ty, non_ptr(element_ty0));
|
||||
|
||||
// The element type of the third argument must be a signed integer type of any width:
|
||||
let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
|
||||
match element_ty2.kind() {
|
||||
ty::Int(_) => (),
|
||||
_ => {
|
||||
require!(
|
||||
false,
|
||||
InvalidMonomorphization::ThirdArgElementType {
|
||||
span,
|
||||
name,
|
||||
expected_element: element_ty2,
|
||||
third_arg: arg_tys[2]
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, false));
|
||||
}
|
||||
|
||||
#[cfg(feature="master")]
|
||||
if name == sym::simd_scatter {
|
||||
// simd_scatter(values: <N x T>, pointers: <N x *mut T>,
|
||||
// mask: <N x i{M}>) -> ()
|
||||
// * N: number of elements in the input vectors
|
||||
// * T: type of the element to load
|
||||
// * M: any integer width is supported, will be truncated to i1
|
||||
|
||||
// All types must be simd vector types
|
||||
require_simd!(in_ty, "first");
|
||||
require_simd!(arg_tys[1], "second");
|
||||
require_simd!(arg_tys[2], "third");
|
||||
|
||||
// Of the same length:
|
||||
let (element_len1, _) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
let (element_len2, _) = arg_tys[2].simd_size_and_type(bx.tcx());
|
||||
require!(
|
||||
in_len == element_len1,
|
||||
InvalidMonomorphization::SecondArgumentLength {
|
||||
span,
|
||||
name,
|
||||
in_len,
|
||||
in_ty,
|
||||
arg_ty: arg_tys[1],
|
||||
out_len: element_len1
|
||||
}
|
||||
);
|
||||
require!(
|
||||
in_len == element_len2,
|
||||
InvalidMonomorphization::ThirdArgumentLength {
|
||||
span,
|
||||
name,
|
||||
in_len,
|
||||
in_ty,
|
||||
arg_ty: arg_tys[2],
|
||||
out_len: element_len2
|
||||
}
|
||||
);
|
||||
|
||||
// This counts how many pointers
|
||||
fn ptr_count(t: Ty<'_>) -> usize {
|
||||
match t.kind() {
|
||||
ty::RawPtr(p) => 1 + ptr_count(p.ty),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
// Non-ptr type
|
||||
fn non_ptr(t: Ty<'_>) -> Ty<'_> {
|
||||
match t.kind() {
|
||||
ty::RawPtr(p) => non_ptr(p.ty),
|
||||
_ => t,
|
||||
}
|
||||
}
|
||||
|
||||
// The second argument must be a simd vector with an element type that's a pointer
|
||||
// to the element type of the first argument
|
||||
let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx());
|
||||
let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx());
|
||||
let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx());
|
||||
let (pointer_count, underlying_ty) = match element_ty1.kind() {
|
||||
ty::RawPtr(p) if p.ty == in_elem && p.mutbl == hir::Mutability::Mut => {
|
||||
(ptr_count(element_ty1), non_ptr(element_ty1))
|
||||
}
|
||||
_ => {
|
||||
require!(
|
||||
false,
|
||||
InvalidMonomorphization::ExpectedElementType {
|
||||
span,
|
||||
name,
|
||||
expected_element: element_ty1,
|
||||
second_arg: arg_tys[1],
|
||||
in_elem,
|
||||
in_ty,
|
||||
mutability: ExpectedPointerMutability::Mut,
|
||||
}
|
||||
);
|
||||
unreachable!();
|
||||
}
|
||||
};
|
||||
assert!(pointer_count > 0);
|
||||
assert_eq!(pointer_count - 1, ptr_count(element_ty0));
|
||||
assert_eq!(underlying_ty, non_ptr(element_ty0));
|
||||
|
||||
// The element type of the third argument must be a signed integer type of any width:
|
||||
match element_ty2.kind() {
|
||||
ty::Int(_) => (),
|
||||
_ => {
|
||||
require!(
|
||||
false,
|
||||
InvalidMonomorphization::ThirdArgElementType {
|
||||
span,
|
||||
name,
|
||||
expected_element: element_ty2,
|
||||
third_arg: arg_tys[2]
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let result = gather(args[0].immediate(), args[1].immediate(), args[2].immediate(), pointer_count, bx, in_len, underlying_ty, true);
|
||||
|
||||
let pointers = args[1].immediate();
|
||||
|
||||
let vector_type =
|
||||
if pointer_count > 1 {
|
||||
bx.context.new_vector_type(bx.usize_type, in_len)
|
||||
}
|
||||
else {
|
||||
vector_ty(bx, underlying_ty, in_len)
|
||||
};
|
||||
let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type();
|
||||
|
||||
for i in 0..in_len {
|
||||
let index = bx.context.new_rvalue_from_int(bx.int_type, i as i32);
|
||||
let value = bx.context.new_vector_access(None, result, index);
|
||||
|
||||
let int = bx.context.new_vector_access(None, pointers, index).to_rvalue();
|
||||
let ptr_type = elem_type.make_pointer();
|
||||
let ptr = bx.context.new_bitcast(None, int, ptr_type);
|
||||
bx.llbb().add_assignment(None, ptr.dereference(None), value);
|
||||
}
|
||||
|
||||
return Ok(bx.context.new_rvalue_zero(bx.i32_type));
|
||||
}
|
||||
|
||||
arith_binary! {
|
||||
simd_add: Uint, Int => add, Float => fadd;
|
||||
simd_sub: Uint, Int => sub, Float => fsub;
|
||||
|
@ -536,6 +796,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
simd_and: Uint, Int => and;
|
||||
simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
|
||||
simd_xor: Uint, Int => xor;
|
||||
simd_fmin: Float => vector_fmin;
|
||||
simd_fmax: Float => vector_fmax;
|
||||
}
|
||||
|
||||
macro_rules! arith_unary {
|
||||
|
@ -562,10 +824,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
let rhs = args[1].immediate();
|
||||
let is_add = name == sym::simd_saturating_add;
|
||||
let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
|
||||
let (signed, elem_width, elem_ty) = match *in_elem.kind() {
|
||||
ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)),
|
||||
ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)),
|
||||
_ => {
|
||||
let (signed, elem_width, elem_ty) =
|
||||
match *in_elem.kind() {
|
||||
ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_int_from_ty(i)),
|
||||
ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits) / 8, bx.cx.type_uint_from_ty(i)),
|
||||
_ => {
|
||||
return_error!(InvalidMonomorphizationExpectedSignedUnsigned {
|
||||
span,
|
||||
name,
|
||||
|
@ -574,33 +837,78 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
});
|
||||
}
|
||||
};
|
||||
let builtin_name = match (signed, is_add, in_len, elem_width) {
|
||||
(true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.
|
||||
(false, true, 32, 8) => "__builtin_ia32_paddusb256",
|
||||
(true, true, 16, 16) => "__builtin_ia32_paddsw256",
|
||||
(false, true, 16, 16) => "__builtin_ia32_paddusw256",
|
||||
(true, false, 16, 16) => "__builtin_ia32_psubsw256",
|
||||
(false, false, 16, 16) => "__builtin_ia32_psubusw256",
|
||||
(true, false, 32, 8) => "__builtin_ia32_psubsb256",
|
||||
(false, false, 32, 8) => "__builtin_ia32_psubusb256",
|
||||
_ => unimplemented!(
|
||||
"signed: {}, is_add: {}, in_len: {}, elem_width: {}",
|
||||
signed,
|
||||
is_add,
|
||||
in_len,
|
||||
elem_width
|
||||
),
|
||||
};
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
|
||||
|
||||
let func = bx.context.get_target_builtin_function(builtin_name);
|
||||
let param1_type = func.get_param(0).to_rvalue().get_type();
|
||||
let param2_type = func.get_param(1).to_rvalue().get_type();
|
||||
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
|
||||
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
|
||||
let result = bx.context.new_call(None, func, &[lhs, rhs]);
|
||||
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
|
||||
return Ok(bx.context.new_bitcast(None, result, vec_ty));
|
||||
let result =
|
||||
match (signed, is_add) {
|
||||
(false, true) => {
|
||||
let res = lhs + rhs;
|
||||
let cmp = bx.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
|
||||
res | cmp
|
||||
},
|
||||
(true, true) => {
|
||||
// Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
|
||||
// TODO(antoyo): improve using conditional operators if possible.
|
||||
let arg_type = lhs.get_type();
|
||||
// TODO(antoyo): convert lhs and rhs to unsigned.
|
||||
let sum = lhs + rhs;
|
||||
let vector_type = arg_type.dyncast_vector().expect("vector type");
|
||||
let unit = vector_type.get_num_units();
|
||||
let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
|
||||
let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
|
||||
|
||||
let xor1 = lhs ^ rhs;
|
||||
let xor2 = lhs ^ sum;
|
||||
let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
|
||||
let mask = and >> width;
|
||||
|
||||
let one = bx.context.new_rvalue_one(elem_ty);
|
||||
let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
|
||||
let shift1 = ones << width;
|
||||
let shift2 = sum >> width;
|
||||
let mask_min = shift1 ^ shift2;
|
||||
|
||||
let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
|
||||
let and2 = mask & mask_min;
|
||||
|
||||
and1 + and2
|
||||
},
|
||||
(false, false) => {
|
||||
let res = lhs - rhs;
|
||||
let cmp = bx.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
|
||||
res & cmp
|
||||
},
|
||||
(true, false) => {
|
||||
let arg_type = lhs.get_type();
|
||||
// TODO(antoyo): this uses the same algorithm as saturating add, but adds the
|
||||
// negative of the right operand. Find a proper subtraction algorithm.
|
||||
let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
|
||||
|
||||
// TODO(antoyo): convert lhs and rhs to unsigned.
|
||||
let sum = lhs + rhs;
|
||||
let vector_type = arg_type.dyncast_vector().expect("vector type");
|
||||
let unit = vector_type.get_num_units();
|
||||
let a = bx.context.new_rvalue_from_int(elem_ty, ((elem_width as i32) << 3) - 1);
|
||||
let width = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![a; unit]);
|
||||
|
||||
let xor1 = lhs ^ rhs;
|
||||
let xor2 = lhs ^ sum;
|
||||
let and = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, xor1) & xor2;
|
||||
let mask = and >> width;
|
||||
|
||||
let one = bx.context.new_rvalue_one(elem_ty);
|
||||
let ones = bx.context.new_rvalue_from_vector(None, lhs.get_type(), &vec![one; unit]);
|
||||
let shift1 = ones << width;
|
||||
let shift2 = sum >> width;
|
||||
let mask_min = shift1 ^ shift2;
|
||||
|
||||
let and1 = bx.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, mask) & sum;
|
||||
let and2 = mask & mask_min;
|
||||
|
||||
and1 + and2
|
||||
}
|
||||
};
|
||||
|
||||
return Ok(result);
|
||||
}
|
||||
|
||||
macro_rules! arith_red {
|
||||
|
@ -650,33 +958,50 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
add,
|
||||
0.0 // TODO: Use this argument.
|
||||
);
|
||||
arith_red!(simd_reduce_mul_unordered: BinaryOp::Mult, vector_reduce_fmul_fast, false, mul, 1.0);
|
||||
arith_red!(
|
||||
simd_reduce_mul_unordered: BinaryOp::Mult,
|
||||
vector_reduce_fmul_fast,
|
||||
false,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_add_ordered: BinaryOp::Plus,
|
||||
vector_reduce_fadd,
|
||||
true,
|
||||
add,
|
||||
0.0
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_mul_ordered: BinaryOp::Mult,
|
||||
vector_reduce_fmul,
|
||||
true,
|
||||
mul,
|
||||
1.0
|
||||
);
|
||||
|
||||
|
||||
macro_rules! minmax_red {
|
||||
($name:ident: $reduction:ident) => {
|
||||
($name:ident: $int_red:ident, $float_red:ident) => {
|
||||
if name == sym::$name {
|
||||
require!(
|
||||
ret_ty == in_elem,
|
||||
InvalidMonomorphizationReturnType { span, name, in_elem, in_ty, ret_ty }
|
||||
);
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) | ty::Float(_) => {
|
||||
Ok(bx.$reduction(args[0].immediate()))
|
||||
}
|
||||
_ => return_error!(InvalidMonomorphizationUnsupportedElement {
|
||||
span,
|
||||
name,
|
||||
in_ty,
|
||||
elem_ty: in_elem,
|
||||
ret_ty
|
||||
}),
|
||||
ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())),
|
||||
ty::Float(_) => Ok(bx.$float_red(args[0].immediate())),
|
||||
_ => return_error!(InvalidMonomorphizationUnsupportedElement { span, name, in_ty, elem_ty: in_elem, ret_ty }),
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max);
|
||||
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
||||
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
||||
// TODO(sadlerap): revisit these intrinsics to generate more optimal reductions
|
||||
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin);
|
||||
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax);
|
||||
|
||||
macro_rules! bitwise_red {
|
||||
($name:ident : $op:expr, $boolean:expr) => {
|
||||
|
@ -699,15 +1024,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
}),
|
||||
}
|
||||
|
||||
// boolean reductions operate on vectors of i1s:
|
||||
let i1 = bx.type_i1();
|
||||
let i1xn = bx.type_vector(i1, in_len as u64);
|
||||
bx.trunc(args[0].immediate(), i1xn)
|
||||
args[0].immediate()
|
||||
};
|
||||
return match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => {
|
||||
let r = bx.vector_reduce_op(input, $op);
|
||||
Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
|
||||
Ok(if !$boolean { r } else { bx.icmp(IntPredicate::IntNE, r, bx.context.new_rvalue_zero(r.get_type())) })
|
||||
}
|
||||
_ => return_error!(InvalidMonomorphizationUnsupportedElement {
|
||||
span,
|
||||
|
@ -723,6 +1045,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
|||
|
||||
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
|
||||
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
|
||||
bitwise_red!(simd_reduce_xor: BinaryOp::BitwiseXor, false);
|
||||
bitwise_red!(simd_reduce_all: BinaryOp::BitwiseAnd, true);
|
||||
bitwise_red!(simd_reduce_any: BinaryOp::BitwiseOr, true);
|
||||
|
||||
unimplemented!("simd {}", name);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue