Merge commit 'e8dca3e87d' into sync_from_cg_gcc

Antoni Boucher 2022-06-06 22:04:37 -04:00
commit 3fac982e07
41 changed files with 7933 additions and 592 deletions


@ -13,6 +13,7 @@ use std::borrow::Cow;
use crate::builder::Builder;
use crate::context::CodegenCx;
use crate::type_of::LayoutGccExt;
use crate::callee::get_fn;
// Rust asm! and GCC Extended Asm semantics differ substantially.
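As a minimal illustration of that difference (a sketch assuming an x86_64 target, not code from this commit): Rust asm! uses symbolic template placeholders and typed operand specifiers, while GCC Extended Asm uses numbered operands with constraint strings.

use std::arch::asm;

fn add_one(mut x: u64) -> u64 {
    // Rust asm!: Intel syntax by default, symbolic placeholder {x},
    // operand direction expressed as inout(reg).
    unsafe { asm!("add {x}, 1", x = inout(reg) x) };
    // The roughly equivalent GCC Extended Asm (AT&T syntax by default) would be:
    //   asm("addq $1, %0" : "+r"(x));
    // with positional operands and constraint strings such as "r", "m" or "X".
    x
}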
@ -116,7 +117,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
let asm_arch = self.tcx.sess.asm_arch.unwrap();
let is_x86 = matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64);
let att_dialect = is_x86 && options.contains(InlineAsmOptions::ATT_SYNTAX);
let intel_dialect = is_x86 && !options.contains(InlineAsmOptions::ATT_SYNTAX);
// GCC index of an output operand equals its position in the array
let mut outputs = vec![];
@ -348,9 +348,24 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
// processed in the previous pass
}
InlineAsmOperandRef::Const { .. }
| InlineAsmOperandRef::SymFn { .. }
| InlineAsmOperandRef::SymStatic { .. } => {
InlineAsmOperandRef::SymFn { instance } => {
inputs.push(AsmInOperand {
constraint: "X".into(),
rust_idx,
val: self.cx.rvalue_as_function(get_fn(self.cx, instance))
.get_address(None),
});
}
InlineAsmOperandRef::SymStatic { def_id } => {
inputs.push(AsmInOperand {
constraint: "X".into(),
rust_idx,
val: self.cx.get_static(def_id).get_address(None),
});
}
InlineAsmOperandRef::Const { .. } => {
// processed in the previous pass
}
}
@ -359,7 +374,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
// 3. Build the template string
let mut template_str = String::with_capacity(estimate_template_length(template, constants_len, att_dialect));
if !intel_dialect {
if att_dialect {
template_str.push_str(ATT_SYNTAX_INS);
}
@ -444,7 +459,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
}
}
if !intel_dialect {
if att_dialect {
template_str.push_str(INTEL_SYNTAX_INS);
}
@ -588,7 +603,7 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg)
| InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) => "x",
InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v",
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => unimplemented!(),
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => "Yk",
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => unimplemented!(),
InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => unimplemented!(),
InlineAsmRegClass::X86(
@ -672,8 +687,8 @@ impl<'gcc, 'tcx> AsmMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
let asm_arch = self.tcx.sess.asm_arch.unwrap();
// Default to Intel syntax on x86
let intel_syntax = matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64)
&& !options.contains(InlineAsmOptions::ATT_SYNTAX);
let att_dialect = matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64)
&& options.contains(InlineAsmOptions::ATT_SYNTAX);
// Build the template string
let mut template_str = String::new();
@ -723,11 +738,11 @@ impl<'gcc, 'tcx> AsmMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
let template_str =
if intel_syntax {
format!("{}\n\t.intel_syntax noprefix", template_str)
if att_dialect {
format!(".att_syntax\n\t{}\n\t.intel_syntax noprefix", template_str)
}
else {
format!(".att_syntax\n\t{}\n\t.intel_syntax noprefix", template_str)
template_str
};
// NOTE: seems like gcc will put the asm in the wrong section, so set it to .text manually.
let template_str = format!(".pushsection .text\n{}\n.popsection", template_str);


@ -78,9 +78,19 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
let context = Context::default();
// TODO(antoyo): only set on x86 platforms.
context.add_command_line_option("-masm=intel");
// TODO(antoyo): only add the following cli argument if the feature is supported.
context.add_command_line_option("-msse2");
context.add_command_line_option("-mavx2");
context.add_command_line_option("-msha");
context.add_command_line_option("-mpclmul");
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
// Only add if the CPU supports it.
//context.add_command_line_option("-mavx512f");
for arg in &tcx.sess.opts.cg.llvm_args {
context.add_command_line_option(arg);
}
// NOTE: This is needed to compile the file src/intrinsic/archs.rs during a bootstrap of rustc.
context.add_command_line_option("-fno-var-tracking-assignments");
// NOTE: an optimization (https://github.com/rust-lang/rustc_codegen_gcc/issues/53).
context.add_command_line_option("-fno-semantic-interposition");
// NOTE: Rust relies on LLVM not doing TBAA (https://github.com/rust-lang/unsafe-code-guidelines/issues/292).
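One way the "only add the following cli argument if the feature is supported" TODO above could be addressed, sketched with a hypothetical helper (only add_command_line_option is taken from this hunk):

use gccjit::Context;

// Hypothetical helper: add a -m<feature> option only when the target feature
// is actually supported, instead of hard-coding -msse2/-mavx2/-msha/-mpclmul.
fn add_feature_flags(context: &Context<'_>, supported_features: &[&str]) {
    for feature in ["sse2", "avx2", "sha", "pclmul"] {
        if supported_features.contains(&feature) {
            context.add_command_line_option(&format!("-m{}", feature));
        }
    }
}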


@ -3,11 +3,11 @@ use std::cell::Cell;
use std::convert::TryFrom;
use std::ops::Deref;
use gccjit::FunctionType;
use gccjit::{
BinaryOp,
Block,
ComparisonOp,
Context,
Function,
LValue,
RValue,
@ -48,6 +48,7 @@ use rustc_target::spec::{HasTargetSpec, Target};
use crate::common::{SignType, TypeReflection, type_is_pointer};
use crate::context::CodegenCx;
use crate::intrinsic::llvm;
use crate::type_of::LayoutGccExt;
// TODO(antoyo)
@ -199,17 +200,28 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
return Cow::Borrowed(args);
}
let func_name = format!("{:?}", func_ptr);
let casted_args: Vec<_> = param_types
.into_iter()
.zip(args.iter())
.enumerate()
.map(|(index, (expected_ty, &actual_val))| {
if llvm::ignore_arg_cast(&func_name, index, args.len()) {
return actual_val;
}
let actual_ty = actual_val.get_type();
if expected_ty != actual_ty {
if on_stack_param_indices.contains(&index) {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
self.context.new_cast(None, actual_val, expected_ty)
}
else if on_stack_param_indices.contains(&index) {
actual_val.dereference(None).to_rvalue()
}
else {
assert!(!((actual_ty.is_vector() && !expected_ty.is_vector()) || (!actual_ty.is_vector() && expected_ty.is_vector())), "{:?} ({}) -> {:?} ({}), index: {:?}[{}]", actual_ty, actual_ty.is_vector(), expected_ty, expected_ty.is_vector(), func_ptr, index);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
self.bitcast(actual_val, expected_ty)
}
}
@ -268,22 +280,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
// gccjit requires the result of functions to be used, even when it's not needed.
// That's why we assign the result to a local or call add_eval().
let gcc_func = func_ptr.get_type().dyncast_function_ptr_type().expect("function ptr");
let mut return_type = gcc_func.get_return_type();
let return_type = gcc_func.get_return_type();
let void_type = self.context.new_type::<()>();
let current_func = self.block.get_function();
// FIXME(antoyo): As a temporary workaround for unsupported LLVM intrinsics.
if gcc_func.get_param_count() == 0 && format!("{:?}", func_ptr) == "__builtin_ia32_pmovmskb128" {
return_type = self.int_type;
}
if return_type != void_type {
unsafe { RETURN_VALUE_COUNT += 1 };
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
let func_name = format!("{:?}", func_ptr);
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
result.to_rvalue()
}
else {
#[cfg(not(feature="master"))]
if gcc_func.get_param_count() == 0 {
// FIXME(antoyo): As a temporary workaround for unsupported LLVM intrinsics.
self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &[]));
@ -291,6 +301,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
else {
self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
}
#[cfg(feature="master")]
self.block.add_eval(None, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
// Return a dummy value when there is no return value.
let result = current_func.new_local(None, self.isize_type, "dummyValueThatShouldNeverBeUsed");
self.block.add_assignment(None, result, self.context.new_rvalue_from_long(self.isize_type, 0));
@ -480,8 +492,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
}
fn exactudiv(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
// TODO(antoyo): convert the arguments to unsigned?
// TODO(antoyo): poison if not exact.
let a_type = a.get_type().to_unsigned(self);
let a = self.gcc_int_cast(a, a_type);
let b_type = b.get_type().to_unsigned(self);
let b = self.gcc_int_cast(b, b_type);
a / b
}
@ -511,12 +526,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
}
fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
if a.get_type() == self.cx.float_type {
if a.get_type().is_compatible_with(self.cx.float_type) {
let fmodf = self.context.get_builtin_function("fmodf");
// FIXME(antoyo): this seems to produce the wrong result.
return self.context.new_call(None, fmodf, &[a, b]);
}
assert_eq!(a.get_type(), self.cx.double_type);
assert_eq!(a.get_type().unqualified(), self.cx.double_type);
let fmod = self.context.get_builtin_function("fmod");
return self.context.new_call(None, fmod, &[a, b]);
@ -632,18 +647,17 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
unimplemented!();
}
fn load(&mut self, _ty: Type<'gcc>, ptr: RValue<'gcc>, _align: Align) -> RValue<'gcc> {
// TODO(antoyo): use ty.
fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, _align: Align) -> RValue<'gcc> {
let block = self.llbb();
let function = block.get_function();
// NOTE: instead of returning the dereference here, we have to assign it to a variable in
// the current basic block. Otherwise, it could be used in another basic block, causing a
// dereference after a drop, for instance.
// TODO(antoyo): handle align.
// TODO(antoyo): handle align of the load instruction.
let ptr = self.context.new_cast(None, ptr, pointee_ty.make_pointer());
let deref = ptr.dereference(None).to_rvalue();
let value_type = deref.get_type();
unsafe { RETURN_VALUE_COUNT += 1 };
let loaded_value = function.new_local(None, value_type, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
let loaded_value = function.new_local(None, pointee_ty, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT }));
block.add_assignment(None, loaded_value, deref);
loaded_value.to_rvalue()
}
@ -695,7 +709,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
OperandValue::Ref(place.llval, Some(llextra), place.align)
}
else if place.layout.is_gcc_immediate() {
let load = self.load(place.llval.get_type(), place.llval, place.align);
let load = self.load(
place.layout.gcc_type(self, false),
place.llval,
place.align,
);
if let abi::Abi::Scalar(ref scalar) = place.layout.abi {
scalar_load_metadata(self, load, scalar);
}
@ -707,7 +725,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
let mut load = |i, scalar: &abi::Scalar, align| {
let llptr = self.struct_gep(pair_type, place.llval, i as u64);
let load = self.load(llptr.get_type(), llptr, align);
let llty = place.layout.scalar_pair_element_gcc_type(self, i, false);
let load = self.load(llty, llptr, align);
scalar_load_metadata(self, load, scalar);
if scalar.is_bool() { self.trunc(load, self.type_i1()) } else { load }
};
@ -779,9 +798,16 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
self.store_with_flags(val, ptr, align, MemFlags::empty())
}
fn store_with_flags(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>, _align: Align, _flags: MemFlags) -> RValue<'gcc> {
fn store_with_flags(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>, align: Align, _flags: MemFlags) -> RValue<'gcc> {
let ptr = self.check_store(val, ptr);
self.llbb().add_assignment(None, ptr.dereference(None), val);
let destination = ptr.dereference(None);
// NOTE: libgccjit does not support specifying the alignment on the assignment, so we cast
// the pointer to a type with the proper alignment instead.
let destination_type = destination.to_rvalue().get_type().unqualified();
let aligned_type = destination_type.get_aligned(align.bytes()).make_pointer();
let aligned_destination = self.cx.context.new_bitcast(None, ptr, aligned_type);
let aligned_destination = aligned_destination.dereference(None);
self.llbb().add_assignment(None, aligned_destination, val);
// TODO(antoyo): handle align and flags.
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
self.cx.context.new_rvalue_zero(self.type_i32())
@ -953,7 +979,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
fn memmove(&mut self, dst: RValue<'gcc>, dst_align: Align, src: RValue<'gcc>, src_align: Align, size: RValue<'gcc>, flags: MemFlags) {
if flags.contains(MemFlags::NONTEMPORAL) {
// HACK(nox): This is inefficient but there is no nontemporal memmove.
let val = self.load(src.get_type(), src, src_align);
let val = self.load(src.get_type().get_pointee().expect("get_pointee"), src, src_align);
let ptr = self.pointercast(dst, self.type_ptr_to(self.val_ty(val)));
self.store_with_flags(val, ptr, dst_align, flags);
return;
@ -1269,16 +1295,183 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
}
impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
#[cfg(feature="master")]
pub fn shuffle_vector(&mut self, v1: RValue<'gcc>, v2: RValue<'gcc>, mask: RValue<'gcc>) -> RValue<'gcc> {
let return_type = v1.get_type();
let params = [
self.context.new_parameter(None, return_type, "v1"),
self.context.new_parameter(None, return_type, "v2"),
self.context.new_parameter(None, mask.get_type(), "mask"),
];
let shuffle = self.context.new_function(None, FunctionType::Extern, return_type, &params, "_mm_shuffle_epi8", false);
self.context.new_call(None, shuffle, &[v1, v2, mask])
let struct_type = mask.get_type().is_struct().expect("mask of struct type");
// TODO(antoyo): use a recursive unqualified() here.
let vector_type = v1.get_type().unqualified().dyncast_vector().expect("vector type");
let element_type = vector_type.get_element_type();
let vec_num_units = vector_type.get_num_units();
let mask_num_units = struct_type.get_field_count();
let mut vector_elements = vec![];
let mask_element_type =
if element_type.is_integral() {
element_type
}
else {
#[cfg(feature="master")]
{
self.cx.type_ix(element_type.get_size() as u64 * 8)
}
#[cfg(not(feature="master"))]
self.int_type
};
for i in 0..mask_num_units {
let field = struct_type.get_field(i as i32);
vector_elements.push(self.context.new_cast(None, mask.access_field(None, field).to_rvalue(), mask_element_type));
}
// NOTE: the mask needs to be the same length as the input vectors, so add the missing
// elements in the mask if needed.
for _ in mask_num_units..vec_num_units {
vector_elements.push(self.context.new_rvalue_zero(mask_element_type));
}
let array_type = self.context.new_array_type(None, element_type, vec_num_units as i32);
let result_type = self.context.new_vector_type(element_type, mask_num_units as u64);
let (v1, v2) =
if vec_num_units < mask_num_units {
// NOTE: the input vectors need to be as long as the mask, so join the 2 input
// vectors into one and use a dummy zero vector as the second operand.
// TODO(antoyo): switch to using new_vector_access.
let array = self.context.new_bitcast(None, v1, array_type);
let mut elements = vec![];
for i in 0..vec_num_units {
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
}
// TODO(antoyo): switch to using new_vector_access.
let array = self.context.new_bitcast(None, v2, array_type);
for i in 0..(mask_num_units - vec_num_units) {
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
}
let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);
let zero = self.context.new_rvalue_zero(element_type);
let v2 = self.context.new_rvalue_from_vector(None, result_type, &vec![zero; mask_num_units]);
(v1, v2)
}
else {
(v1, v2)
};
let new_mask_num_units = std::cmp::max(mask_num_units, vec_num_units);
let mask_type = self.context.new_vector_type(mask_element_type, new_mask_num_units as u64);
let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
let result = self.context.new_rvalue_vector_perm(None, v1, v2, mask);
if vec_num_units != mask_num_units {
// NOTE: if padding was added, only keep as many elements as the mask has, in order to
// remove that padding from the result.
let mut elements = vec![];
// TODO(antoyo): switch to using new_vector_access.
let array = self.context.new_bitcast(None, result, array_type);
for i in 0..mask_num_units {
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
}
self.context.new_rvalue_from_vector(None, result_type, &elements)
}
else {
result
}
}
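For reference, a scalar model of the lane-selection semantics implemented above (illustration only, not the libgccjit code): lane i of the result is taken from the concatenation of the two inputs at index mask[i].

// result[i] = concat(v1, v2)[mask[i]]
fn shuffle_model<T: Copy>(v1: &[T], v2: &[T], mask: &[usize]) -> Vec<T> {
    let concat: Vec<T> = v1.iter().chain(v2.iter()).copied().collect();
    mask.iter().map(|&index| concat[index]).collect()
}
// shuffle_model(&[10, 20], &[30, 40], &[0, 3, 2, 1]) == vec![10, 40, 30, 20]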
#[cfg(not(feature="master"))]
pub fn shuffle_vector(&mut self, _v1: RValue<'gcc>, _v2: RValue<'gcc>, _mask: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
#[cfg(feature="master")]
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
let element_count = vector_type.get_num_units();
let mut vector_elements = vec![];
for i in 0..element_count {
vector_elements.push(i);
}
let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
let mut shift = 1;
let mut res = src;
while shift < element_count {
let vector_elements: Vec<_> =
vector_elements.iter()
.map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
.collect();
let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
shift *= 2;
res = op(res, shifted, &self.context);
}
self.context.new_vector_access(None, res, self.context.new_rvalue_zero(self.int_type))
.to_rvalue()
}
#[cfg(not(feature="master"))]
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
unimplemented!();
}
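A scalar model of the log-step reduction performed by the master implementation above, assuming a power-of-two lane count (the rotate-and-combine scheme relies on it):

// Illustration only: rotate the lanes by 1, 2, 4, ... and combine, so that
// after log2(n) steps every lane holds the full reduction; lane 0 is returned.
fn reduce_model<T: Copy, F: Fn(T, T) -> T>(src: &[T], op: F) -> T {
    let n = src.len();
    let mut res: Vec<T> = src.to_vec();
    let mut shift = 1;
    while shift < n {
        let shifted: Vec<T> = (0..n).map(|i| res[(i + shift) % n]).collect();
        res = (0..n).map(|i| op(res[i], shifted[i])).collect();
        shift *= 2;
    }
    res[0]
}
// reduce_model(&[1, 2, 3, 4], |a, b| a + b) == 10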
pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
}
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
// Inspired by Hacker's Delight min implementation.
pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| {
let differences_or_zeros = difference_or_zero(a, b, context);
context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
})
}
// Inspired by Hacker's Delight max implementation.
pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| {
let differences_or_zeros = difference_or_zero(a, b, context);
context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
})
}
pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> {
// cond is a vector of integers, not of bools.
let cond_type = cond.get_type();
let vector_type = cond_type.unqualified().dyncast_vector().expect("vector type");
let num_units = vector_type.get_num_units();
let element_type = vector_type.get_element_type();
let zeros = vec![self.context.new_rvalue_zero(element_type); num_units];
let zeros = self.context.new_rvalue_from_vector(None, cond_type, &zeros);
let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
let then_vals = masks & then_val;
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones);
let inverted_masks = masks + ones;
// NOTE: sometimes, the type of else_val can be different than the type of then_val in
// libgccjit (vector of int vs vector of int32_t), but they should be the same for the AND
// operation to work.
let else_val = self.context.new_bitcast(None, else_val, then_val.get_type());
let else_vals = inverted_masks & else_val;
then_vals | else_vals
}
}
fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
let difference = a - b;
let masks = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
difference & masks
}
impl<'a, 'gcc, 'tcx> StaticBuilderMethods for Builder<'a, 'gcc, 'tcx> {


@ -121,8 +121,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
unimplemented!();
}
fn const_real(&self, _t: Type<'gcc>, _val: f64) -> RValue<'gcc> {
unimplemented!();
fn const_real(&self, typ: Type<'gcc>, val: f64) -> RValue<'gcc> {
self.context.new_rvalue_from_double(typ, val)
}
fn const_str(&self, s: Symbol) -> (RValue<'gcc>, RValue<'gcc>) {
@ -279,6 +279,21 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
else if self.is_u128(cx) {
cx.i128_type
}
else if self.is_uchar(cx) {
cx.char_type
}
else if self.is_ushort(cx) {
cx.short_type
}
else if self.is_uint(cx) {
cx.int_type
}
else if self.is_ulong(cx) {
cx.long_type
}
else if self.is_ulonglong(cx) {
cx.longlong_type
}
else {
self.clone()
}
@ -300,6 +315,21 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
else if self.is_i128(cx) {
cx.u128_type
}
else if self.is_char(cx) {
cx.uchar_type
}
else if self.is_short(cx) {
cx.ushort_type
}
else if self.is_int(cx) {
cx.uint_type
}
else if self.is_long(cx) {
cx.ulong_type
}
else if self.is_longlong(cx) {
cx.ulonglong_type
}
else {
self.clone()
}
@ -312,6 +342,11 @@ pub trait TypeReflection<'gcc, 'tcx> {
fn is_uint(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_ulong(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_ulonglong(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_char(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_short(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_int(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_long(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_longlong(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_i8(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_u8(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
@ -326,15 +361,17 @@ pub trait TypeReflection<'gcc, 'tcx> {
fn is_f32(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_f64(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
fn is_vector(&self) -> bool;
}
impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> {
fn is_uchar(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.u8_type
self.unqualified() == cx.uchar_type
}
fn is_ushort(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.u16_type
self.unqualified() == cx.ushort_type
}
fn is_uint(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
@ -349,6 +386,26 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> {
self.unqualified() == cx.ulonglong_type
}
fn is_char(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.char_type
}
fn is_short(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.short_type
}
fn is_int(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.int_type
}
fn is_long(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.long_type
}
fn is_longlong(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.longlong_type
}
fn is_i8(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.i8_type
}
@ -396,4 +453,21 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> {
fn is_f64(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool {
self.unqualified() == cx.context.new_type::<f64>()
}
fn is_vector(&self) -> bool {
let mut typ = self.clone();
loop {
if typ.dyncast_vector().is_some() {
return true;
}
let old_type = typ;
typ = typ.unqualified();
if old_type == typ {
break;
}
}
false
}
}


@ -25,7 +25,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
}
}
self.context.new_bitcast(None, value, typ)
// NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
// SIMD builtins require a constant value.
self.bitcast_if_needed(value, typ)
}
}
@ -45,7 +47,10 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
}
}
let global_value = self.static_addr_of_mut(cv, align, kind);
// TODO(antoyo): set global constant.
#[cfg(feature = "master")]
self.global_lvalues.borrow().get(&global_value)
.expect("`static_addr_of_mut` did not add the global to `self.global_lvalues`")
.global_set_readonly();
self.const_globals.borrow_mut().insert(cv, global_value);
global_value
}
@ -79,20 +84,15 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
// TODO(antoyo): set alignment.
let value =
if value.get_type() != gcc_type {
self.context.new_bitcast(None, value, gcc_type)
}
else {
value
};
let value = self.bitcast_if_needed(value, gcc_type);
global.global_set_initializer_rvalue(value);
// As an optimization, all shared statics which do not have interior
// mutability are placed into read-only memory.
if !is_mutable {
if self.type_is_freeze(ty) {
// TODO(antoyo): set global constant.
#[cfg(feature = "master")]
global.global_set_readonly();
}
}
@ -171,8 +171,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
Some(kind) if !self.tcx.sess.fewer_names() => {
let name = self.generate_local_symbol_name(kind);
// TODO(antoyo): check if it's okay that no link_section is set.
// TODO(antoyo): set alignment here as well.
let global = self.declare_private_global(&name[..], self.val_ty(cv));
let typ = self.val_ty(cv).get_aligned(align.bytes());
let global = self.declare_private_global(&name[..], typ);
global
}
_ => {


@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
pub tls_model: gccjit::TlsModel,
@ -53,10 +54,15 @@ pub struct CodegenCx<'gcc, 'tcx> {
pub u128_type: Type<'gcc>,
pub usize_type: Type<'gcc>,
pub char_type: Type<'gcc>,
pub uchar_type: Type<'gcc>,
pub short_type: Type<'gcc>,
pub ushort_type: Type<'gcc>,
pub int_type: Type<'gcc>,
pub uint_type: Type<'gcc>,
pub long_type: Type<'gcc>,
pub ulong_type: Type<'gcc>,
pub longlong_type: Type<'gcc>,
pub ulonglong_type: Type<'gcc>,
pub sizet_type: Type<'gcc>,
@ -145,10 +151,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
let float_type = context.new_type::<f32>();
let double_type = context.new_type::<f64>();
let char_type = context.new_c_type(CType::Char);
let uchar_type = context.new_c_type(CType::UChar);
let short_type = context.new_c_type(CType::Short);
let ushort_type = context.new_c_type(CType::UShort);
let int_type = context.new_c_type(CType::Int);
let uint_type = context.new_c_type(CType::UInt);
let long_type = context.new_c_type(CType::Long);
let ulong_type = context.new_c_type(CType::ULong);
let longlong_type = context.new_c_type(CType::LongLong);
let ulonglong_type = context.new_c_type(CType::ULongLong);
let sizet_type = context.new_c_type(CType::SizeT);
@ -184,6 +195,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
current_func: RefCell::new(None),
normal_function_addresses: Default::default(),
functions: RefCell::new(functions),
intrinsics: RefCell::new(FxHashMap::default()),
tls_model,
@ -200,10 +212,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
u32_type,
u64_type,
u128_type,
char_type,
uchar_type,
short_type,
ushort_type,
int_type,
uint_type,
long_type,
ulong_type,
longlong_type,
ulonglong_type,
sizet_type,
@ -269,16 +286,25 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
pub fn is_native_int_type_or_bool(&self, typ: Type<'gcc>) -> bool {
self.is_native_int_type(typ) || typ == self.bool_type
self.is_native_int_type(typ) || typ.is_compatible_with(self.bool_type)
}
pub fn is_int_type_or_bool(&self, typ: Type<'gcc>) -> bool {
self.is_native_int_type(typ) || self.is_non_native_int_type(typ) || typ == self.bool_type
self.is_native_int_type(typ) || self.is_non_native_int_type(typ) || typ.is_compatible_with(self.bool_type)
}
pub fn sess(&self) -> &Session {
&self.tcx.sess
}
pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> {
if value.get_type() != expected_type {
self.context.new_bitcast(None, value, expected_type)
}
else {
value
}
}
}
impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
@ -306,8 +332,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
let func = get_fn(self, instance);
let func = self.rvalue_as_function(func);
let func_name = self.tcx.symbol_name(instance).name;
let func =
if self.intrinsics.borrow().contains_key(func_name) {
self.intrinsics.borrow()[func_name].clone()
}
else {
let func = get_fn(self, instance);
self.rvalue_as_function(func)
};
let ptr = func.get_address(None);
// TODO(antoyo): don't do this twice: i.e. in declare_fn and here.


@ -11,7 +11,7 @@ use crate::intrinsic::llvm;
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
if self.globals.borrow().contains_key(name) {
let typ = self.globals.borrow().get(name).expect("global").get_type();
let typ = self.globals.borrow()[name].get_type();
let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
if is_tls {
global.set_tls_model(self.tls_model);
@ -103,11 +103,13 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
/// update the declaration and return existing Value instead.
fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
if name.starts_with("llvm.") {
return llvm::intrinsic(name, cx);
let intrinsic = llvm::intrinsic(name, cx);
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
return intrinsic;
}
let func =
if cx.functions.borrow().contains_key(name) {
*cx.functions.borrow().get(name).expect("function")
cx.functions.borrow()[name]
}
else {
let params: Vec<_> = param_types.into_iter().enumerate()


@ -153,8 +153,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
let a_type = a.get_type();
let b_type = b.get_type();
if self.is_native_int_type_or_bool(a_type) && self.is_native_int_type_or_bool(b_type) {
if a.get_type() != b.get_type() {
b = self.context.new_cast(None, b, a.get_type());
if a_type != b_type {
if a_type.is_vector() {
// Vector types need to be bitcast.
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
b = self.context.new_bitcast(None, b, a.get_type());
}
else {
b = self.context.new_cast(None, b, a.get_type());
}
}
self.context.new_binary_op(None, operation, a_type, a, b)
}
@ -593,7 +600,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
let b_type = b.get_type();
let a_native = self.is_native_int_type_or_bool(a_type);
let b_native = self.is_native_int_type_or_bool(b_type);
if a_native && b_native {
if a_type.is_vector() && b_type.is_vector() {
self.context.new_binary_op(None, operation, a_type, a, b)
}
else if a_native && b_native {
if a_type != b_type {
b = self.context.new_cast(None, b, a_type);
}
@ -639,6 +649,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
else {
// Since u128 and i128 are the only types that can be unsupported, we know the type of
// value and the destination type have the same size, so a bitcast is fine.
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
self.context.new_bitcast(None, value, dest_typ)
}
}

File diff suppressed because it is too large.


@ -1,22 +1,250 @@
use gccjit::Function;
use std::borrow::Cow;
use crate::context::CodegenCx;
use gccjit::{Function, FunctionPtrType, RValue, ToRValue};
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
let _gcc_name =
match name {
"llvm.x86.xgetbv" => {
let gcc_name = "__builtin_trap";
let func = cx.context.get_builtin_function(gcc_name);
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
return func;
},
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
"llvm.x86.sse2.cmp.pd" => "__builtin_ia32_cmppd",
"llvm.x86.sse2.movmsk.pd" => "__builtin_ia32_movmskpd",
"llvm.x86.sse2.pmovmskb.128" => "__builtin_ia32_pmovmskb128",
_ => unimplemented!("unsupported LLVM intrinsic {}", name)
};
use crate::{context::CodegenCx, builder::Builder};
unimplemented!();
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
// arguments here.
if gcc_func.get_param_count() != args.len() {
match &*func_name {
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
// FIXME(antoyo): the following intrinsics have 4 (or 5) arguments according to the doc, but are defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
| "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
=> {
// TODO: refactor by moving those intrinsics out of this branch.
let add_before_last_arg =
match &*func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
_ => false,
};
let new_first_arg_is_zero =
match &*func_name {
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
_ => false
};
let arg3_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
_ => 2,
};
let mut new_args = args.to_vec();
let arg3_type = gcc_func.get_param_type(arg3_index);
let first_arg =
if new_first_arg_is_zero {
let vector_type = arg3_type.dyncast_vector().expect("vector type");
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
let num_units = vector_type.get_num_units();
builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
}
else {
builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
};
if add_before_last_arg {
new_args.insert(new_args.len() - 1, first_arg);
}
else {
new_args.push(first_arg);
}
let arg4_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
_ => 3,
};
let arg4_type = gcc_func.get_param_type(arg4_index);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
if add_before_last_arg {
new_args.insert(new_args.len() - 1, minus_one);
}
else {
new_args.push(minus_one);
}
args = new_args.into();
},
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
let mut new_args = args.to_vec();
let arg5_type = gcc_func.get_param_type(4);
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
new_args.push(minus_one);
args = new_args.into();
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
let mut new_args = args.to_vec();
let mut last_arg = None;
if args.len() == 4 {
last_arg = new_args.pop();
}
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
if args.len() == 3 {
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 map to
// the same GCC intrinsic, but the former has 3 parameters and the
// latter has 4 so it doesn't require this additional argument.
let arg5_type = gcc_func.get_param_type(4);
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
}
if let Some(last_arg) = last_arg {
new_args.push(last_arg);
}
args = new_args.into();
},
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" => {
let mut new_args = args.to_vec();
let last_arg = new_args.pop().expect("last arg");
let arg3_type = gcc_func.get_param_type(2);
let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
new_args.push(undefined);
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
new_args.push(last_arg);
args = new_args.into();
},
"__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
let mut new_args = args.to_vec();
let last_arg = new_args.pop().expect("last arg");
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
new_args.push(last_arg);
args = new_args.into();
},
_ => (),
}
}
args
}
pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
// last argument type check.
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
match func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
| "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
if index == args_len - 1 {
return true;
}
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
// Since there are two LLVM intrinsics that map to each of these GCC builtins and only
// one of them has a missing parameter before the last one, we check the number of
// arguments to distinguish those cases.
if args_len == 4 && index == args_len - 1 {
return true;
}
},
_ => (),
}
false
}
#[cfg(not(feature="master"))]
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
match name {
"llvm.x86.xgetbv" => {
let gcc_name = "__builtin_trap";
let func = cx.context.get_builtin_function(gcc_name);
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
return func;
},
_ => unimplemented!("unsupported LLVM intrinsic {}", name),
}
}
#[cfg(feature="master")]
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
let gcc_name = match name {
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask",
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddsubpd512_mask",
"llvm.x86.avx512.pternlog.d.512" => "__builtin_ia32_pternlogd512_mask",
"llvm.x86.avx512.pternlog.d.256" => "__builtin_ia32_pternlogd256_mask",
"llvm.x86.avx512.pternlog.d.128" => "__builtin_ia32_pternlogd128_mask",
"llvm.x86.avx512.pternlog.q.512" => "__builtin_ia32_pternlogq512_mask",
"llvm.x86.avx512.pternlog.q.256" => "__builtin_ia32_pternlogq256_mask",
"llvm.x86.avx512.pternlog.q.128" => "__builtin_ia32_pternlogq128_mask",
"llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512_mask",
"llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512_mask",
"llvm.x86.avx512.sub.ps.512" => "__builtin_ia32_subps512_mask",
"llvm.x86.avx512.sub.pd.512" => "__builtin_ia32_subpd512_mask",
"llvm.x86.avx512.mul.ps.512" => "__builtin_ia32_mulps512_mask",
"llvm.x86.avx512.mul.pd.512" => "__builtin_ia32_mulpd512_mask",
"llvm.x86.avx512.div.ps.512" => "__builtin_ia32_divps512_mask",
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
"llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
"llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
"llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
"llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
"llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
"llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
"llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
"llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
"llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
"llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
"llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
"llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
"llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
"" => "",
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
_ => include!("archs.rs"),
};
let func = cx.context.get_target_builtin_function(gcc_name);
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
func
}


@ -967,34 +967,55 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}
fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
let func = self.current_func.borrow().expect("func");
let result_type = lhs.get_type();
if signed {
// Algorithm from: https://stackoverflow.com/a/56531252/389119
let after_block = func.new_block("after");
let func_name =
match width {
8 => "__builtin_add_overflow",
16 => "__builtin_add_overflow",
32 => "__builtin_sadd_overflow",
64 => "__builtin_saddll_overflow",
128 => "__builtin_add_overflow",
_ => unreachable!(),
};
let overflow_func = self.context.get_builtin_function(func_name);
let result_type = lhs.get_type();
// Based on algorithm from: https://stackoverflow.com/a/56531252/389119
let func = self.current_func.borrow().expect("func");
let res = func.new_local(None, result_type, "saturating_sum");
let overflow = self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None);
let supports_native_type = self.is_native_int_type(result_type);
let overflow =
if supports_native_type {
let func_name =
match width {
8 => "__builtin_add_overflow",
16 => "__builtin_add_overflow",
32 => "__builtin_sadd_overflow",
64 => "__builtin_saddll_overflow",
128 => "__builtin_add_overflow",
_ => unreachable!(),
};
let overflow_func = self.context.get_builtin_function(func_name);
self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
}
else {
let func_name =
match width {
128 => "__rust_i128_addo",
_ => unreachable!(),
};
let param_a = self.context.new_parameter(None, result_type, "a");
let param_b = self.context.new_parameter(None, result_type, "b");
let result_field = self.context.new_field(None, result_type, "result");
let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
let result = self.context.new_call(None, func, &[lhs, rhs]);
let overflow = result.access_field(None, overflow_field);
let int_result = result.access_field(None, result_field);
self.llbb().add_assignment(None, res, int_result);
overflow
};
let then_block = func.new_block("then");
let after_block = func.new_block("after");
let unsigned_type = self.context.new_int_type(width as i32 / 8, false);
let shifted = self.context.new_cast(None, lhs, unsigned_type) >> self.context.new_rvalue_from_int(unsigned_type, width as i32 - 1);
let uint_max = self.context.new_unary_op(None, UnaryOp::BitwiseNegate, unsigned_type,
self.context.new_rvalue_from_int(unsigned_type, 0)
);
let int_max = uint_max >> self.context.new_rvalue_one(unsigned_type);
then_block.add_assignment(None, res, self.context.new_cast(None, shifted + int_max, result_type));
// Return `result_type`'s maximum or minimum value on overflow
// NOTE: convert the type to unsigned to have an unsigned shift.
let unsigned_type = result_type.to_unsigned(&self.cx);
let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
then_block.end_with_jump(None, after_block);
self.llbb().end_with_conditional(None, overflow, then_block, after_block);
@ -1007,19 +1028,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}
else {
// Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
let res = lhs + rhs;
let res_type = res.get_type();
let cond = self.context.new_comparison(None, ComparisonOp::LessThan, res, lhs);
let value = self.context.new_unary_op(None, UnaryOp::Minus, res_type, self.context.new_cast(None, cond, res_type));
res | value
let res = self.gcc_add(lhs, rhs);
let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs);
let value = self.gcc_neg(self.gcc_int_cast(cond, result_type));
self.gcc_or(res, value)
}
}
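The unsigned branch above is the branchless trick from the locklessinc article; a scalar illustration on u8 (not the codegen path itself):

// On overflow the wrapped sum is ORed with an all-ones mask, clamping to MAX.
fn saturating_add_u8(lhs: u8, rhs: u8) -> u8 {
    let res = lhs.wrapping_add(rhs);
    let overflowed = (res < lhs) as u8;   // 1 iff the addition wrapped
    let mask = overflowed.wrapping_neg(); // 0x00 or 0xFF
    res | mask
}
// saturating_add_u8(200, 100) == 255; saturating_add_u8(1, 2) == 3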
// Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
let result_type = lhs.get_type();
if signed {
// Also based on algorithm from: https://stackoverflow.com/a/56531252/389119
let result_type = lhs.get_type();
// Based on algorithm from: https://stackoverflow.com/a/56531252/389119
let func = self.current_func.borrow().expect("func");
let res = func.new_local(None, result_type, "saturating_diff");
let supports_native_type = self.is_native_int_type(result_type);
@ -1059,6 +1079,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
let then_block = func.new_block("then");
let after_block = func.new_block("after");
// Return `result_type`'s maximum or minimum value on overflow
// NOTE: convert the type to unsigned to have an unsigned shift.
let unsigned_type = result_type.to_unsigned(&self.cx);
let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
@ -1076,11 +1097,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
res.to_rvalue()
}
else {
let res = lhs - rhs;
let comparison = self.context.new_comparison(None, ComparisonOp::LessThanEquals, res, lhs);
let comparison = self.context.new_cast(None, comparison, lhs.get_type());
let unary_op = self.context.new_unary_op(None, UnaryOp::Minus, comparison.get_type(), comparison);
self.and(res, unary_op)
let res = self.gcc_sub(lhs, rhs);
let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs);
let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type));
self.gcc_and(res, value)
}
}
}
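Similarly, the unsigned branch of saturating_sub clamps to zero with an AND mask; a scalar illustration on u8:

// When the subtraction borrows, AND the wrapped result with 0x00 to clamp to 0;
// otherwise the mask is 0xFF and the result is kept unchanged.
fn saturating_sub_u8(lhs: u8, rhs: u8) -> u8 {
    let res = lhs.wrapping_sub(rhs);
    let no_borrow = (res <= lhs) as u8;   // 1 iff no wrap-around happened
    let mask = no_borrow.wrapping_neg();  // 0xFF or 0x00
    res & mask
}
// saturating_sub_u8(5, 10) == 0; saturating_sub_u8(10, 5) == 5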


@ -1,15 +1,20 @@
use gccjit::{RValue, Type};
use std::cmp::Ordering;
use gccjit::{BinaryOp, RValue, Type, ToRValue};
use rustc_codegen_ssa::base::compare_simd_types;
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
use rustc_hir as hir;
use rustc_middle::span_bug;
use rustc_middle::ty::layout::HasTyCtxt;
use rustc_middle::ty::{self, Ty};
use rustc_span::{Span, Symbol, sym};
use rustc_target::abi::Align;
use crate::builder::Builder;
use crate::intrinsic;
pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, name: Symbol, callee_ty: Ty<'tcx>, args: &[OperandRef<'tcx, RValue<'gcc>>], ret_ty: Ty<'tcx>, llret_ty: Type<'gcc>, span: Span) -> Result<RValue<'gcc>, ()> {
// macros for error handling:
@ -53,7 +58,53 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let sig =
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
let arg_tys = sig.inputs();
let name_str = name.as_str();
if name == sym::simd_select_bitmask {
require_simd!(arg_tys[1], "argument");
let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
let expected_int_bits = (len.max(8) - 1).next_power_of_two();
let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
let mask_ty = arg_tys[0];
let mut mask = match mask_ty.kind() {
ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
ty::Array(elem, len)
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
&& len.try_eval_usize(bx.tcx, ty::ParamEnv::reveal_all())
== Some(expected_bytes) =>
{
let place = PlaceRef::alloca(bx, args[0].layout);
args[0].val.store(bx, place);
let int_ty = bx.type_ix(expected_bytes * 8);
let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty));
bx.load(int_ty, ptr, Align::ONE)
}
_ => return_error!(
"invalid bitmask `{}`, expected `u{}` or `[u8; {}]`",
mask_ty,
expected_int_bits,
expected_bytes
),
};
let arg1 = args[1].immediate();
let arg1_type = arg1.get_type();
let arg1_vector_type = arg1_type.unqualified().dyncast_vector().expect("vector type");
let arg1_element_type = arg1_vector_type.get_element_type();
let mut elements = vec![];
let one = bx.context.new_rvalue_one(mask.get_type());
for _ in 0..len {
let element = bx.context.new_cast(None, mask & one, arg1_element_type);
elements.push(element);
mask = mask >> one;
}
let vector_mask = bx.context.new_rvalue_from_vector(None, arg1_type, &elements);
return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
}
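The intended semantics of simd_select_bitmask, modeled on scalars (illustration only; the code above builds a per-lane mask vector and defers to vector_select):

// Bit i of `mask` picks lane i from `then_vals` (bit set) or `else_vals` (bit clear).
fn select_bitmask_model(mask: u8, then_vals: [i32; 8], else_vals: [i32; 8]) -> [i32; 8] {
    let mut out = [0i32; 8];
    for i in 0..8 {
        out[i] = if (mask >> i) & 1 == 1 { then_vals[i] } else { else_vals[i] };
    }
    out
}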
// every intrinsic below takes a SIMD vector as its first argument
require_simd!(arg_tys[0], "input");
@ -100,10 +151,28 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
));
}
if let Some(stripped) = name_str.strip_prefix("simd_shuffle") {
let n: u64 = stripped.parse().unwrap_or_else(|_| {
span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
});
if let Some(stripped) = name.as_str().strip_prefix("simd_shuffle") {
let n: u64 =
if stripped.is_empty() {
// Make sure this is actually an array, since typeck only checks the length-suffixed
// version of this intrinsic.
match args[2].layout.ty.kind() {
ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
len.try_eval_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(|| {
span_bug!(span, "could not evaluate shuffle index array length")
})
}
_ => return_error!(
"simd_shuffle index must be an array of `u32`, got `{}`",
args[2].layout.ty
),
}
}
else {
stripped.parse().unwrap_or_else(|_| {
span_bug!(span, "bad `simd_shuffle` instruction only caught in codegen?")
})
};
require_simd!(ret_ty, "return");
@ -134,6 +203,225 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
));
}
#[cfg(feature="master")]
if name == sym::simd_insert {
require!(
in_elem == arg_tys[2],
"expected inserted type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
arg_tys[2]
);
let vector = args[0].immediate();
let index = args[1].immediate();
let value = args[2].immediate();
// TODO(antoyo): use a recursive unqualified() here.
let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type");
let element_type = vector_type.get_element_type();
// NOTE: we cannot cast to an array and assign to its element here because the value might
// not be an l-value. So, call a builtin to set the element.
// TODO(antoyo): perhaps we could create a new vector or maybe there's a GIMPLE instruction for that?
// TODO(antoyo): don't use target specific builtins here.
let func_name =
match in_len {
2 => {
if element_type == bx.i64_type {
"__builtin_ia32_vec_set_v2di"
}
else {
unimplemented!();
}
},
4 => {
if element_type == bx.i32_type {
"__builtin_ia32_vec_set_v4si"
}
else {
unimplemented!();
}
},
8 => {
if element_type == bx.i16_type {
"__builtin_ia32_vec_set_v8hi"
}
else {
unimplemented!();
}
},
_ => unimplemented!("Len: {}", in_len),
};
let builtin = bx.context.get_target_builtin_function(func_name);
let param1_type = builtin.get_param(0).to_rvalue().get_type();
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
}
#[cfg(feature="master")]
if name == sym::simd_extract {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
let vector = args[0].immediate();
return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
}
if name == sym::simd_select {
let m_elem_ty = in_elem;
let m_len = in_len;
require_simd!(arg_tys[1], "argument");
let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
require!(
m_len == v_len,
"mismatched lengths: mask length `{}` != other vector length `{}`",
m_len,
v_len
);
match m_elem_ty.kind() {
ty::Int(_) => {}
_ => return_error!("mask element type is `{}`, expected `i_`", m_elem_ty),
}
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
}
if name == sym::simd_cast {
require_simd!(ret_ty, "return");
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
require!(
in_len == out_len,
"expected return type with length {} (same as input type `{}`), \
found `{}` with length {}",
in_len,
in_ty,
ret_ty,
out_len
);
// casting cares about nominal type, not just structural type
if in_elem == out_elem {
return Ok(args[0].immediate());
}
enum Style {
Float,
Int(/* is signed? */ bool),
Unsupported,
}
let (in_style, in_width) = match in_elem.kind() {
// vectors of pointer-sized integers should've been
// disallowed before here, so this unwrap is safe.
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};
let (out_style, out_width) = match out_elem.kind() {
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};
let extend = |in_type, out_type| {
let vector_type = bx.context.new_vector_type(out_type, 8);
let vector = args[0].immediate();
let array_type = bx.context.new_array_type(None, in_type, 8);
// TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting.
let array = bx.context.new_bitcast(None, vector, array_type);
let cast_vec_element = |index| {
let index = bx.context.new_rvalue_from_int(bx.int_type, index);
bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type)
};
bx.context.new_rvalue_from_vector(None, vector_type, &[
cast_vec_element(0),
cast_vec_element(1),
cast_vec_element(2),
cast_vec_element(3),
cast_vec_element(4),
cast_vec_element(5),
cast_vec_element(6),
cast_vec_element(7),
])
};
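// Hedged example of how `extend` is used below: a signed `simd_cast` from `i8x8`
// to `i16x8` has in_width = 8 < out_width = 16, so the signed branch calls
// `extend(bx.i8_type, bx.i16_type)`, which bitcasts the vector to an 8-element
// array and widens each lane with a per-element cast.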
match (in_style, out_style) {
(Style::Int(in_is_signed), Style::Int(_)) => {
return Ok(match in_width.cmp(&out_width) {
Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
Ordering::Equal => args[0].immediate(),
Ordering::Less => {
if in_is_signed {
match (in_width, out_width) {
// FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly
// call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that
// we can generate a call to it.
(8, 16) => extend(bx.i8_type, bx.i16_type),
(8, 32) => extend(bx.i8_type, bx.i32_type),
(8, 64) => extend(bx.i8_type, bx.i64_type),
(16, 32) => extend(bx.i16_type, bx.i32_type),
(32, 64) => extend(bx.i32_type, bx.i64_type),
(16, 64) => extend(bx.i16_type, bx.i64_type),
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
}
} else {
match (in_width, out_width) {
(8, 16) => extend(bx.u8_type, bx.u16_type),
(8, 32) => extend(bx.u8_type, bx.u32_type),
(8, 64) => extend(bx.u8_type, bx.u64_type),
(16, 32) => extend(bx.u16_type, bx.u32_type),
(16, 64) => extend(bx.u16_type, bx.u64_type),
(32, 64) => extend(bx.u32_type, bx.u64_type),
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
}
}
}
});
}
(Style::Int(_), Style::Float) => {
// TODO(antoyo): add support for internal functions in libgccjit to get access to
// IFN_VEC_CONVERT, which behaves like __builtin_convertvector.
// Or maybe provide convert_vector as an API, since it might not be easy to get the
// types of internal functions.
unimplemented!();
}
(Style::Float, Style::Int(_)) => {
unimplemented!();
}
(Style::Float, Style::Float) => {
unimplemented!();
}
_ => { /* Unsupported. Fallthrough. */ }
}
require!(
false,
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
in_ty,
in_elem,
ret_ty,
out_elem
);
}
macro_rules! arith_binary {
($($name: ident: $($($p: ident),* => $call: ident),*;)*) => {
$(if name == sym::$name {
@ -151,6 +439,105 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
}
}
fn simd_simple_float_intrinsic<'gcc, 'tcx>(
name: Symbol,
in_elem: Ty<'_>,
in_ty: Ty<'_>,
in_len: u64,
bx: &mut Builder<'_, 'gcc, 'tcx>,
span: Span,
args: &[OperandRef<'tcx, RValue<'gcc>>],
) -> Result<RValue<'gcc>, ()> {
macro_rules! emit_error {
($msg: tt) => {
emit_error!($msg, )
};
($msg: tt, $($fmt: tt)*) => {
span_invalid_monomorphization_error(
bx.sess(), span,
&format!(concat!("invalid monomorphization of `{}` intrinsic: ", $msg),
name, $($fmt)*));
}
}
macro_rules! return_error {
($($fmt: tt)*) => {
{
emit_error!($($fmt)*);
return Err(());
}
}
}
let (elem_ty_str, elem_ty) =
if let ty::Float(f) = in_elem.kind() {
let elem_ty = bx.cx.type_float_from_ty(*f);
match f.bit_width() {
32 => ("f32", elem_ty),
64 => ("f64", elem_ty),
_ => {
return_error!(
"unsupported element type `{}` of floating-point vector `{}`",
f.name_str(),
in_ty
);
}
}
}
else {
return_error!("`{}` is not a floating-point type", in_ty);
};
let vec_ty = bx.cx.type_vector(elem_ty, in_len);
let (intr_name, fn_ty) =
match name {
sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)), // TODO(antoyo): pand with 170141183420855150465331762880109871103
sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
_ => return_error!("unrecognized intrinsic `{}`", name),
};
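// Example of the name mangling below (sketch, not from the original source): for
// `simd_fsqrt` on an `f32x4` argument, `intr_name` is "sqrt", `in_len` is 4 and
// `elem_ty_str` is "f32", so `llvm_name` becomes "llvm.sqrt.v4f32", which the
// LLVM-intrinsic translation layer is expected to map onto the matching GCC builtin.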
let llvm_name = &format!("llvm.{0}.v{1}{2}", intr_name, in_len, elem_ty_str);
let function = intrinsic::llvm::intrinsic(llvm_name, &bx.cx);
let function: RValue<'gcc> = unsafe { std::mem::transmute(function) };
let c = bx.call(fn_ty, function, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None);
Ok(c)
}
if std::matches!(
name,
sym::simd_ceil
| sym::simd_fabs
| sym::simd_fcos
| sym::simd_fexp2
| sym::simd_fexp
| sym::simd_flog10
| sym::simd_flog2
| sym::simd_flog
| sym::simd_floor
| sym::simd_fma
| sym::simd_fpow
| sym::simd_fpowi
| sym::simd_fsin
| sym::simd_fsqrt
| sym::simd_round
| sym::simd_trunc
) {
return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
}
arith_binary! {
simd_add: Uint, Int => add, Float => fadd;
simd_sub: Uint, Int => sub, Float => fsub;
@ -185,5 +572,183 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
simd_neg: Int => neg, Float => fneg;
}
#[cfg(feature="master")]
if name == sym::simd_saturating_add || name == sym::simd_saturating_sub {
let lhs = args[0].immediate();
let rhs = args[1].immediate();
let is_add = name == sym::simd_saturating_add;
let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
let (signed, elem_width, elem_ty) = match *in_elem.kind() {
ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)),
ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)),
_ => {
return_error!(
"expected element type `{}` of vector type `{}` \
to be a signed or unsigned integer type",
arg_tys[0].simd_size_and_type(bx.tcx()).1,
arg_tys[0]
);
}
};
let builtin_name =
match (signed, is_add, in_len, elem_width) {
(true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.
(false, true, 32, 8) => "__builtin_ia32_paddusb256",
(true, true, 16, 16) => "__builtin_ia32_paddsw256",
(false, true, 16, 16) => "__builtin_ia32_paddusw256",
(true, false, 16, 16) => "__builtin_ia32_psubsw256",
(false, false, 16, 16) => "__builtin_ia32_psubusw256",
(true, false, 32, 8) => "__builtin_ia32_psubsb256",
(false, false, 32, 8) => "__builtin_ia32_psubusb256",
_ => unimplemented!("signed: {}, is_add: {}, in_len: {}, elem_width: {}", signed, is_add, in_len, elem_width),
};
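// Illustrative mapping (not part of the original change): `simd_saturating_add`
// on an `i8x32` vector yields (signed, is_add, in_len, elem_width) = (true, true, 32, 8),
// which selects "__builtin_ia32_paddsb256", the AVX2 signed saturating byte add.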
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
let func = bx.context.get_target_builtin_function(builtin_name);
let param1_type = func.get_param(0).to_rvalue().get_type();
let param2_type = func.get_param(1).to_rvalue().get_type();
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
let result = bx.context.new_call(None, func, &[lhs, rhs]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vec_ty));
}
macro_rules! arith_red {
($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident,
$identity:expr) => {
if name == sym::$name {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {
let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
if $ordered {
// if overflow occurs, the result is the
// mathematical result modulo 2^n:
Ok(bx.$op(args[1].immediate(), r))
}
else {
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
}
}
ty::Float(_) => {
if $ordered {
// ordered arithmetic reductions take an accumulator
let acc = args[1].immediate();
Ok(bx.$float_reduce(acc, args[0].immediate()))
}
else {
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
}
}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
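// Usage note (hedged): the two invocations below instantiate the macro for the
// unordered add/mul reductions, so `$ordered` is false and the accumulator in
// args[1] is not folded in; for example, `simd_reduce_add_unordered` on an `i32x8`
// lowers to a plain `BinaryOp::Plus` reduction over the lanes.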
arith_red!(
simd_reduce_add_unordered: BinaryOp::Plus,
vector_reduce_fadd_fast,
false,
add,
0.0 // TODO(antoyo): use this argument.
);
arith_red!(
simd_reduce_mul_unordered: BinaryOp::Mult,
vector_reduce_fmul_fast,
false,
mul,
1.0
);
macro_rules! minmax_red {
($name:ident: $reduction:ident) => {
if name == sym::$name {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())),
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
minmax_red!(simd_reduce_min: vector_reduce_min);
minmax_red!(simd_reduce_max: vector_reduce_max);
macro_rules! bitwise_red {
($name:ident : $op:expr, $boolean:expr) => {
if name == sym::$name {
let input = if !$boolean {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
args[0].immediate()
} else {
match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
}
// boolean reductions operate on vectors of i1s:
let i1 = bx.type_i1();
let i1xn = bx.type_vector(i1, in_len as u64);
bx.trunc(args[0].immediate(), i1xn)
};
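// Hedged note on the boolean path above: when `$boolean` is true, the mask vector is
// first truncated to a vector of i1 lanes, the bitwise reduction runs on that, and the
// scalar result is zero-extended back to a bool below. Only the non-boolean
// `simd_reduce_and`/`simd_reduce_or` forms are instantiated further down.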
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {
let r = bx.vector_reduce_op(input, $op);
Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
unimplemented!("simd {}", name);
}

View file

@ -203,7 +203,7 @@ impl WriteBackendMethods for GccCodegenBackend {
fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLTOInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
// TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins.
// NOTE: implemented elsewhere.
// TODO: what is implemented elsewhere ^ ?
// TODO(antoyo): what is implemented elsewhere ^ ?
let module =
match modules.remove(0) {
FatLTOInput::InMemory(module) => module,
@ -301,7 +301,22 @@ pub fn target_features(sess: &Session) -> Vec<Symbol> {
)
.filter(|_feature| {
// TODO(antoyo): implement a way to get the enabled features in libgccjit.
false
// Probably using the equivalent of __builtin_cpu_supports.
#[cfg(feature="master")]
{
_feature.contains("sse") || _feature.contains("avx")
}
#[cfg(not(feature="master"))]
{
false
}
/*
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512gfni,
avx512ifma, avx512pf, avx512vaes, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpclmulqdq,
avx512vpopcntdq, bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, xsave, xsavec, xsaveopt, xsaves
*/
//false
})
.map(|feature| Symbol::intern(feature))
.collect()

View file

@ -3,10 +3,11 @@ use std::convert::TryInto;
use gccjit::{RValue, Struct, Type};
use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods};
use rustc_codegen_ssa::common::TypeKind;
use rustc_middle::bug;
use rustc_middle::{bug, ty};
use rustc_middle::ty::layout::TyAndLayout;
use rustc_target::abi::{AddressSpace, Align, Integer, Size};
use crate::common::TypeReflection;
use crate::context::CodegenCx;
use crate::type_of::LayoutGccExt;
@ -60,6 +61,17 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
let ity = Integer::approximate_align(self, align);
self.type_from_integer(ity)
}
pub fn type_vector(&self, ty: Type<'gcc>, len: u64) -> Type<'gcc> {
self.context.new_vector_type(ty, len)
}
pub fn type_float_from_ty(&self, t: ty::FloatTy) -> Type<'gcc> {
match t {
ty::FloatTy::F32 => self.type_f32(),
ty::FloatTy::F64 => self.type_f64(),
}
}
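// Usage sketch for the two helpers above (illustrative only): building a `<4 x float>`
// GCC type from a Rust `f32` element would look roughly like
// `let f32x4 = cx.type_vector(cx.type_float_from_ty(ty::FloatTy::F32), 4);`.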
}
impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
@ -103,7 +115,7 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
self.context.new_function_pointer_type(None, return_type, params, false)
}
fn type_struct(&self, fields: &[Type<'gcc>], _packed: bool) -> Type<'gcc> {
fn type_struct(&self, fields: &[Type<'gcc>], packed: bool) -> Type<'gcc> {
let types = fields.to_vec();
if let Some(typ) = self.struct_types.borrow().get(fields) {
return typ.clone();
@ -111,8 +123,11 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
let fields: Vec<_> = fields.iter().enumerate()
.map(|(index, field)| self.context.new_field(None, *field, &format!("field{}_TODO", index)))
.collect();
// TODO(antoyo): use packed.
let typ = self.context.new_struct_type(None, "struct", &fields).as_type();
if packed {
#[cfg(feature="master")]
typ.set_packed();
}
self.struct_types.borrow_mut().insert(types, typ);
typ
}
@ -127,7 +142,7 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
else if typ.is_compatible_with(self.double_type) {
TypeKind::Double
}
else if typ.dyncast_vector().is_some() {
else if typ.is_vector() {
TypeKind::Vector
}
else {
@ -141,7 +156,7 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
fn type_ptr_to_ext(&self, ty: Type<'gcc>, _address_space: AddressSpace) -> Type<'gcc> {
// TODO(antoyo): use address_space
// TODO(antoyo): use address_space, perhaps with TYPE_ADDR_SPACE?
ty.make_pointer()
}
@ -167,10 +182,10 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
fn float_width(&self, typ: Type<'gcc>) -> usize {
let f32 = self.context.new_type::<f32>();
let f64 = self.context.new_type::<f64>();
if typ == f32 {
if typ.is_compatible_with(f32) {
32
}
else if typ == f64 {
else if typ.is_compatible_with(f64) {
64
}
else {
@ -197,12 +212,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
self.type_array(self.type_from_integer(unit), size / unit_size)
}
pub fn set_struct_body(&self, typ: Struct<'gcc>, fields: &[Type<'gcc>], _packed: bool) {
// TODO(antoyo): use packed.
pub fn set_struct_body(&self, typ: Struct<'gcc>, fields: &[Type<'gcc>], packed: bool) {
let fields: Vec<_> = fields.iter().enumerate()
.map(|(index, field)| self.context.new_field(None, *field, &format!("field_{}", index)))
.collect();
typ.set_fields(None, &fields);
if packed {
#[cfg(feature="master")]
typ.as_type().set_packed();
}
}
pub fn type_named_struct(&self, name: &str) -> Struct<'gcc> {
@ -229,6 +247,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
self.context.new_array_type(None, ty, len)
}
pub fn type_bool(&self) -> Type<'gcc> {
self.context.new_type::<bool>()
}
}
pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec<Type<'gcc>>, bool) {

View file

@ -24,6 +24,30 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
I128 => self.type_u128(),
}
}
#[cfg(feature="master")]
pub fn type_int_from_ty(&self, t: ty::IntTy) -> Type<'gcc> {
match t {
ty::IntTy::Isize => self.type_isize(),
ty::IntTy::I8 => self.type_i8(),
ty::IntTy::I16 => self.type_i16(),
ty::IntTy::I32 => self.type_i32(),
ty::IntTy::I64 => self.type_i64(),
ty::IntTy::I128 => self.type_i128(),
}
}
#[cfg(feature="master")]
pub fn type_uint_from_ty(&self, t: ty::UintTy) -> Type<'gcc> {
match t {
ty::UintTy::Usize => self.type_isize(),
ty::UintTy::U8 => self.type_i8(),
ty::UintTy::U16 => self.type_i16(),
ty::UintTy::U32 => self.type_i32(),
ty::UintTy::U64 => self.type_i64(),
ty::UintTy::U128 => self.type_i128(),
}
}
}
pub fn uncached_gcc_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>, defer: &mut Option<(Struct<'gcc>, TyAndLayout<'tcx>)>) -> Type<'gcc> {