Merge commit '3e50cf6502' into sync_cg_clif-2024-01-26

bjorn3 2024-01-26 18:33:45 +00:00
commit 37018026f0
24 changed files with 434 additions and 323 deletions

View file

@@ -13,17 +13,14 @@ use gimli::write::{
};
use gimli::{Encoding, Format, LineEncoding, RunTimeEndian};
use indexmap::IndexSet;
use rustc_session::Session;
pub(crate) use self::emit::{DebugReloc, DebugRelocName};
pub(crate) use self::unwind::UnwindContext;
use crate::prelude::*;
pub(crate) fn producer() -> String {
format!(
"rustc version {} with cranelift {}",
rustc_interface::util::rustc_version_str().unwrap_or("unknown version"),
cranelift_codegen::VERSION,
)
pub(crate) fn producer(sess: &Session) -> String {
format!("rustc version {} with cranelift {}", sess.cfg_version, cranelift_codegen::VERSION)
}
pub(crate) struct DebugContext {
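
The producer string now comes from the session's baked-in `cfg_version` rather than a `rustc_interface` query, which is what lets the `extern crate rustc_interface;` line disappear further down. A standalone sketch of the new shape, with a stand-in `Session` type and placeholder version strings (the real values come from rustc_session and cranelift_codegen):

struct Session { cfg_version: &'static str } // stand-in for rustc_session::Session

fn producer(sess: &Session) -> String {
    // Same format string as the diff; "0.104" is a placeholder Cranelift version.
    format!("rustc version {} with cranelift {}", sess.cfg_version, "0.104")
}

fn main() {
    let sess = Session { cfg_version: "1.77.0" };
    assert_eq!(producer(&sess), "rustc version 1.77.0 with cranelift 0.104");
}
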
@@ -67,7 +64,7 @@ impl DebugContext {
let should_remap_filepaths = tcx.sess.should_prefer_remapped_for_codegen();
let producer = producer();
let producer = producer(tcx.sess);
let comp_dir = tcx
.sess
.opts

View file

@@ -143,6 +143,7 @@ fn emit_cgu(
debug: Option<DebugContext>,
unwind_context: UnwindContext,
global_asm_object_file: Option<PathBuf>,
producer: &str,
) -> Result<ModuleCodegenResult, String> {
let mut product = module.finish();
@@ -152,8 +153,14 @@ fn emit_cgu(
unwind_context.emit(&mut product);
let module_regular =
emit_module(output_filenames, prof, product.object, ModuleKind::Regular, name.clone())?;
let module_regular = emit_module(
output_filenames,
prof,
product.object,
ModuleKind::Regular,
name.clone(),
producer,
)?;
Ok(ModuleCodegenResult {
module_regular,
@@ -174,6 +181,7 @@ fn emit_module(
mut object: cranelift_object::object::write::Object<'_>,
kind: ModuleKind,
name: String,
producer_str: &str,
) -> Result<CompiledModule, String> {
if object.format() == cranelift_object::object::BinaryFormat::Elf {
let comment_section = object.add_section(
@@ -182,7 +190,7 @@
cranelift_object::object::SectionKind::OtherString,
);
let mut producer = vec![0];
producer.extend(crate::debuginfo::producer().as_bytes());
producer.extend(producer_str.as_bytes());
producer.push(0);
object.set_section_data(comment_section, producer, 1);
}
@@ -321,6 +329,8 @@ fn module_codegen(
(cgu_name, cx, module, codegened_functions)
});
let producer = crate::debuginfo::producer(tcx.sess);
OngoingModuleCodegen::Async(std::thread::spawn(move || {
cx.profiler.clone().generic_activity_with_arg("compile functions", &*cgu_name).run(|| {
cranelift_codegen::timing::set_thread_profiler(Box::new(super::MeasuremeProfiler(
@@ -348,6 +358,7 @@ fn module_codegen(
cx.debug_context,
cx.unwind_context,
global_asm_object_file,
&producer,
)
});
std::mem::drop(token);
@@ -453,6 +464,7 @@ pub(crate) fn run_aot(
product.object,
ModuleKind::Allocator,
"allocator_shim".to_owned(),
&crate::debuginfo::producer(tcx.sess),
) {
Ok(allocator_module) => Some(allocator_module),
Err(err) => tcx.dcx().fatal(err),
@@ -467,7 +479,7 @@ pub(crate) fn run_aot(
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
let metadata_cgu_name = cgu_name_builder
.build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata"))
.build_cgu_name(LOCAL_CRATE, ["crate"], Some("metadata"))
.as_str()
.to_string();
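
The `.comment` payload built in `emit_module` above is a NUL-delimited string: a leading zero byte, the producer text, and a trailing zero, presumably so that entries from different objects stay separated when the linker merges the section. A standalone model of just that byte layout:

fn comment_payload(producer: &str) -> Vec<u8> {
    let mut data = vec![0u8];                    // leading NUL separator
    data.extend_from_slice(producer.as_bytes()); // producer string
    data.push(0);                                // trailing NUL terminator
    data
}

fn main() {
    assert_eq!(comment_payload("p"), vec![0, b'p', 0]);
}

Note also that `module_codegen` computes the producer string before spawning the worker thread, so the closure captures an owned `String` instead of having to borrow the session from another thread.
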

View file

@@ -321,10 +321,9 @@ fn dep_symbol_lookup_fn(
Linkage::NotLinked | Linkage::IncludedFromDylib => {}
Linkage::Static => {
let name = crate_info.crate_name[&cnum];
sess.dcx()
.struct_err(format!("Can't load static lib {}", name))
.note("rustc_codegen_cranelift can only load dylibs in JIT mode.")
.emit();
let mut diag = sess.dcx().struct_err(format!("Can't load static lib {}", name));
diag.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
diag.emit();
}
Linkage::Dynamic => {
dylib_paths.push(src.dylib.as_ref().unwrap().0.clone());

View file

@@ -52,7 +52,7 @@ pub(crate) fn codegen_inline_asm_terminator<'tcx>(
}
let operands = operands
.into_iter()
.iter()
.map(|operand| match *operand {
InlineAsmOperand::In { reg, ref value } => CInlineAsmOperand::In {
reg,
@@ -506,10 +506,34 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
generated_asm.push('%');
}
self.registers[*operand_idx]
.unwrap()
.emit(&mut generated_asm, self.arch, *modifier)
.unwrap();
let reg = self.registers[*operand_idx].unwrap();
match self.arch {
InlineAsmArch::X86_64 => match reg {
InlineAsmReg::X86(reg)
if reg as u32 >= X86InlineAsmReg::xmm0 as u32
&& reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
{
// rustc emits x0 rather than xmm0
let class = match *modifier {
None | Some('x') => "xmm",
Some('y') => "ymm",
Some('z') => "zmm",
_ => unreachable!(),
};
write!(
generated_asm,
"{class}{}",
reg as u32 - X86InlineAsmReg::xmm0 as u32
)
.unwrap();
}
_ => reg
.emit(&mut generated_asm, InlineAsmArch::X86_64, *modifier)
.unwrap(),
},
_ => reg.emit(&mut generated_asm, self.arch, *modifier).unwrap(),
}
}
CInlineAsmOperand::Const { ref value } => {
generated_asm.push_str(value);
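
rustc's inline-asm register names call the SSE registers x0..x15, so the new arm above rebuilds the concrete name from a class prefix chosen by the modifier plus the register index, with the `%` prefix applied for AT&T syntax as in the surrounding code. A standalone model of that printing:

fn xmm_operand(index: u32, modifier: Option<char>, att_syntax: bool) -> String {
    let class = match modifier {
        None | Some('x') => "xmm", // 128-bit
        Some('y') => "ymm",        // 256-bit
        Some('z') => "zmm",        // 512-bit
        _ => unreachable!(),
    };
    format!("{}{class}{index}", if att_syntax { "%" } else { "" })
}

fn main() {
    assert_eq!(xmm_operand(3, Some('y'), true), "%ymm3");
    assert_eq!(xmm_operand(0, None, false), "xmm0");
}
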
@@ -739,7 +763,7 @@ fn call_inline_asm<'tcx>(
},
)
.unwrap();
let inline_asm_func = fx.module.declare_func_in_func(inline_asm_func, &mut fx.bcx.func);
let inline_asm_func = fx.module.declare_func_in_func(inline_asm_func, fx.bcx.func);
if fx.clif_comments.enabled() {
fx.add_comment(inline_asm_func, asm_name);
}

View file

@@ -35,6 +35,10 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
}
match intrinsic {
"llvm.prefetch" => {
// Nothing to do. This is merely a perf hint.
}
_ if intrinsic.starts_with("llvm.ctlz.v") => {
intrinsic_args!(fx, args => (a); intrinsic);

View file

@@ -243,6 +243,20 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
}
// FIXME generalize vector types
"llvm.aarch64.neon.tbl1.v8i8" => {
intrinsic_args!(fx, args => (t, idx); intrinsic);
let zero = fx.bcx.ins().iconst(types::I8, 0);
for i in 0..8 {
let idx_lane = idx.value_lane(fx, i).load_scalar(fx);
let is_zero =
fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThanOrEqual, idx_lane, 16);
let t_idx = fx.bcx.ins().uextend(fx.pointer_type, idx_lane);
let t_lane = t.value_lane_dyn(fx, t_idx).load_scalar(fx);
let res = fx.bcx.ins().select(is_zero, zero, t_lane);
ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
}
}
"llvm.aarch64.neon.tbl1.v16i8" => {
intrinsic_args!(fx, args => (t, idx); intrinsic);
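
The tbl1 lowering above compares each index against 16 and selects zero for out-of-range lanes, matching the AArch64 TBL semantics of zeroing rather than wrapping. A scalar model of the 8-lane variant:

fn tbl1_v8i8(table: [u8; 16], idx: [u8; 8]) -> [u8; 8] {
    // Indices outside the 16-byte table yield zero instead of wrapping.
    idx.map(|i| if (i as usize) < 16 { table[i as usize] } else { 0 })
}

fn main() {
    let t: [u8; 16] = core::array::from_fn(|i| (i * 2) as u8);
    assert_eq!(tbl1_v8i8(t, [0, 1, 15, 16, 255, 7, 2, 3]), [0, 2, 30, 0, 0, 14, 4, 6]);
}
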

View file

@@ -610,230 +610,56 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Sse);
}
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i16);
assert_eq!(ret_lane_ty, fx.tcx.types.u8);
assert_eq!(lane_count * 2, ret_lane_count);
"llvm.x86.sse2.packsswb.128" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16&ig_expand=4848
intrinsic_args!(fx, args => (a, b); intrinsic);
let zero = fx.bcx.ins().iconst(types::I16, 0);
let max_u8 = fx.bcx.ins().iconst(types::I16, 255);
let ret_lane_layout = fx.layout_of(fx.tcx.types.u8);
for idx in 0..lane_count {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
}
pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Sse);
}
"llvm.x86.avx2.packuswb" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i16);
assert_eq!(ret_lane_ty, fx.tcx.types.u8);
assert_eq!(lane_count * 2, ret_lane_count);
let zero = fx.bcx.ins().iconst(types::I16, 0);
let max_u8 = fx.bcx.ins().iconst(types::I16, 255);
let ret_lane_layout = fx.layout_of(fx.tcx.types.u8);
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, zero);
let sat = fx.bcx.ins().umin(sat, max_u8);
let res = fx.bcx.ins().ireduce(types::I8, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
}
pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Avx);
}
"llvm.x86.sse2.packssdw.128" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
"llvm.x86.avx2.packsswb" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16&ig_expand=4851
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i32);
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count * 2, ret_lane_count);
let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for idx in 0..lane_count {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
}
pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Avx);
}
"llvm.x86.sse41.packusdw" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32&ig_expand=4912
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Sse);
}
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i32);
assert_eq!(ret_lane_ty, fx.tcx.types.u16);
assert_eq!(lane_count * 2, ret_lane_count);
"llvm.x86.sse2.packssdw.128" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
intrinsic_args!(fx, args => (a, b); intrinsic);
let min_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MIN));
let max_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MAX));
let ret_lane_layout = fx.layout_of(fx.tcx.types.u16);
pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Sse);
}
for idx in 0..lane_count {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_u16);
let sat = fx.bcx.ins().smin(sat, max_u16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
"llvm.x86.avx2.packusdw" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32&ig_expand=4883
intrinsic_args!(fx, args => (a, b); intrinsic);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_u16);
let sat = fx.bcx.ins().smin(sat, max_u16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
}
pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Avx);
}
"llvm.x86.avx2.packssdw" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i32);
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count * 2, ret_lane_count);
let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
}
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
}
pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
}
"llvm.x86.fma.vfmaddsub.ps"
@@ -1407,3 +1233,115 @@ fn llvm_add_sub<'tcx>(
(cb_out, c)
}
enum PackSize {
U8,
U16,
S8,
S16,
}
impl PackSize {
fn ret_clif_type(&self) -> Type {
match self {
Self::U8 | Self::S8 => types::I8,
Self::U16 | Self::S16 => types::I16,
}
}
fn src_clif_type(&self) -> Type {
match self {
Self::U8 | Self::S8 => types::I16,
Self::U16 | Self::S16 => types::I32,
}
}
fn src_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
match self {
Self::U8 | Self::S8 => tcx.types.i16,
Self::U16 | Self::S16 => tcx.types.i32,
}
}
fn ret_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
match self {
Self::U8 => tcx.types.u8,
Self::S8 => tcx.types.i8,
Self::U16 => tcx.types.u16,
Self::S16 => tcx.types.i16,
}
}
fn max(&self) -> i64 {
match self {
Self::U8 => u8::MAX as u64 as i64,
Self::S8 => i8::MAX as u8 as u64 as i64,
Self::U16 => u16::MAX as u64 as i64,
Self::S16 => i16::MAX as u16 as u64 as i64,
}
}
fn min(&self) -> i64 {
match self {
Self::U8 | Self::U16 => 0,
Self::S8 => i16::from(i8::MIN) as u16 as i64,
Self::S16 => i32::from(i16::MIN) as u32 as i64,
}
}
}
enum PackWidth {
Sse = 1,
Avx = 2,
}
impl PackWidth {
fn divisor(&self) -> u64 {
match self {
Self::Sse => 1,
Self::Avx => 2,
}
}
}
/// Implement an x86 pack instruction with the intrinsic `_mm{,256}pack{us,s}_epi{16,32}`.
/// Validated for correctness against LLVM, see commit `c8f5d35508e062bd2d95e6c03429bfec831db6d3`.
fn pack_instruction<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
a: CValue<'tcx>,
b: CValue<'tcx>,
ret: CPlace<'tcx>,
ret_size: PackSize,
width: PackWidth,
) {
assert_eq!(a.layout(), b.layout());
let layout = a.layout();
let (src_lane_count, src_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(src_lane_ty, ret_size.src_ty(fx.tcx));
assert_eq!(ret_lane_ty, ret_size.ret_ty(fx.tcx));
assert_eq!(src_lane_count * 2, ret_lane_count);
let min = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.min());
let max = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.max());
let ret_lane_layout = fx.layout_of(ret_size.ret_ty(fx.tcx));
let mut round = |source: CValue<'tcx>, source_offset: u64, dest_offset: u64| {
let step_amount = src_lane_count / width.divisor();
let dest_offset = step_amount * dest_offset;
for idx in 0..step_amount {
let lane = source.value_lane(fx, step_amount * source_offset + idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min);
let sat = match ret_size {
PackSize::U8 | PackSize::U16 => fx.bcx.ins().umin(sat, max),
PackSize::S8 | PackSize::S16 => fx.bcx.ins().smin(sat, max),
};
let res = fx.bcx.ins().ireduce(ret_size.ret_clif_type(), sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, dest_offset + idx).write_cvalue(fx, res_lane);
}
};
round(a, 0, 0);
round(b, 0, 1);
if let PackWidth::Avx = width {
round(a, 1, 2);
round(b, 1, 3);
}
}
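
All eight pack intrinsics now funnel into `pack_instruction`; per lane the operation is a saturating narrow: clamp to the destination range, then truncate. The SSE forms write all of a's lanes followed by b's, while the AVX forms operate on 128-bit halves, which is why the two extra `round` calls interleave a-low, b-low, a-high, b-high. A scalar model of the per-lane step:

fn packus_epi16_lane(x: i16) -> u8 {
    // unsigned saturation: clamp into [0, 255], then truncate
    x.clamp(0, u8::MAX as i16) as u8
}

fn packss_epi16_lane(x: i16) -> i8 {
    // signed saturation: clamp into [-128, 127], then truncate
    x.clamp(i8::MIN as i16, i8::MAX as i16) as i8
}

fn main() {
    assert_eq!(packus_epi16_lane(-5), 0);
    assert_eq!(packus_epi16_lane(300), 255);
    assert_eq!(packss_epi16_lane(300), 127);
    assert_eq!(packss_epi16_lane(-300), -128);
}
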

View file

@@ -293,7 +293,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
}
ret.write_cvalue(fx, base);
let ret_lane = ret.place_lane(fx, idx.try_into().unwrap());
let ret_lane = ret.place_lane(fx, idx.into());
ret_lane.write_cvalue(fx, val);
}
@@ -340,7 +340,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
);
}
let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
let ret_lane = v.value_lane(fx, idx.into());
ret.write_cvalue(fx, ret_lane);
}
@@ -822,7 +822,35 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
let lane_layout = fx.layout_of(lane_ty);
let m = m.load_scalar(fx);
let expected_int_bits = lane_count.max(8);
let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
let m = match m.layout().ty.kind() {
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => m.load_scalar(fx),
ty::Array(elem, len)
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
&& len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
== Some(expected_bytes) =>
{
m.force_stack(fx).0.load(
fx,
Type::int(expected_int_bits as u16).unwrap(),
MemFlags::trusted(),
)
}
_ => {
fx.tcx.dcx().span_fatal(
span,
format!(
"invalid monomorphization of `simd_select_bitmask` intrinsic: \
cannot accept `{}` as mask, expected `u{}` or `[u8; {}]`",
ret.layout().ty,
expected_int_bits,
expected_bytes
),
);
}
};
for lane in 0..lane_count {
let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
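
`expected_bytes` is a ceiling division of the bit count: with 4 lanes, `expected_int_bits` is `max(4, 8) = 8` and `expected_bytes` is 1. Once the mask is loaded as an integer, bit i selects lane i; a set bit picks from the first operand, matching the `ushr_imm` loop above. A scalar model of the assumed semantics:

fn select_bitmask(m: u8, a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    // Lane i comes from `a` when bit i of the mask is set, else from `b`.
    core::array::from_fn(|i| if (m >> i) & 1 != 0 { a[i] } else { b[i] })
}

fn main() {
    assert_eq!(select_bitmask(0b0101, [1, 2, 3, 4], [9, 9, 9, 9]), [1, 9, 3, 9]);
}
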

View file

@@ -18,7 +18,6 @@ extern crate rustc_fs_util;
extern crate rustc_hir;
extern crate rustc_incremental;
extern crate rustc_index;
extern crate rustc_interface;
extern crate rustc_metadata;
extern crate rustc_session;
extern crate rustc_span;
@@ -42,7 +41,7 @@ use rustc_metadata::EncodedMetadata;
use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
use rustc_session::config::OutputFilenames;
use rustc_session::Session;
use rustc_span::Symbol;
use rustc_span::{sym, Symbol};
pub use crate::config::*;
use crate::prelude::*;
@@ -190,8 +189,17 @@ impl CodegenBackend for CraneliftCodegenBackend {
}
}
fn target_features(&self, _sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> {
vec![] // FIXME necessary for #[cfg(target_feature]
fn target_features(&self, sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> {
// FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
if sess.target.arch == "x86_64" && sess.target.os != "none" {
// x86_64 mandates SSE2 support
vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
} else if sess.target.arch == "aarch64" && sess.target.os != "none" {
// AArch64 mandates Neon support
vec![sym::neon]
} else {
vec![]
}
}
fn print_version(&self) {
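
The backend now reports the features each target baseline mandates (SSE2 on x86_64, Neon on AArch64) so `#[cfg(target_feature = ...)]` sees something sensible, while still omitting anything detected beyond the baseline. A standalone model of that selection logic:

fn baseline_features(arch: &str, os: &str) -> Vec<&'static str> {
    if arch == "x86_64" && os != "none" {
        vec!["fxsr", "sse", "sse2"] // x86_64 mandates SSE2
    } else if arch == "aarch64" && os != "none" {
        vec!["neon"] // AArch64 mandates Neon
    } else {
        vec![] // bare-metal and other targets: no assumptions
    }
}

fn main() {
    assert_eq!(baseline_features("x86_64", "linux"), ["fxsr", "sse", "sse2"]);
    assert!(baseline_features("x86_64", "none").is_empty());
}
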
@@ -305,16 +313,13 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Arc<dyn isa::TargetIsa + 'static>
let flags = settings::Flags::new(flags_builder);
let isa_builder = match sess.opts.cg.target_cpu.as_deref() {
Some("native") => {
let builder = cranelift_native::builder_with_options(true).unwrap();
builder
}
Some("native") => cranelift_native::builder_with_options(true).unwrap(),
Some(value) => {
let mut builder =
cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
sess.dcx().fatal(format!("can't compile for {}: {}", target_triple, err));
});
if let Err(_) = builder.enable(value) {
if builder.enable(value).is_err() {
sess.dcx()
.fatal("the specified target cpu isn't currently supported by Cranelift.");
}

View file

@@ -28,10 +28,9 @@ pub(crate) fn unsized_info<'tcx>(
.bcx
.ins()
.iconst(fx.pointer_type, len.eval_target_usize(fx.tcx, ParamEnv::reveal_all()) as i64),
(
&ty::Dynamic(ref data_a, _, src_dyn_kind),
&ty::Dynamic(ref data_b, _, target_dyn_kind),
) if src_dyn_kind == target_dyn_kind => {
(&ty::Dynamic(data_a, _, src_dyn_kind), &ty::Dynamic(data_b, _, target_dyn_kind))
if src_dyn_kind == target_dyn_kind =>
{
let old_info =
old_info.expect("unsized_info: missing old info for trait upcasting coercion");
if data_a.principal_def_id() == data_b.principal_def_id() {

View file

@@ -95,7 +95,7 @@ pub(crate) fn get_vtable<'tcx>(
let alloc_id = fx.tcx.vtable_allocation((ty, trait_ref));
let data_id =
data_id_for_alloc_id(&mut fx.constants_cx, &mut *fx.module, alloc_id, Mutability::Not);
let local_data_id = fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
let local_data_id = fx.module.declare_data_in_func(data_id, fx.bcx.func);
if fx.clif_comments.enabled() {
fx.add_comment(local_data_id, format!("vtable: {:?}", alloc_id));
}