Merge commit '3e50cf6502' into sync_cg_clif-2024-01-26

commit 37018026f0

24 changed files with 434 additions and 323 deletions
@@ -13,17 +13,14 @@ use gimli::write::{
 };
 use gimli::{Encoding, Format, LineEncoding, RunTimeEndian};
 use indexmap::IndexSet;
+use rustc_session::Session;
 
 pub(crate) use self::emit::{DebugReloc, DebugRelocName};
 pub(crate) use self::unwind::UnwindContext;
 use crate::prelude::*;
 
-pub(crate) fn producer() -> String {
-    format!(
-        "rustc version {} with cranelift {}",
-        rustc_interface::util::rustc_version_str().unwrap_or("unknown version"),
-        cranelift_codegen::VERSION,
-    )
+pub(crate) fn producer(sess: &Session) -> String {
+    format!("rustc version {} with cranelift {}", sess.cfg_version, cranelift_codegen::VERSION)
 }
 
 pub(crate) struct DebugContext {
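
Note: the new producer(sess) reads the version string from Session::cfg_version instead of calling into rustc_interface, which lets a later hunk drop the extern crate rustc_interface declaration. A minimal sketch of the string being assembled, with illustrative version values:

// Sketch of the DWARF producer string format used above; version values are illustrative.
fn producer(rustc_version: &str, cranelift_version: &str) -> String {
    format!("rustc version {} with cranelift {}", rustc_version, cranelift_version)
}

fn main() {
    // Prints e.g. "rustc version 1.77.0-nightly with cranelift 0.104"
    println!("{}", producer("1.77.0-nightly", "0.104"));
}
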
@@ -67,7 +64,7 @@ impl DebugContext {
 
         let should_remap_filepaths = tcx.sess.should_prefer_remapped_for_codegen();
 
-        let producer = producer();
+        let producer = producer(tcx.sess);
         let comp_dir = tcx
             .sess
             .opts
@@ -143,6 +143,7 @@ fn emit_cgu(
     debug: Option<DebugContext>,
     unwind_context: UnwindContext,
     global_asm_object_file: Option<PathBuf>,
+    producer: &str,
 ) -> Result<ModuleCodegenResult, String> {
     let mut product = module.finish();
 
@@ -152,8 +153,14 @@ fn emit_cgu(
 
     unwind_context.emit(&mut product);
 
-    let module_regular =
-        emit_module(output_filenames, prof, product.object, ModuleKind::Regular, name.clone())?;
+    let module_regular = emit_module(
+        output_filenames,
+        prof,
+        product.object,
+        ModuleKind::Regular,
+        name.clone(),
+        producer,
+    )?;
 
     Ok(ModuleCodegenResult {
         module_regular,
@@ -174,6 +181,7 @@ fn emit_module(
     mut object: cranelift_object::object::write::Object<'_>,
     kind: ModuleKind,
     name: String,
+    producer_str: &str,
 ) -> Result<CompiledModule, String> {
     if object.format() == cranelift_object::object::BinaryFormat::Elf {
         let comment_section = object.add_section(
@@ -182,7 +190,7 @@ fn emit_module(
             cranelift_object::object::SectionKind::OtherString,
         );
         let mut producer = vec![0];
-        producer.extend(crate::debuginfo::producer().as_bytes());
+        producer.extend(producer_str.as_bytes());
         producer.push(0);
         object.set_section_data(comment_section, producer, 1);
     }
@@ -321,6 +329,8 @@ fn module_codegen(
         (cgu_name, cx, module, codegened_functions)
     });
 
+    let producer = crate::debuginfo::producer(tcx.sess);
+
     OngoingModuleCodegen::Async(std::thread::spawn(move || {
         cx.profiler.clone().generic_activity_with_arg("compile functions", &*cgu_name).run(|| {
             cranelift_codegen::timing::set_thread_profiler(Box::new(super::MeasuremeProfiler(
@@ -348,6 +358,7 @@ fn module_codegen(
                 cx.debug_context,
                 cx.unwind_context,
                 global_asm_object_file,
+                &producer,
             )
         });
         std::mem::drop(token);
@@ -453,6 +464,7 @@ pub(crate) fn run_aot(
                 product.object,
                 ModuleKind::Allocator,
                 "allocator_shim".to_owned(),
+                &crate::debuginfo::producer(tcx.sess),
             ) {
                 Ok(allocator_module) => Some(allocator_module),
                 Err(err) => tcx.dcx().fatal(err),
@@ -467,7 +479,7 @@ pub(crate) fn run_aot(
 
         let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
         let metadata_cgu_name = cgu_name_builder
-            .build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata"))
+            .build_cgu_name(LOCAL_CRATE, ["crate"], Some("metadata"))
             .as_str()
             .to_string();
 
@@ -321,10 +321,9 @@ fn dep_symbol_lookup_fn(
             Linkage::NotLinked | Linkage::IncludedFromDylib => {}
             Linkage::Static => {
                 let name = crate_info.crate_name[&cnum];
-                sess.dcx()
-                    .struct_err(format!("Can't load static lib {}", name))
-                    .note("rustc_codegen_cranelift can only load dylibs in JIT mode.")
-                    .emit();
+                let mut diag = sess.dcx().struct_err(format!("Can't load static lib {}", name));
+                diag.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
+                diag.emit();
             }
             Linkage::Dynamic => {
                 dylib_paths.push(src.dylib.as_ref().unwrap().0.clone());
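
Note: this hunk tracks a rustc diagnostics API change in which note() no longer returns the builder, so the old method chain has to become bind-mutate-emit. A standalone sketch of the same shape, using a hypothetical builder type (not rustc's real DiagnosticBuilder):

// Hypothetical stand-in for rustc's diagnostic builder, showing why chaining breaks.
struct DiagBuilder { msg: String, notes: Vec<String> }

impl DiagBuilder {
    fn new(msg: impl Into<String>) -> Self {
        DiagBuilder { msg: msg.into(), notes: Vec::new() }
    }
    // Takes &mut self and returns (), so .note(..).emit() can no longer be chained.
    fn note(&mut self, note: impl Into<String>) {
        self.notes.push(note.into());
    }
    fn emit(self) {
        eprintln!("error: {}", self.msg);
        for note in self.notes {
            eprintln!("note: {}", note);
        }
    }
}

fn main() {
    // After the change: bind the builder, mutate it, then emit.
    let mut diag = DiagBuilder::new("Can't load static lib example");
    diag.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
    diag.emit();
}
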
@@ -52,7 +52,7 @@ pub(crate) fn codegen_inline_asm_terminator<'tcx>(
     }
 
     let operands = operands
-        .into_iter()
+        .iter()
        .map(|operand| match *operand {
            InlineAsmOperand::In { reg, ref value } => CInlineAsmOperand::In {
                reg,
@@ -506,10 +506,34 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
                         if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
                             generated_asm.push('%');
                         }
-                        self.registers[*operand_idx]
-                            .unwrap()
-                            .emit(&mut generated_asm, self.arch, *modifier)
-                            .unwrap();
+
+                        let reg = self.registers[*operand_idx].unwrap();
+                        match self.arch {
+                            InlineAsmArch::X86_64 => match reg {
+                                InlineAsmReg::X86(reg)
+                                    if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                                        && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                                {
+                                    // rustc emits x0 rather than xmm0
+                                    let class = match *modifier {
+                                        None | Some('x') => "xmm",
+                                        Some('y') => "ymm",
+                                        Some('z') => "zmm",
+                                        _ => unreachable!(),
+                                    };
+                                    write!(
+                                        generated_asm,
+                                        "{class}{}",
+                                        reg as u32 - X86InlineAsmReg::xmm0 as u32
+                                    )
+                                    .unwrap();
+                                }
+                                _ => reg
+                                    .emit(&mut generated_asm, InlineAsmArch::X86_64, *modifier)
+                                    .unwrap(),
+                            },
+                            _ => reg.emit(&mut generated_asm, self.arch, *modifier).unwrap(),
+                        }
                     }
                     CInlineAsmOperand::Const { ref value } => {
                         generated_asm.push_str(value);
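
Note: rustc names SIMD registers generically (x0..x15), while the emitted assembly needs a width-specific name, so the new arm picks the register class from the template modifier. A minimal sketch of that mapping, independent of the compiler's types (the function name is an illustration, not an API):

// Sketch: map a generic SIMD register index plus template modifier to an x86 name.
fn x86_simd_reg_name(index: u32, modifier: Option<char>) -> String {
    let class = match modifier {
        None | Some('x') => "xmm", // 128-bit
        Some('y') => "ymm",        // 256-bit
        Some('z') => "zmm",        // 512-bit
        _ => unreachable!("unsupported modifier"),
    };
    format!("{class}{index}")
}

fn main() {
    assert_eq!(x86_simd_reg_name(0, None), "xmm0");
    assert_eq!(x86_simd_reg_name(7, Some('y')), "ymm7");
    assert_eq!(x86_simd_reg_name(15, Some('z')), "zmm15");
}
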
@@ -739,7 +763,7 @@ fn call_inline_asm<'tcx>(
         },
     )
     .unwrap();
-    let inline_asm_func = fx.module.declare_func_in_func(inline_asm_func, &mut fx.bcx.func);
+    let inline_asm_func = fx.module.declare_func_in_func(inline_asm_func, fx.bcx.func);
    if fx.clif_comments.enabled() {
        fx.add_comment(inline_asm_func, asm_name);
    }
@@ -35,6 +35,10 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
     }
 
     match intrinsic {
+        "llvm.prefetch" => {
+            // Nothing to do. This is merely a perf hint.
+        }
+
         _ if intrinsic.starts_with("llvm.ctlz.v") => {
             intrinsic_args!(fx, args => (a); intrinsic);
 
@@ -243,6 +243,20 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
         }
 
         // FIXME generalize vector types
+        "llvm.aarch64.neon.tbl1.v8i8" => {
+            intrinsic_args!(fx, args => (t, idx); intrinsic);
+
+            let zero = fx.bcx.ins().iconst(types::I8, 0);
+            for i in 0..8 {
+                let idx_lane = idx.value_lane(fx, i).load_scalar(fx);
+                let is_zero =
+                    fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThanOrEqual, idx_lane, 16);
+                let t_idx = fx.bcx.ins().uextend(fx.pointer_type, idx_lane);
+                let t_lane = t.value_lane_dyn(fx, t_idx).load_scalar(fx);
+                let res = fx.bcx.ins().select(is_zero, zero, t_lane);
+                ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+            }
+        }
         "llvm.aarch64.neon.tbl1.v16i8" => {
            intrinsic_args!(fx, args => (t, idx); intrinsic);
 
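
Note: NEON tbl1 is a byte table lookup where every result lane selects one byte from the 16-byte table and any out-of-range index yields zero; that is exactly what the icmp_imm/select pair above implements lane by lane. A scalar sketch of the same semantics:

// Sketch: scalar semantics of AArch64 NEON tbl1 for an 8-lane result.
fn tbl1_v8i8(table: [u8; 16], idx: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    for i in 0..8 {
        // Out-of-range indices (>= 16) produce 0, matching the select on is_zero.
        out[i] = if idx[i] < 16 { table[idx[i] as usize] } else { 0 };
    }
    out
}

fn main() {
    let table: [u8; 16] = core::array::from_fn(|i| (i * 10) as u8);
    assert_eq!(tbl1_v8i8(table, [0, 1, 2, 3, 15, 16, 200, 7]), [0, 10, 20, 30, 150, 0, 0, 70]);
}
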
@@ -610,230 +610,56 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903
             intrinsic_args!(fx, args => (a, b); intrinsic);
 
-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i16);
-            assert_eq!(ret_lane_ty, fx.tcx.types.u8);
-            assert_eq!(lane_count * 2, ret_lane_count);
-
-            let zero = fx.bcx.ins().iconst(types::I16, 0);
-            let max_u8 = fx.bcx.ins().iconst(types::I16, 255);
-            let ret_lane_layout = fx.layout_of(fx.tcx.types.u8);
-
-            for idx in 0..lane_count {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.avx2.packuswb" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i16);
-            assert_eq!(ret_lane_ty, fx.tcx.types.u8);
-            assert_eq!(lane_count * 2, ret_lane_count);
-
-            let zero = fx.bcx.ins().iconst(types::I16, 0);
-            let max_u8 = fx.bcx.ins().iconst(types::I16, 255);
-            let ret_lane_layout = fx.layout_of(fx.tcx.types.u8);
-
-            for idx in 0..lane_count / 2 {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, zero);
-                let sat = fx.bcx.ins().umin(sat, max_u8);
-                let res = fx.bcx.ins().ireduce(types::I8, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.sse2.packssdw.128" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i32);
-            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
-            assert_eq!(lane_count * 2, ret_lane_count);
-
-            let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
-            let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
-            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
-
-            for idx in 0..lane_count {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.sse41.packusdw" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32&ig_expand=4912
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i32);
-            assert_eq!(ret_lane_ty, fx.tcx.types.u16);
-            assert_eq!(lane_count * 2, ret_lane_count);
-
-            let min_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MIN));
-            let max_u16 = fx.bcx.ins().iconst(types::I32, i64::from(u16::MAX));
-            let ret_lane_layout = fx.layout_of(fx.tcx.types.u16);
-
-            for idx in 0..lane_count {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_u16);
-                let sat = fx.bcx.ins().smin(sat, max_u16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_u16);
-                let sat = fx.bcx.ins().smin(sat, max_u16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count + idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.avx2.packssdw" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_ty, fx.tcx.types.i32);
-            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
-            assert_eq!(lane_count * 2, ret_lane_count);
-
-            let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
-            let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
-            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
-
-            for idx in 0..lane_count / 2 {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 + idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = a.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 * 2 + idx).write_cvalue(fx, res_lane);
-            }
-
-            for idx in 0..lane_count / 2 {
-                let lane = b.value_lane(fx, idx).load_scalar(fx);
-                let sat = fx.bcx.ins().smax(lane, min_i16);
-                let sat = fx.bcx.ins().smin(sat, max_i16);
-                let res = fx.bcx.ins().ireduce(types::I16, sat);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, lane_count / 2 * 3 + idx).write_cvalue(fx, res_lane);
-            }
-        }
+            pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Sse);
+        }
+
+        "llvm.x86.sse2.packsswb.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16&ig_expand=4848
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Sse);
+        }
+
+        "llvm.x86.avx2.packuswb" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Avx);
+        }
+
+        "llvm.x86.avx2.packsswb" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16&ig_expand=4851
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Avx);
+        }
+
+        "llvm.x86.sse41.packusdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32&ig_expand=4912
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Sse);
+        }
+
+        "llvm.x86.sse2.packssdw.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Sse);
+        }
+
+        "llvm.x86.avx2.packusdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32&ig_expand=4883
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Avx);
+        }
+
+        "llvm.x86.avx2.packssdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
+        }
 
         "llvm.x86.fma.vfmaddsub.ps"
@@ -1407,3 +1233,115 @@ fn llvm_add_sub<'tcx>(
 
     (cb_out, c)
 }
+
+enum PackSize {
+    U8,
+    U16,
+    S8,
+    S16,
+}
+
+impl PackSize {
+    fn ret_clif_type(&self) -> Type {
+        match self {
+            Self::U8 | Self::S8 => types::I8,
+            Self::U16 | Self::S16 => types::I16,
+        }
+    }
+    fn src_clif_type(&self) -> Type {
+        match self {
+            Self::U8 | Self::S8 => types::I16,
+            Self::U16 | Self::S16 => types::I32,
+        }
+    }
+    fn src_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
+        match self {
+            Self::U8 | Self::S8 => tcx.types.i16,
+            Self::U16 | Self::S16 => tcx.types.i32,
+        }
+    }
+    fn ret_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
+        match self {
+            Self::U8 => tcx.types.u8,
+            Self::S8 => tcx.types.i8,
+            Self::U16 => tcx.types.u16,
+            Self::S16 => tcx.types.i16,
+        }
+    }
+    fn max(&self) -> i64 {
+        match self {
+            Self::U8 => u8::MAX as u64 as i64,
+            Self::S8 => i8::MAX as u8 as u64 as i64,
+            Self::U16 => u16::MAX as u64 as i64,
+            Self::S16 => i16::MAX as u16 as u64 as i64,
+        }
+    }
+    fn min(&self) -> i64 {
+        match self {
+            Self::U8 | Self::U16 => 0,
+            Self::S8 => i16::from(i8::MIN) as u16 as i64,
+            Self::S16 => i32::from(i16::MIN) as u32 as i64,
+        }
+    }
+}
+
+enum PackWidth {
+    Sse = 1,
+    Avx = 2,
+}
+impl PackWidth {
+    fn divisor(&self) -> u64 {
+        match self {
+            Self::Sse => 1,
+            Self::Avx => 2,
+        }
+    }
+}
+
+/// Implement an x86 pack instruction with the intrinsic `_mm{,256}pack{us,s}_epi{16,32}`.
+/// Validated for correctness against LLVM, see commit `c8f5d35508e062bd2d95e6c03429bfec831db6d3`.
+fn pack_instruction<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    a: CValue<'tcx>,
+    b: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    ret_size: PackSize,
+    width: PackWidth,
+) {
+    assert_eq!(a.layout(), b.layout());
+    let layout = a.layout();
+
+    let (src_lane_count, src_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    assert_eq!(src_lane_ty, ret_size.src_ty(fx.tcx));
+    assert_eq!(ret_lane_ty, ret_size.ret_ty(fx.tcx));
+    assert_eq!(src_lane_count * 2, ret_lane_count);
+
+    let min = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.min());
+    let max = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.max());
+    let ret_lane_layout = fx.layout_of(ret_size.ret_ty(fx.tcx));
+
+    let mut round = |source: CValue<'tcx>, source_offset: u64, dest_offset: u64| {
+        let step_amount = src_lane_count / width.divisor();
+        let dest_offset = step_amount * dest_offset;
+        for idx in 0..step_amount {
+            let lane = source.value_lane(fx, step_amount * source_offset + idx).load_scalar(fx);
+            let sat = fx.bcx.ins().smax(lane, min);
+            let sat = match ret_size {
+                PackSize::U8 | PackSize::U16 => fx.bcx.ins().umin(sat, max),
+                PackSize::S8 | PackSize::S16 => fx.bcx.ins().smin(sat, max),
+            };
+            let res = fx.bcx.ins().ireduce(ret_size.ret_clif_type(), sat);
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, dest_offset + idx).write_cvalue(fx, res_lane);
+        }
+    };
+
+    round(a, 0, 0);
+    round(b, 0, 1);
+
+    if let PackWidth::Avx = width {
+        round(a, 1, 2);
+        round(b, 1, 3);
+    }
+}
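
Note: the pack instructions saturate each source lane into the narrower return type and write a's lanes before b's; on AVX this happens independently per 128-bit half, which is what the extra round(a, 1, 2)/round(b, 1, 3) calls handle. A scalar sketch of the SSE packuswb case (i16 to u8 with unsigned saturation), the shape the round closure generalizes:

// Sketch: scalar semantics of SSE packuswb (_mm_packus_epi16): two i16x8
// inputs are clamped to 0..=255 and concatenated into one u8x16 result.
fn packuswb(a: [i16; 8], b: [i16; 8]) -> [u8; 16] {
    let saturate = |x: i16| -> u8 { x.clamp(0, 255) as u8 };
    let mut out = [0u8; 16];
    for (i, &lane) in a.iter().enumerate() {
        out[i] = saturate(lane); // a fills lanes 0..8, like round(a, 0, 0)
    }
    for (i, &lane) in b.iter().enumerate() {
        out[8 + i] = saturate(lane); // b fills lanes 8..16, like round(b, 0, 1)
    }
    out
}

fn main() {
    let a = [-5, 0, 127, 255, 256, 1000, 42, -32768];
    let b = [1; 8];
    let packed = packuswb(a, b);
    assert_eq!(&packed[..8], &[0, 0, 127, 255, 255, 255, 42, 0]);
    assert_eq!(&packed[8..], &[1; 8]);
}
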
@@ -293,7 +293,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             }
 
             ret.write_cvalue(fx, base);
-            let ret_lane = ret.place_lane(fx, idx.try_into().unwrap());
+            let ret_lane = ret.place_lane(fx, idx.into());
             ret_lane.write_cvalue(fx, val);
         }
 
@@ -340,7 +340,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 );
             }
 
-            let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
+            let ret_lane = v.value_lane(fx, idx.into());
             ret.write_cvalue(fx, ret_lane);
         }
 
@@ -822,7 +822,35 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
             let lane_layout = fx.layout_of(lane_ty);
 
-            let m = m.load_scalar(fx);
+            let expected_int_bits = lane_count.max(8);
+            let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
+
+            let m = match m.layout().ty.kind() {
+                ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => m.load_scalar(fx),
+                ty::Array(elem, len)
+                    if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                        && len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
+                            == Some(expected_bytes) =>
+                {
+                    m.force_stack(fx).0.load(
+                        fx,
+                        Type::int(expected_int_bits as u16).unwrap(),
+                        MemFlags::trusted(),
+                    )
+                }
+                _ => {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        format!(
+                            "invalid monomorphization of `simd_select_bitmask` intrinsic: \
+                             cannot accept `{}` as mask, expected `u{}` or `[u8; {}]`",
+                            ret.layout().ty,
+                            expected_int_bits,
+                            expected_bytes
+                        ),
+                    );
+                }
+            };
 
             for lane in 0..lane_count {
                 let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
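
Note: the mask for simd_select_bitmask may now arrive either as an unsigned integer of at least lane_count bits or as a byte array of the matching size. A tiny sketch of the width arithmetic used above:

// Sketch: the mask-width arithmetic used for simd_select_bitmask.
fn expected_mask_widths(lane_count: u64) -> (u64, u64) {
    let expected_int_bits = lane_count.max(8);
    let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
    (expected_int_bits, expected_bytes)
}

fn main() {
    assert_eq!(expected_mask_widths(4), (8, 1));   // accepts u8 or [u8; 1]
    assert_eq!(expected_mask_widths(16), (16, 2)); // accepts u16 or [u8; 2]
    assert_eq!(expected_mask_widths(32), (32, 4)); // accepts u32 or [u8; 4]
}
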
@@ -18,7 +18,6 @@ extern crate rustc_fs_util;
 extern crate rustc_hir;
 extern crate rustc_incremental;
 extern crate rustc_index;
-extern crate rustc_interface;
 extern crate rustc_metadata;
 extern crate rustc_session;
 extern crate rustc_span;
@@ -42,7 +41,7 @@ use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_session::config::OutputFilenames;
 use rustc_session::Session;
-use rustc_span::Symbol;
+use rustc_span::{sym, Symbol};
 
 pub use crate::config::*;
 use crate::prelude::*;
@@ -190,8 +189,17 @@ impl CodegenBackend for CraneliftCodegenBackend {
         }
     }
 
-    fn target_features(&self, _sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> {
-        vec![] // FIXME necessary for #[cfg(target_feature]
+    fn target_features(&self, sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> {
+        // FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
+        if sess.target.arch == "x86_64" && sess.target.os != "none" {
+            // x86_64 mandates SSE2 support
+            vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
+        } else if sess.target.arch == "aarch64" && sess.target.os != "none" {
+            // AArch64 mandates Neon support
+            vec![sym::neon]
+        } else {
+            vec![]
+        }
     }
 
     fn print_version(&self) {
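
Note: reporting the mandated baseline features matters because user code gates on them at compile time; a backend that reports nothing silently disables every such cfg. A small sketch of that gating (the function body is illustrative):

// Sketch: code that is only compiled in when the backend reports sse2,
// which is why target_features() must include the mandated baseline.
#[cfg(target_feature = "sse2")]
fn simd_path() -> &'static str {
    "sse2 path"
}

#[cfg(not(target_feature = "sse2"))]
fn simd_path() -> &'static str {
    "scalar fallback"
}

fn main() {
    println!("selected: {}", simd_path());
}
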
@@ -305,16 +313,13 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Arc<dyn isa::Tar
     let flags = settings::Flags::new(flags_builder);
 
     let isa_builder = match sess.opts.cg.target_cpu.as_deref() {
-        Some("native") => {
-            let builder = cranelift_native::builder_with_options(true).unwrap();
-            builder
-        }
+        Some("native") => cranelift_native::builder_with_options(true).unwrap(),
         Some(value) => {
             let mut builder =
                 cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
                     sess.dcx().fatal(format!("can't compile for {}: {}", target_triple, err));
                 });
-            if let Err(_) = builder.enable(value) {
+            if builder.enable(value).is_err() {
                 sess.dcx()
                     .fatal("the specified target cpu isn't currently supported by Cranelift.");
             }
@@ -28,10 +28,9 @@ pub(crate) fn unsized_info<'tcx>(
             .bcx
             .ins()
             .iconst(fx.pointer_type, len.eval_target_usize(fx.tcx, ParamEnv::reveal_all()) as i64),
-        (
-            &ty::Dynamic(ref data_a, _, src_dyn_kind),
-            &ty::Dynamic(ref data_b, _, target_dyn_kind),
-        ) if src_dyn_kind == target_dyn_kind => {
+        (&ty::Dynamic(data_a, _, src_dyn_kind), &ty::Dynamic(data_b, _, target_dyn_kind))
+            if src_dyn_kind == target_dyn_kind =>
+        {
             let old_info =
                 old_info.expect("unsized_info: missing old info for trait upcasting coercion");
             if data_a.principal_def_id() == data_b.principal_def_id() {
@@ -95,7 +95,7 @@ pub(crate) fn get_vtable<'tcx>(
     let alloc_id = fx.tcx.vtable_allocation((ty, trait_ref));
     let data_id =
         data_id_for_alloc_id(&mut fx.constants_cx, &mut *fx.module, alloc_id, Mutability::Not);
-    let local_data_id = fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+    let local_data_id = fx.module.declare_data_in_func(data_id, fx.bcx.func);
     if fx.clif_comments.enabled() {
         fx.add_comment(local_data_id, format!("vtable: {:?}", alloc_id));
     }