Merge commit '69b3f5a426
' into sync_cg_clif-2024-08-09
This commit is contained in:
commit
af7ea3135d
29 changed files with 399 additions and 851 deletions
|
@ -1,8 +1,13 @@
|
|||
use std::borrow::Borrow;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use ar_archive_writer::{COFFShortExport, MachineTypes};
|
||||
use rustc_codegen_ssa::back::archive::{
|
||||
ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER,
|
||||
create_mingw_dll_import_lib, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
|
||||
DEFAULT_OBJECT_READER,
|
||||
};
|
||||
use rustc_codegen_ssa::common::is_mingw_gnu_toolchain;
|
||||
use rustc_session::Session;
|
||||
|
||||
pub(crate) struct ArArchiveBuilderBuilder;
|
||||
|
@ -15,10 +20,74 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
|
|||
fn create_dll_import_lib(
|
||||
&self,
|
||||
sess: &Session,
|
||||
_lib_name: &str,
|
||||
_import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
|
||||
_output_path: &Path,
|
||||
lib_name: &str,
|
||||
import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
|
||||
output_path: &Path,
|
||||
) {
|
||||
sess.dcx().fatal("raw-dylib is not yet supported by rustc_codegen_cranelift");
|
||||
if is_mingw_gnu_toolchain(&sess.target) {
|
||||
// The binutils linker used on -windows-gnu targets cannot read the import
|
||||
// libraries generated by LLVM: in our attempts, the linker produced an .EXE
|
||||
// that loaded but crashed with an AV upon calling one of the imported
|
||||
// functions. Therefore, use binutils to create the import library instead,
|
||||
// by writing a .DEF file to the temp dir and calling binutils's dlltool.
|
||||
create_mingw_dll_import_lib(
|
||||
sess,
|
||||
lib_name,
|
||||
import_name_and_ordinal_vector,
|
||||
output_path,
|
||||
);
|
||||
} else {
|
||||
let mut file =
|
||||
match fs::OpenOptions::new().write(true).create_new(true).open(&output_path) {
|
||||
Ok(file) => file,
|
||||
Err(error) => {
|
||||
sess.dcx().fatal(format!(
|
||||
"failed to create import library file `{path}`: {error}",
|
||||
path = output_path.display(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let machine = match sess.target.arch.borrow() {
|
||||
"x86" => MachineTypes::I386,
|
||||
"x86_64" => MachineTypes::AMD64,
|
||||
"arm" => MachineTypes::ARMNT,
|
||||
"aarch64" => MachineTypes::ARM64,
|
||||
_ => {
|
||||
sess.dcx().fatal(format!(
|
||||
"unsupported target architecture `{arch}`",
|
||||
arch = sess.target.arch,
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let exports = import_name_and_ordinal_vector
|
||||
.iter()
|
||||
.map(|(name, ordinal)| COFFShortExport {
|
||||
name: name.to_string(),
|
||||
ext_name: None,
|
||||
symbol_name: None,
|
||||
alias_target: None,
|
||||
ordinal: ordinal.unwrap_or(0),
|
||||
noname: ordinal.is_some(),
|
||||
data: false,
|
||||
private: false,
|
||||
constant: false,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Err(error) = ar_archive_writer::write_import_library(
|
||||
&mut file,
|
||||
lib_name,
|
||||
&exports,
|
||||
machine,
|
||||
!sess.target.is_like_msvc,
|
||||
) {
|
||||
sess.dcx().fatal(format!(
|
||||
"failed to create import library `{path}`: `{error}`",
|
||||
path = output_path.display(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,19 +23,7 @@ pub(crate) fn maybe_codegen<'tcx>(
|
|||
match bin_op {
|
||||
BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => None,
|
||||
BinOp::Add | BinOp::AddUnchecked | BinOp::Sub | BinOp::SubUnchecked => None,
|
||||
BinOp::Mul | BinOp::MulUnchecked => {
|
||||
let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
|
||||
let ret_val = fx.lib_call(
|
||||
"__multi3",
|
||||
vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
|
||||
vec![AbiParam::new(types::I128)],
|
||||
&args,
|
||||
)[0];
|
||||
Some(CValue::by_val(
|
||||
ret_val,
|
||||
fx.layout_of(if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 }),
|
||||
))
|
||||
}
|
||||
BinOp::Mul | BinOp::MulUnchecked => None,
|
||||
BinOp::Offset => unreachable!("offset should only be used on pointers, not 128bit ints"),
|
||||
BinOp::Div | BinOp::Rem => {
|
||||
let name = match (bin_op, is_signed) {
|
||||
|
@ -92,6 +80,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
|
|||
|
||||
match bin_op {
|
||||
BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => unreachable!(),
|
||||
BinOp::Add | BinOp::Sub => None,
|
||||
BinOp::Mul if is_signed => {
|
||||
let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
|
||||
let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
|
||||
|
@ -112,7 +101,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
|
|||
let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
|
||||
Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
|
||||
}
|
||||
BinOp::Add | BinOp::Sub | BinOp::Mul => {
|
||||
BinOp::Mul => {
|
||||
let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
|
||||
let out_place = CPlace::new_stack_slot(fx, fx.layout_of(out_ty));
|
||||
let param_types = vec![
|
||||
|
@ -121,15 +110,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
|
|||
AbiParam::new(types::I128),
|
||||
];
|
||||
let args = [out_place.to_ptr().get_addr(fx), lhs.load_scalar(fx), rhs.load_scalar(fx)];
|
||||
let name = match (bin_op, is_signed) {
|
||||
(BinOp::Add, false) => "__rust_u128_addo",
|
||||
(BinOp::Add, true) => "__rust_i128_addo",
|
||||
(BinOp::Sub, false) => "__rust_u128_subo",
|
||||
(BinOp::Sub, true) => "__rust_i128_subo",
|
||||
(BinOp::Mul, false) => "__rust_u128_mulo",
|
||||
_ => unreachable!(),
|
||||
};
|
||||
fx.lib_call(name, param_types, vec![], &args);
|
||||
fx.lib_call("__rust_u128_mulo", param_types, vec![], &args);
|
||||
Some(out_place.to_cvalue(fx))
|
||||
}
|
||||
BinOp::AddUnchecked | BinOp::SubUnchecked | BinOp::MulUnchecked => unreachable!(),
|
||||
|
|
|
@ -38,18 +38,12 @@ builtin_functions! {
|
|||
register_functions_for_jit;
|
||||
|
||||
// integers
|
||||
fn __multi3(a: i128, b: i128) -> i128;
|
||||
fn __muloti4(n: i128, d: i128, oflow: &mut i32) -> i128;
|
||||
fn __udivti3(n: u128, d: u128) -> u128;
|
||||
fn __divti3(n: i128, d: i128) -> i128;
|
||||
fn __umodti3(n: u128, d: u128) -> u128;
|
||||
fn __modti3(n: i128, d: i128) -> i128;
|
||||
fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool);
|
||||
fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool);
|
||||
fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool);
|
||||
fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool);
|
||||
fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool);
|
||||
fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool);
|
||||
|
||||
// floats
|
||||
fn __floattisf(i: i128) -> f32;
|
||||
|
|
|
@ -169,39 +169,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
|||
}
|
||||
}
|
||||
|
||||
"llvm.x86.sse.add.ss" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
|
||||
intrinsic_args!(fx, args => (a, b); intrinsic);
|
||||
|
||||
assert_eq!(a.layout(), b.layout());
|
||||
assert_eq!(a.layout(), ret.layout());
|
||||
let layout = a.layout();
|
||||
|
||||
let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
assert!(lane_ty.is_floating_point());
|
||||
let ret_lane_layout = fx.layout_of(lane_ty);
|
||||
|
||||
ret.write_cvalue(fx, a);
|
||||
|
||||
let a_lane = a.value_lane(fx, 0).load_scalar(fx);
|
||||
let b_lane = b.value_lane(fx, 0).load_scalar(fx);
|
||||
|
||||
let res = fx.bcx.ins().fadd(a_lane, b_lane);
|
||||
|
||||
let res_lane = CValue::by_val(res, ret_lane_layout);
|
||||
ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
|
||||
}
|
||||
|
||||
"llvm.x86.sse.sqrt.ps" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
|
||||
intrinsic_args!(fx, args => (a); intrinsic);
|
||||
|
||||
// FIXME use vector instructions when possible
|
||||
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
|
||||
fx.bcx.ins().sqrt(lane)
|
||||
});
|
||||
}
|
||||
|
||||
"llvm.x86.sse.max.ps" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
|
||||
intrinsic_args!(fx, args => (a, b); intrinsic);
|
||||
|
@ -744,117 +711,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
|||
pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
|
||||
}
|
||||
|
||||
"llvm.x86.fma.vfmaddsub.ps"
|
||||
| "llvm.x86.fma.vfmaddsub.pd"
|
||||
| "llvm.x86.fma.vfmaddsub.ps.256"
|
||||
| "llvm.x86.fma.vfmaddsub.pd.256" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
|
||||
intrinsic_args!(fx, args => (a, b, c); intrinsic);
|
||||
|
||||
assert_eq!(a.layout(), b.layout());
|
||||
assert_eq!(a.layout(), c.layout());
|
||||
let layout = a.layout();
|
||||
|
||||
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
|
||||
assert!(lane_ty.is_floating_point());
|
||||
assert!(ret_lane_ty.is_floating_point());
|
||||
assert_eq!(lane_count, ret_lane_count);
|
||||
let ret_lane_layout = fx.layout_of(ret_lane_ty);
|
||||
|
||||
for idx in 0..lane_count {
|
||||
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
|
||||
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
|
||||
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
|
||||
|
||||
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
|
||||
let res = if idx & 1 == 0 {
|
||||
fx.bcx.ins().fsub(mul, c_lane)
|
||||
} else {
|
||||
fx.bcx.ins().fadd(mul, c_lane)
|
||||
};
|
||||
|
||||
let res_lane = CValue::by_val(res, ret_lane_layout);
|
||||
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
|
||||
"llvm.x86.fma.vfmsubadd.ps"
|
||||
| "llvm.x86.fma.vfmsubadd.pd"
|
||||
| "llvm.x86.fma.vfmsubadd.ps.256"
|
||||
| "llvm.x86.fma.vfmsubadd.pd.256" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
|
||||
intrinsic_args!(fx, args => (a, b, c); intrinsic);
|
||||
|
||||
assert_eq!(a.layout(), b.layout());
|
||||
assert_eq!(a.layout(), c.layout());
|
||||
let layout = a.layout();
|
||||
|
||||
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
|
||||
assert!(lane_ty.is_floating_point());
|
||||
assert!(ret_lane_ty.is_floating_point());
|
||||
assert_eq!(lane_count, ret_lane_count);
|
||||
let ret_lane_layout = fx.layout_of(ret_lane_ty);
|
||||
|
||||
for idx in 0..lane_count {
|
||||
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
|
||||
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
|
||||
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
|
||||
|
||||
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
|
||||
let res = if idx & 1 == 0 {
|
||||
fx.bcx.ins().fadd(mul, c_lane)
|
||||
} else {
|
||||
fx.bcx.ins().fsub(mul, c_lane)
|
||||
};
|
||||
|
||||
let res_lane = CValue::by_val(res, ret_lane_layout);
|
||||
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
|
||||
"llvm.x86.fma.vfnmadd.ps"
|
||||
| "llvm.x86.fma.vfnmadd.pd"
|
||||
| "llvm.x86.fma.vfnmadd.ps.256"
|
||||
| "llvm.x86.fma.vfnmadd.pd.256" => {
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
|
||||
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
|
||||
intrinsic_args!(fx, args => (a, b, c); intrinsic);
|
||||
|
||||
assert_eq!(a.layout(), b.layout());
|
||||
assert_eq!(a.layout(), c.layout());
|
||||
let layout = a.layout();
|
||||
|
||||
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
|
||||
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
|
||||
assert!(lane_ty.is_floating_point());
|
||||
assert!(ret_lane_ty.is_floating_point());
|
||||
assert_eq!(lane_count, ret_lane_count);
|
||||
let ret_lane_layout = fx.layout_of(ret_lane_ty);
|
||||
|
||||
for idx in 0..lane_count {
|
||||
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
|
||||
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
|
||||
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
|
||||
|
||||
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
|
||||
let neg_mul = fx.bcx.ins().fneg(mul);
|
||||
let res = fx.bcx.ins().fadd(neg_mul, c_lane);
|
||||
|
||||
let res_lane = CValue::by_val(res, ret_lane_layout);
|
||||
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
|
||||
"llvm.x86.sse42.crc32.32.8"
|
||||
| "llvm.x86.sse42.crc32.32.16"
|
||||
| "llvm.x86.sse42.crc32.32.32"
|
||||
|
|
|
@ -190,9 +190,20 @@ impl CodegenBackend for CraneliftCodegenBackend {
|
|||
if sess.target.arch == "x86_64" && sess.target.os != "none" {
|
||||
// x86_64 mandates SSE2 support
|
||||
vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
|
||||
} else if sess.target.arch == "aarch64" && sess.target.os != "none" {
|
||||
// AArch64 mandates Neon support
|
||||
vec![sym::neon]
|
||||
} else if sess.target.arch == "aarch64" {
|
||||
match &*sess.target.os {
|
||||
"none" => vec![],
|
||||
// On macOS the aes, sha2 and sha3 features are enabled by default and ring
|
||||
// fails to compile on macOS when they are not present.
|
||||
"macos" => vec![
|
||||
sym::neon,
|
||||
Symbol::intern("aes"),
|
||||
Symbol::intern("sha2"),
|
||||
Symbol::intern("sha3"),
|
||||
],
|
||||
// AArch64 mandates Neon support
|
||||
_ => vec![sym::neon],
|
||||
}
|
||||
} else {
|
||||
vec![]
|
||||
}
|
||||
|
|
|
@ -677,8 +677,10 @@ impl<'tcx> CPlace<'tcx> {
|
|||
let to_addr = to_ptr.get_addr(fx);
|
||||
let src_layout = from.1;
|
||||
let size = dst_layout.size.bytes();
|
||||
let src_align = src_layout.align.abi.bytes() as u8;
|
||||
let dst_align = dst_layout.align.abi.bytes() as u8;
|
||||
// `emit_small_memory_copy` uses `u8` for alignments, just use the maximum
|
||||
// alignment that fits in a `u8` if the actual alignment is larger.
|
||||
let src_align = src_layout.align.abi.bytes().try_into().unwrap_or(128);
|
||||
let dst_align = dst_layout.align.abi.bytes().try_into().unwrap_or(128);
|
||||
fx.bcx.emit_small_memory_copy(
|
||||
fx.target_config,
|
||||
to_addr,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue