Merge commit '69b3f5a426' into sync_cg_clif-2024-08-09

This commit is contained in:
bjorn3 2024-08-09 17:18:46 +00:00
commit af7ea3135d
29 changed files with 399 additions and 851 deletions

View file

@ -1,8 +1,13 @@
use std::borrow::Borrow;
use std::fs;
use std::path::Path;
use ar_archive_writer::{COFFShortExport, MachineTypes};
use rustc_codegen_ssa::back::archive::{
ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER,
create_mingw_dll_import_lib, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
DEFAULT_OBJECT_READER,
};
use rustc_codegen_ssa::common::is_mingw_gnu_toolchain;
use rustc_session::Session;
pub(crate) struct ArArchiveBuilderBuilder;
@ -15,10 +20,74 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
fn create_dll_import_lib(
&self,
sess: &Session,
_lib_name: &str,
_import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
_output_path: &Path,
lib_name: &str,
import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
output_path: &Path,
) {
sess.dcx().fatal("raw-dylib is not yet supported by rustc_codegen_cranelift");
if is_mingw_gnu_toolchain(&sess.target) {
// The binutils linker used on -windows-gnu targets cannot read the import
// libraries generated by LLVM: in our attempts, the linker produced an .EXE
// that loaded but crashed with an AV upon calling one of the imported
// functions. Therefore, use binutils to create the import library instead,
// by writing a .DEF file to the temp dir and calling binutils's dlltool.
create_mingw_dll_import_lib(
sess,
lib_name,
import_name_and_ordinal_vector,
output_path,
);
} else {
let mut file =
match fs::OpenOptions::new().write(true).create_new(true).open(&output_path) {
Ok(file) => file,
Err(error) => {
sess.dcx().fatal(format!(
"failed to create import library file `{path}`: {error}",
path = output_path.display(),
));
}
};
let machine = match sess.target.arch.borrow() {
"x86" => MachineTypes::I386,
"x86_64" => MachineTypes::AMD64,
"arm" => MachineTypes::ARMNT,
"aarch64" => MachineTypes::ARM64,
_ => {
sess.dcx().fatal(format!(
"unsupported target architecture `{arch}`",
arch = sess.target.arch,
));
}
};
let exports = import_name_and_ordinal_vector
.iter()
.map(|(name, ordinal)| COFFShortExport {
name: name.to_string(),
ext_name: None,
symbol_name: None,
alias_target: None,
ordinal: ordinal.unwrap_or(0),
noname: ordinal.is_some(),
data: false,
private: false,
constant: false,
})
.collect::<Vec<_>>();
if let Err(error) = ar_archive_writer::write_import_library(
&mut file,
lib_name,
&exports,
machine,
!sess.target.is_like_msvc,
) {
sess.dcx().fatal(format!(
"failed to create import library `{path}`: `{error}`",
path = output_path.display(),
));
}
}
}
}

View file

@ -23,19 +23,7 @@ pub(crate) fn maybe_codegen<'tcx>(
match bin_op {
BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => None,
BinOp::Add | BinOp::AddUnchecked | BinOp::Sub | BinOp::SubUnchecked => None,
BinOp::Mul | BinOp::MulUnchecked => {
let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
let ret_val = fx.lib_call(
"__multi3",
vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
vec![AbiParam::new(types::I128)],
&args,
)[0];
Some(CValue::by_val(
ret_val,
fx.layout_of(if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 }),
))
}
BinOp::Mul | BinOp::MulUnchecked => None,
BinOp::Offset => unreachable!("offset should only be used on pointers, not 128bit ints"),
BinOp::Div | BinOp::Rem => {
let name = match (bin_op, is_signed) {
@ -92,6 +80,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
match bin_op {
BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => unreachable!(),
BinOp::Add | BinOp::Sub => None,
BinOp::Mul if is_signed => {
let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
@ -112,7 +101,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
}
BinOp::Add | BinOp::Sub | BinOp::Mul => {
BinOp::Mul => {
let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
let out_place = CPlace::new_stack_slot(fx, fx.layout_of(out_ty));
let param_types = vec![
@ -121,15 +110,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
AbiParam::new(types::I128),
];
let args = [out_place.to_ptr().get_addr(fx), lhs.load_scalar(fx), rhs.load_scalar(fx)];
let name = match (bin_op, is_signed) {
(BinOp::Add, false) => "__rust_u128_addo",
(BinOp::Add, true) => "__rust_i128_addo",
(BinOp::Sub, false) => "__rust_u128_subo",
(BinOp::Sub, true) => "__rust_i128_subo",
(BinOp::Mul, false) => "__rust_u128_mulo",
_ => unreachable!(),
};
fx.lib_call(name, param_types, vec![], &args);
fx.lib_call("__rust_u128_mulo", param_types, vec![], &args);
Some(out_place.to_cvalue(fx))
}
BinOp::AddUnchecked | BinOp::SubUnchecked | BinOp::MulUnchecked => unreachable!(),

View file

@ -38,18 +38,12 @@ builtin_functions! {
register_functions_for_jit;
// integers
fn __multi3(a: i128, b: i128) -> i128;
fn __muloti4(n: i128, d: i128, oflow: &mut i32) -> i128;
fn __udivti3(n: u128, d: u128) -> u128;
fn __divti3(n: i128, d: i128) -> i128;
fn __umodti3(n: u128, d: u128) -> u128;
fn __modti3(n: i128, d: i128) -> i128;
fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool);
fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool);
fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool);
fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool);
fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool);
fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool);
// floats
fn __floattisf(i: i128) -> f32;

View file

@ -169,39 +169,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
}
"llvm.x86.sse.add.ss" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
intrinsic_args!(fx, args => (a, b); intrinsic);
assert_eq!(a.layout(), b.layout());
assert_eq!(a.layout(), ret.layout());
let layout = a.layout();
let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
assert!(lane_ty.is_floating_point());
let ret_lane_layout = fx.layout_of(lane_ty);
ret.write_cvalue(fx, a);
let a_lane = a.value_lane(fx, 0).load_scalar(fx);
let b_lane = b.value_lane(fx, 0).load_scalar(fx);
let res = fx.bcx.ins().fadd(a_lane, b_lane);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
}
"llvm.x86.sse.sqrt.ps" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
intrinsic_args!(fx, args => (a); intrinsic);
// FIXME use vector instructions when possible
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
fx.bcx.ins().sqrt(lane)
});
}
"llvm.x86.sse.max.ps" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
intrinsic_args!(fx, args => (a, b); intrinsic);
@ -744,117 +711,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
}
"llvm.x86.fma.vfmaddsub.ps"
| "llvm.x86.fma.vfmaddsub.pd"
| "llvm.x86.fma.vfmaddsub.ps.256"
| "llvm.x86.fma.vfmaddsub.pd.256" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
intrinsic_args!(fx, args => (a, b, c); intrinsic);
assert_eq!(a.layout(), b.layout());
assert_eq!(a.layout(), c.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert!(lane_ty.is_floating_point());
assert!(ret_lane_ty.is_floating_point());
assert_eq!(lane_count, ret_lane_count);
let ret_lane_layout = fx.layout_of(ret_lane_ty);
for idx in 0..lane_count {
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
let res = if idx & 1 == 0 {
fx.bcx.ins().fsub(mul, c_lane)
} else {
fx.bcx.ins().fadd(mul, c_lane)
};
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.fma.vfmsubadd.ps"
| "llvm.x86.fma.vfmsubadd.pd"
| "llvm.x86.fma.vfmsubadd.ps.256"
| "llvm.x86.fma.vfmsubadd.pd.256" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
intrinsic_args!(fx, args => (a, b, c); intrinsic);
assert_eq!(a.layout(), b.layout());
assert_eq!(a.layout(), c.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert!(lane_ty.is_floating_point());
assert!(ret_lane_ty.is_floating_point());
assert_eq!(lane_count, ret_lane_count);
let ret_lane_layout = fx.layout_of(ret_lane_ty);
for idx in 0..lane_count {
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
let res = if idx & 1 == 0 {
fx.bcx.ins().fadd(mul, c_lane)
} else {
fx.bcx.ins().fsub(mul, c_lane)
};
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.fma.vfnmadd.ps"
| "llvm.x86.fma.vfnmadd.pd"
| "llvm.x86.fma.vfnmadd.ps.256"
| "llvm.x86.fma.vfnmadd.pd.256" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
intrinsic_args!(fx, args => (a, b, c); intrinsic);
assert_eq!(a.layout(), b.layout());
assert_eq!(a.layout(), c.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert!(lane_ty.is_floating_point());
assert!(ret_lane_ty.is_floating_point());
assert_eq!(lane_count, ret_lane_count);
let ret_lane_layout = fx.layout_of(ret_lane_ty);
for idx in 0..lane_count {
let a_lane = a.value_lane(fx, idx).load_scalar(fx);
let b_lane = b.value_lane(fx, idx).load_scalar(fx);
let c_lane = c.value_lane(fx, idx).load_scalar(fx);
let mul = fx.bcx.ins().fmul(a_lane, b_lane);
let neg_mul = fx.bcx.ins().fneg(mul);
let res = fx.bcx.ins().fadd(neg_mul, c_lane);
let res_lane = CValue::by_val(res, ret_lane_layout);
ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.sse42.crc32.32.8"
| "llvm.x86.sse42.crc32.32.16"
| "llvm.x86.sse42.crc32.32.32"

View file

@ -190,9 +190,20 @@ impl CodegenBackend for CraneliftCodegenBackend {
if sess.target.arch == "x86_64" && sess.target.os != "none" {
// x86_64 mandates SSE2 support
vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
} else if sess.target.arch == "aarch64" && sess.target.os != "none" {
// AArch64 mandates Neon support
vec![sym::neon]
} else if sess.target.arch == "aarch64" {
match &*sess.target.os {
"none" => vec![],
// On macOS the aes, sha2 and sha3 features are enabled by default and ring
// fails to compile on macOS when they are not present.
"macos" => vec![
sym::neon,
Symbol::intern("aes"),
Symbol::intern("sha2"),
Symbol::intern("sha3"),
],
// AArch64 mandates Neon support
_ => vec![sym::neon],
}
} else {
vec![]
}

View file

@ -677,8 +677,10 @@ impl<'tcx> CPlace<'tcx> {
let to_addr = to_ptr.get_addr(fx);
let src_layout = from.1;
let size = dst_layout.size.bytes();
let src_align = src_layout.align.abi.bytes() as u8;
let dst_align = dst_layout.align.abi.bytes() as u8;
// `emit_small_memory_copy` uses `u8` for alignments, just use the maximum
// alignment that fits in a `u8` if the actual alignment is larger.
let src_align = src_layout.align.abi.bytes().try_into().unwrap_or(128);
let dst_align = dst_layout.align.abi.bytes().try_into().unwrap_or(128);
fx.bcx.emit_small_memory_copy(
fx.target_config,
to_addr,