Merge commit '69b3f5a426' into sync_cg_clif-2024-08-09

2024-08-09 17:18:46 +00:00 · 2024-08-09 17:18:46 +00:00 · af7ea3135d
commit af7ea3135d
parent 899eb03926 69b3f5a426
29 changed files with 399 additions and 851 deletions
--- a/compiler/rustc_codegen_cranelift/src/archive.rs
+++ b/compiler/rustc_codegen_cranelift/src/archive.rs
@ -1,8 +1,13 @@
+use std::borrow::Borrow;
+use std::fs;
 use std::path::Path;

+use ar_archive_writer::{COFFShortExport, MachineTypes};
 use rustc_codegen_ssa::back::archive::{
-    ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER,
+    create_mingw_dll_import_lib, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
+    DEFAULT_OBJECT_READER,
 };
+use rustc_codegen_ssa::common::is_mingw_gnu_toolchain;
 use rustc_session::Session;

 pub(crate) struct ArArchiveBuilderBuilder;
@ -15,10 +20,74 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
    fn create_dll_import_lib(
        &self,
        sess: &Session,
-        _lib_name: &str,
-        _import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
-        _output_path: &Path,
+        lib_name: &str,
+        import_name_and_ordinal_vector: Vec<(String, Option<u16>)>,
+        output_path: &Path,
    ) {
-        sess.dcx().fatal("raw-dylib is not yet supported by rustc_codegen_cranelift");
+        if is_mingw_gnu_toolchain(&sess.target) {
+            // The binutils linker used on -windows-gnu targets cannot read the import
+            // libraries generated by LLVM: in our attempts, the linker produced an .EXE
+            // that loaded but crashed with an AV upon calling one of the imported
+            // functions. Therefore, use binutils to create the import library instead,
+            // by writing a .DEF file to the temp dir and calling binutils's dlltool.
+            create_mingw_dll_import_lib(
+                sess,
+                lib_name,
+                import_name_and_ordinal_vector,
+                output_path,
+            );
+        } else {
+            let mut file =
+                match fs::OpenOptions::new().write(true).create_new(true).open(&output_path) {
+                    Ok(file) => file,
+                    Err(error) => {
+                        sess.dcx().fatal(format!(
+                            "failed to create import library file `{path}`: {error}",
+                            path = output_path.display(),
+                        ));
+                    }
+                };
+
+            let machine = match sess.target.arch.borrow() {
+                "x86" => MachineTypes::I386,
+                "x86_64" => MachineTypes::AMD64,
+                "arm" => MachineTypes::ARMNT,
+                "aarch64" => MachineTypes::ARM64,
+                _ => {
+                    sess.dcx().fatal(format!(
+                        "unsupported target architecture `{arch}`",
+                        arch = sess.target.arch,
+                    ));
+                }
+            };
+
+            let exports = import_name_and_ordinal_vector
+                .iter()
+                .map(|(name, ordinal)| COFFShortExport {
+                    name: name.to_string(),
+                    ext_name: None,
+                    symbol_name: None,
+                    alias_target: None,
+                    ordinal: ordinal.unwrap_or(0),
+                    noname: ordinal.is_some(),
+                    data: false,
+                    private: false,
+                    constant: false,
+                })
+                .collect::<Vec<_>>();
+
+            if let Err(error) = ar_archive_writer::write_import_library(
+                &mut file,
+                lib_name,
+                &exports,
+                machine,
+                !sess.target.is_like_msvc,
+            ) {
+                sess.dcx().fatal(format!(
+                    "failed to create import library `{path}`: `{error}`",
+                    path = output_path.display(),
+                ));
+            }
+        }
    }
 }
--- a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
+++ b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
@ -23,19 +23,7 @@ pub(crate) fn maybe_codegen<'tcx>(
    match bin_op {
        BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => None,
        BinOp::Add | BinOp::AddUnchecked | BinOp::Sub | BinOp::SubUnchecked => None,
-        BinOp::Mul | BinOp::MulUnchecked => {
-            let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
-            let ret_val = fx.lib_call(
-                "__multi3",
-                vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
-                vec![AbiParam::new(types::I128)],
-                &args,
-            )[0];
-            Some(CValue::by_val(
-                ret_val,
-                fx.layout_of(if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 }),
-            ))
-        }
+        BinOp::Mul | BinOp::MulUnchecked => None,
        BinOp::Offset => unreachable!("offset should only be used on pointers, not 128bit ints"),
        BinOp::Div | BinOp::Rem => {
            let name = match (bin_op, is_signed) {
@ -92,6 +80,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(

    match bin_op {
        BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => unreachable!(),
+        BinOp::Add | BinOp::Sub => None,
        BinOp::Mul if is_signed => {
            let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
            let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
@ -112,7 +101,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
            let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
            Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
        }
-        BinOp::Add | BinOp::Sub | BinOp::Mul => {
+        BinOp::Mul => {
            let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
            let out_place = CPlace::new_stack_slot(fx, fx.layout_of(out_ty));
            let param_types = vec![
@ -121,15 +110,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>(
                AbiParam::new(types::I128),
            ];
            let args = [out_place.to_ptr().get_addr(fx), lhs.load_scalar(fx), rhs.load_scalar(fx)];
-            let name = match (bin_op, is_signed) {
-                (BinOp::Add, false) => "__rust_u128_addo",
-                (BinOp::Add, true) => "__rust_i128_addo",
-                (BinOp::Sub, false) => "__rust_u128_subo",
-                (BinOp::Sub, true) => "__rust_i128_subo",
-                (BinOp::Mul, false) => "__rust_u128_mulo",
-                _ => unreachable!(),
-            };
-            fx.lib_call(name, param_types, vec![], &args);
+            fx.lib_call("__rust_u128_mulo", param_types, vec![], &args);
            Some(out_place.to_cvalue(fx))
        }
        BinOp::AddUnchecked | BinOp::SubUnchecked | BinOp::MulUnchecked => unreachable!(),
--- a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs
+++ b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs
@ -38,18 +38,12 @@ builtin_functions! {
    register_functions_for_jit;

    // integers
-    fn __multi3(a: i128, b: i128) -> i128;
    fn __muloti4(n: i128, d: i128, oflow: &mut i32) -> i128;
    fn __udivti3(n: u128, d: u128) -> u128;
    fn __divti3(n: i128, d: i128) -> i128;
    fn __umodti3(n: u128, d: u128) -> u128;
    fn __modti3(n: i128, d: i128) -> i128;
-    fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool);
-    fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool);
-    fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool);
-    fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool);
    fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool);
-    fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool);

    // floats
    fn __floattisf(i: i128) -> f32;
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@ -169,39 +169,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
            }
        }

-        "llvm.x86.sse.add.ss" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            assert_eq!(a.layout(), ret.layout());
-            let layout = a.layout();
-
-            let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            assert!(lane_ty.is_floating_point());
-            let ret_lane_layout = fx.layout_of(lane_ty);
-
-            ret.write_cvalue(fx, a);
-
-            let a_lane = a.value_lane(fx, 0).load_scalar(fx);
-            let b_lane = b.value_lane(fx, 0).load_scalar(fx);
-
-            let res = fx.bcx.ins().fadd(a_lane, b_lane);
-
-            let res_lane = CValue::by_val(res, ret_lane_layout);
-            ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
-        }
-
-        "llvm.x86.sse.sqrt.ps" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
-            intrinsic_args!(fx, args => (a); intrinsic);
-
-            // FIXME use vector instructions when possible
-            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
-                fx.bcx.ins().sqrt(lane)
-            });
-        }
-
        "llvm.x86.sse.max.ps" => {
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
            intrinsic_args!(fx, args => (a, b); intrinsic);
@ -744,117 +711,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
            pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
        }

-        "llvm.x86.fma.vfmaddsub.ps"
-        | "llvm.x86.fma.vfmaddsub.pd"
-        | "llvm.x86.fma.vfmaddsub.ps.256"
-        | "llvm.x86.fma.vfmaddsub.pd.256" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
-            intrinsic_args!(fx, args => (a, b, c); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            assert_eq!(a.layout(), c.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert!(lane_ty.is_floating_point());
-            assert!(ret_lane_ty.is_floating_point());
-            assert_eq!(lane_count, ret_lane_count);
-            let ret_lane_layout = fx.layout_of(ret_lane_ty);
-
-            for idx in 0..lane_count {
-                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
-                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
-                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
-
-                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
-                let res = if idx & 1 == 0 {
-                    fx.bcx.ins().fsub(mul, c_lane)
-                } else {
-                    fx.bcx.ins().fadd(mul, c_lane)
-                };
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.fma.vfmsubadd.ps"
-        | "llvm.x86.fma.vfmsubadd.pd"
-        | "llvm.x86.fma.vfmsubadd.ps.256"
-        | "llvm.x86.fma.vfmsubadd.pd.256" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
-            intrinsic_args!(fx, args => (a, b, c); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            assert_eq!(a.layout(), c.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert!(lane_ty.is_floating_point());
-            assert!(ret_lane_ty.is_floating_point());
-            assert_eq!(lane_count, ret_lane_count);
-            let ret_lane_layout = fx.layout_of(ret_lane_ty);
-
-            for idx in 0..lane_count {
-                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
-                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
-                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
-
-                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
-                let res = if idx & 1 == 0 {
-                    fx.bcx.ins().fadd(mul, c_lane)
-                } else {
-                    fx.bcx.ins().fsub(mul, c_lane)
-                };
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-        }
-
-        "llvm.x86.fma.vfnmadd.ps"
-        | "llvm.x86.fma.vfnmadd.pd"
-        | "llvm.x86.fma.vfnmadd.ps.256"
-        | "llvm.x86.fma.vfnmadd.pd.256" => {
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
-            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
-            intrinsic_args!(fx, args => (a, b, c); intrinsic);
-
-            assert_eq!(a.layout(), b.layout());
-            assert_eq!(a.layout(), c.layout());
-            let layout = a.layout();
-
-            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert!(lane_ty.is_floating_point());
-            assert!(ret_lane_ty.is_floating_point());
-            assert_eq!(lane_count, ret_lane_count);
-            let ret_lane_layout = fx.layout_of(ret_lane_ty);
-
-            for idx in 0..lane_count {
-                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
-                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
-                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
-
-                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
-                let neg_mul = fx.bcx.ins().fneg(mul);
-                let res = fx.bcx.ins().fadd(neg_mul, c_lane);
-
-                let res_lane = CValue::by_val(res, ret_lane_layout);
-                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
-            }
-        }
-
        "llvm.x86.sse42.crc32.32.8"
        | "llvm.x86.sse42.crc32.32.16"
        | "llvm.x86.sse42.crc32.32.32"
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@ -190,9 +190,20 @@ impl CodegenBackend for CraneliftCodegenBackend {
        if sess.target.arch == "x86_64" && sess.target.os != "none" {
            // x86_64 mandates SSE2 support
            vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
-        } else if sess.target.arch == "aarch64" && sess.target.os != "none" {
-            // AArch64 mandates Neon support
-            vec![sym::neon]
+        } else if sess.target.arch == "aarch64" {
+            match &*sess.target.os {
+                "none" => vec![],
+                // On macOS the aes, sha2 and sha3 features are enabled by default and ring
+                // fails to compile on macOS when they are not present.
+                "macos" => vec![
+                    sym::neon,
+                    Symbol::intern("aes"),
+                    Symbol::intern("sha2"),
+                    Symbol::intern("sha3"),
+                ],
+                // AArch64 mandates Neon support
+                _ => vec![sym::neon],
+            }
        } else {
            vec![]
        }
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@ -677,8 +677,10 @@ impl<'tcx> CPlace<'tcx> {
                        let to_addr = to_ptr.get_addr(fx);
                        let src_layout = from.1;
                        let size = dst_layout.size.bytes();
-                        let src_align = src_layout.align.abi.bytes() as u8;
-                        let dst_align = dst_layout.align.abi.bytes() as u8;
+                        // `emit_small_memory_copy` uses `u8` for alignments, just use the maximum
+                        // alignment that fits in a `u8` if the actual alignment is larger.
+                        let src_align = src_layout.align.abi.bytes().try_into().unwrap_or(128);
+                        let dst_align = dst_layout.align.abi.bytes().try_into().unwrap_or(128);
                        fx.bcx.emit_small_memory_copy(
                            fx.target_config,
                            to_addr,