Merge commit '5988bbd24a' into sync_cg_clif-2020-11-27

2020-11-27 20:48:53 +01:00 · 2020-11-27 20:48:53 +01:00 · 477aa67802
commit 477aa67802
parent 774bce7f5e 5988bbd24a
33 changed files with 400 additions and 222 deletions
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@ -263,6 +263,48 @@ fn simd_pair_for_each_lane<'tcx, M: Module>(
    }
 }

+fn simd_reduce<'tcx, M: Module>(
+    fx: &mut FunctionCx<'_, 'tcx, M>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, TyAndLayout<'tcx>, Value, Value) -> Value,
+) {
+    let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
+    assert_eq!(lane_layout, ret.layout());
+
+    let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    for lane_idx in 1..lane_count {
+        let lane = val
+            .value_field(fx, mir::Field::new(lane_idx.into()))
+            .load_scalar(fx);
+        res_val = f(fx, lane_layout, res_val, lane);
+    }
+    let res = CValue::by_val(res_val, lane_layout);
+    ret.write_cvalue(fx, res);
+}
+
+fn simd_reduce_bool<'tcx, M: Module>(
+    fx: &mut FunctionCx<'_, 'tcx, M>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, Value, Value) -> Value,
+) {
+    let (_lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
+    assert!(ret.layout().ty.is_bool());
+
+    let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
+    for lane_idx in 1..lane_count {
+        let lane = val
+            .value_field(fx, mir::Field::new(lane_idx.into()))
+            .load_scalar(fx);
+        let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
+        res_val = f(fx, res_val, lane);
+    }
+    let res = CValue::by_val(res_val, ret.layout());
+    ret.write_cvalue(fx, res);
+}
+
 fn bool_to_zero_or_max_uint<'tcx>(
    fx: &mut FunctionCx<'_, 'tcx, impl Module>,
    layout: TyAndLayout<'tcx>,
@ -287,7 +329,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
 }

 macro simd_cmp {
-    ($fx:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
+    ($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
        let vector_ty = clif_vector_type($fx.tcx, $x.layout());

        if let Some(vector_ty) = vector_ty {
@ -308,6 +350,7 @@ macro simd_cmp {
                |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
                    let res_lane = match lane_layout.ty.kind() {
                        ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
+                        ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
                        _ => unreachable!("{:?}", lane_layout.ty),
                    };
                    bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
@ -315,7 +358,7 @@ macro simd_cmp {
            );
        }
    },
-    ($fx:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => {
+    ($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
        // FIXME use vector icmp when possible
        simd_pair_for_each_lane(
            $fx,
@ -326,6 +369,7 @@ macro simd_cmp {
                let res_lane = match lane_layout.ty.kind() {
                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
+                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
                    _ => unreachable!("{:?}", lane_layout.ty),
                };
                bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
@ -497,12 +541,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        };
        copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
            let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
-            let elem_size = fx
-                .bcx
-                .ins()
-                .iconst(fx.pointer_type, elem_size as i64);
            assert_eq!(args.len(), 3);
-            let byte_amount = fx.bcx.ins().imul(count, elem_size);
+            let byte_amount = if elem_size != 1 {
+                fx.bcx.ins().imul_imm(count, elem_size as i64)
+            } else {
+                count
+            };

            if intrinsic.contains("nonoverlapping") {
                // FIXME emit_small_memcpy
@ -515,12 +559,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        // NOTE: the volatile variants have src and dst swapped
        volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
            let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
-            let elem_size = fx
-                .bcx
-                .ins()
-                .iconst(fx.pointer_type, elem_size as i64);
            assert_eq!(args.len(), 3);
-            let byte_amount = fx.bcx.ins().imul(count, elem_size);
+            let byte_amount = if elem_size != 1 {
+                fx.bcx.ins().imul_imm(count, elem_size as i64)
+            } else {
+                count
+            };

            // FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
            if intrinsic.contains("nonoverlapping") {
@ -676,7 +720,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        offset | arith_offset, (c base, v offset) {
            let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
-            let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
+            let ptr_diff = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(offset, pointee_size as i64)
+            } else {
+                offset
+            };
            let base_val = base.load_scalar(fx);
            let res = fx.bcx.ins().iadd(base_val, ptr_diff);
            ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
@ -688,7 +736,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
        write_bytes | volatile_set_memory, (c dst, v val, v count) {
            let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
-            let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
+            let count = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(count, pointee_size as i64)
+            } else {
+                count
+            };
            let dst_ptr = dst.load_scalar(fx);
            // FIXME make the memset actually volatile when switching to emit_small_memset
            // FIXME use emit_small_memset
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@ -35,30 +35,33 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            });
        };

-        // FIXME support float comparisons
        simd_eq, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, Equal(x, y) -> ret);
+            simd_cmp!(fx, Equal|Equal(x, y) -> ret);
        };
        simd_ne, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, NotEqual(x, y) -> ret);
+            simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret);
        };
        simd_lt, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, UnsignedLessThan|SignedLessThan(x, y) -> ret);
+            simd_cmp!(fx, UnsignedLessThan|SignedLessThan|LessThan(x, y) -> ret);
        };
        simd_le, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual(x, y) -> ret);
+            simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual|LessThanOrEqual(x, y) -> ret);
        };
        simd_gt, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan(x, y) -> ret);
+            simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan|GreaterThan(x, y) -> ret);
        };
        simd_ge, (c x, c y) {
            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
-            simd_cmp!(fx, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret);
+            simd_cmp!(
+                fx,
+                UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual|GreaterThanOrEqual
+                (x, y) -> ret
+            );
        };

        // simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
@ -107,9 +110,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(

            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                let in_lane = if in_idx < lane_count {
-                    x.value_field(fx, mir::Field::new(in_idx.try_into().unwrap()))
+                    x.value_field(fx, mir::Field::new(in_idx.into()))
                } else {
-                    y.value_field(fx, mir::Field::new((in_idx - lane_count).try_into().unwrap()))
+                    y.value_field(fx, mir::Field::new((in_idx - lane_count).into()))
                };
                let out_lane = ret.place_field(fx, mir::Field::new(out_idx));
                out_lane.write_cvalue(fx, in_lane);
@ -143,10 +146,17 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) {
                idx_const
            } else {
-                fx.tcx.sess.span_fatal(
+                fx.tcx.sess.span_warn(
                    span,
                    "Index argument for `simd_extract` is not a constant",
                );
+                let res = crate::trap::trap_unimplemented_ret_value(
+                    fx,
+                    ret.layout(),
+                    "Index argument for `simd_extract` is not a constant",
+                );
+                ret.write_cvalue(fx, res);
+                return;
            };

            let idx = idx_const.val.try_to_bits(Size::from_bytes(4 /* u32*/)).unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
@ -207,7 +217,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            assert_eq!(lane_count, ret_lane_count);

            for lane in 0..lane_count {
-                let lane = mir::Field::new(lane.try_into().unwrap());
+                let lane = mir::Field::new(lane.into());
                let a_lane = a.value_field(fx, lane).load_scalar(fx);
                let b_lane = b.value_field(fx, lane).load_scalar(fx);
                let c_lane = c.value_field(fx, lane).load_scalar(fx);
@ -228,11 +238,42 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
            simd_flt_binop!(fx, fmax(x, y) -> ret);
        };

+        simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+                if lane_layout.ty.is_floating_point() {
+                    fx.bcx.ins().fadd(a, b)
+                } else {
+                    fx.bcx.ins().iadd(a, b)
+                }
+            });
+        };
+
+        simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+                if lane_layout.ty.is_floating_point() {
+                    fx.bcx.ins().fmul(a, b)
+                } else {
+                    fx.bcx.ins().imul(a, b)
+                }
+            });
+        };
+
+        simd_reduce_all, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
+        };
+
+        simd_reduce_any, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
+        };
+
        // simd_fabs
        // simd_saturating_add
        // simd_bitmask
        // simd_select
-        // simd_reduce_add_{,un}ordered
        // simd_rem
    }
 }