Merge commit '5988bbd24a
' into sync_cg_clif-2020-11-27
This commit is contained in:
commit
477aa67802
33 changed files with 400 additions and 222 deletions
|
@ -263,6 +263,48 @@ fn simd_pair_for_each_lane<'tcx, M: Module>(
|
|||
}
|
||||
}
|
||||
|
||||
fn simd_reduce<'tcx, M: Module>(
|
||||
fx: &mut FunctionCx<'_, 'tcx, M>,
|
||||
val: CValue<'tcx>,
|
||||
ret: CPlace<'tcx>,
|
||||
f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, TyAndLayout<'tcx>, Value, Value) -> Value,
|
||||
) {
|
||||
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
|
||||
assert_eq!(lane_layout, ret.layout());
|
||||
|
||||
let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
|
||||
for lane_idx in 1..lane_count {
|
||||
let lane = val
|
||||
.value_field(fx, mir::Field::new(lane_idx.into()))
|
||||
.load_scalar(fx);
|
||||
res_val = f(fx, lane_layout, res_val, lane);
|
||||
}
|
||||
let res = CValue::by_val(res_val, lane_layout);
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
|
||||
fn simd_reduce_bool<'tcx, M: Module>(
|
||||
fx: &mut FunctionCx<'_, 'tcx, M>,
|
||||
val: CValue<'tcx>,
|
||||
ret: CPlace<'tcx>,
|
||||
f: impl Fn(&mut FunctionCx<'_, 'tcx, M>, Value, Value) -> Value,
|
||||
) {
|
||||
let (_lane_layout, lane_count) = lane_type_and_count(fx.tcx, val.layout());
|
||||
assert!(ret.layout().ty.is_bool());
|
||||
|
||||
let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
|
||||
let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
|
||||
for lane_idx in 1..lane_count {
|
||||
let lane = val
|
||||
.value_field(fx, mir::Field::new(lane_idx.into()))
|
||||
.load_scalar(fx);
|
||||
let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
|
||||
res_val = f(fx, res_val, lane);
|
||||
}
|
||||
let res = CValue::by_val(res_val, ret.layout());
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
|
||||
fn bool_to_zero_or_max_uint<'tcx>(
|
||||
fx: &mut FunctionCx<'_, 'tcx, impl Module>,
|
||||
layout: TyAndLayout<'tcx>,
|
||||
|
@ -287,7 +329,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
|
|||
}
|
||||
|
||||
macro simd_cmp {
|
||||
($fx:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
|
||||
($fx:expr, $cc:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
|
||||
let vector_ty = clif_vector_type($fx.tcx, $x.layout());
|
||||
|
||||
if let Some(vector_ty) = vector_ty {
|
||||
|
@ -308,6 +350,7 @@ macro simd_cmp {
|
|||
|fx, lane_layout, res_lane_layout, x_lane, y_lane| {
|
||||
let res_lane = match lane_layout.ty.kind() {
|
||||
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
|
||||
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
|
||||
_ => unreachable!("{:?}", lane_layout.ty),
|
||||
};
|
||||
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
|
||||
|
@ -315,7 +358,7 @@ macro simd_cmp {
|
|||
);
|
||||
}
|
||||
},
|
||||
($fx:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => {
|
||||
($fx:expr, $cc_u:ident|$cc_s:ident|$cc_f:ident($x:ident, $y:ident) -> $ret:ident) => {
|
||||
// FIXME use vector icmp when possible
|
||||
simd_pair_for_each_lane(
|
||||
$fx,
|
||||
|
@ -326,6 +369,7 @@ macro simd_cmp {
|
|||
let res_lane = match lane_layout.ty.kind() {
|
||||
ty::Uint(_) => fx.bcx.ins().icmp(IntCC::$cc_u, x_lane, y_lane),
|
||||
ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc_s, x_lane, y_lane),
|
||||
ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::$cc_f, x_lane, y_lane),
|
||||
_ => unreachable!("{:?}", lane_layout.ty),
|
||||
};
|
||||
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
|
||||
|
@ -497,12 +541,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
|
|||
};
|
||||
copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
|
||||
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
|
||||
let elem_size = fx
|
||||
.bcx
|
||||
.ins()
|
||||
.iconst(fx.pointer_type, elem_size as i64);
|
||||
assert_eq!(args.len(), 3);
|
||||
let byte_amount = fx.bcx.ins().imul(count, elem_size);
|
||||
let byte_amount = if elem_size != 1 {
|
||||
fx.bcx.ins().imul_imm(count, elem_size as i64)
|
||||
} else {
|
||||
count
|
||||
};
|
||||
|
||||
if intrinsic.contains("nonoverlapping") {
|
||||
// FIXME emit_small_memcpy
|
||||
|
@ -515,12 +559,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
|
|||
// NOTE: the volatile variants have src and dst swapped
|
||||
volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
|
||||
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
|
||||
let elem_size = fx
|
||||
.bcx
|
||||
.ins()
|
||||
.iconst(fx.pointer_type, elem_size as i64);
|
||||
assert_eq!(args.len(), 3);
|
||||
let byte_amount = fx.bcx.ins().imul(count, elem_size);
|
||||
let byte_amount = if elem_size != 1 {
|
||||
fx.bcx.ins().imul_imm(count, elem_size as i64)
|
||||
} else {
|
||||
count
|
||||
};
|
||||
|
||||
// FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
|
||||
if intrinsic.contains("nonoverlapping") {
|
||||
|
@ -676,7 +720,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
|
|||
offset | arith_offset, (c base, v offset) {
|
||||
let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
|
||||
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
|
||||
let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
|
||||
let ptr_diff = if pointee_size != 1 {
|
||||
fx.bcx.ins().imul_imm(offset, pointee_size as i64)
|
||||
} else {
|
||||
offset
|
||||
};
|
||||
let base_val = base.load_scalar(fx);
|
||||
let res = fx.bcx.ins().iadd(base_val, ptr_diff);
|
||||
ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
|
||||
|
@ -688,7 +736,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
|
|||
write_bytes | volatile_set_memory, (c dst, v val, v count) {
|
||||
let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
|
||||
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
|
||||
let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
|
||||
let count = if pointee_size != 1 {
|
||||
fx.bcx.ins().imul_imm(count, pointee_size as i64)
|
||||
} else {
|
||||
count
|
||||
};
|
||||
let dst_ptr = dst.load_scalar(fx);
|
||||
// FIXME make the memset actually volatile when switching to emit_small_memset
|
||||
// FIXME use emit_small_memset
|
||||
|
|
|
@ -35,30 +35,33 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
});
|
||||
};
|
||||
|
||||
// FIXME support float comparisons
|
||||
simd_eq, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, Equal(x, y) -> ret);
|
||||
simd_cmp!(fx, Equal|Equal(x, y) -> ret);
|
||||
};
|
||||
simd_ne, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, NotEqual(x, y) -> ret);
|
||||
simd_cmp!(fx, NotEqual|NotEqual(x, y) -> ret);
|
||||
};
|
||||
simd_lt, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, UnsignedLessThan|SignedLessThan(x, y) -> ret);
|
||||
simd_cmp!(fx, UnsignedLessThan|SignedLessThan|LessThan(x, y) -> ret);
|
||||
};
|
||||
simd_le, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual(x, y) -> ret);
|
||||
simd_cmp!(fx, UnsignedLessThanOrEqual|SignedLessThanOrEqual|LessThanOrEqual(x, y) -> ret);
|
||||
};
|
||||
simd_gt, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan(x, y) -> ret);
|
||||
simd_cmp!(fx, UnsignedGreaterThan|SignedGreaterThan|GreaterThan(x, y) -> ret);
|
||||
};
|
||||
simd_ge, (c x, c y) {
|
||||
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
|
||||
simd_cmp!(fx, UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual(x, y) -> ret);
|
||||
simd_cmp!(
|
||||
fx,
|
||||
UnsignedGreaterThanOrEqual|SignedGreaterThanOrEqual|GreaterThanOrEqual
|
||||
(x, y) -> ret
|
||||
);
|
||||
};
|
||||
|
||||
// simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
|
||||
|
@ -107,9 +110,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
|
||||
for (out_idx, in_idx) in indexes.into_iter().enumerate() {
|
||||
let in_lane = if in_idx < lane_count {
|
||||
x.value_field(fx, mir::Field::new(in_idx.try_into().unwrap()))
|
||||
x.value_field(fx, mir::Field::new(in_idx.into()))
|
||||
} else {
|
||||
y.value_field(fx, mir::Field::new((in_idx - lane_count).try_into().unwrap()))
|
||||
y.value_field(fx, mir::Field::new((in_idx - lane_count).into()))
|
||||
};
|
||||
let out_lane = ret.place_field(fx, mir::Field::new(out_idx));
|
||||
out_lane.write_cvalue(fx, in_lane);
|
||||
|
@ -143,10 +146,17 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
let idx_const = if let Some(idx_const) = crate::constant::mir_operand_get_const_val(fx, idx) {
|
||||
idx_const
|
||||
} else {
|
||||
fx.tcx.sess.span_fatal(
|
||||
fx.tcx.sess.span_warn(
|
||||
span,
|
||||
"Index argument for `simd_extract` is not a constant",
|
||||
);
|
||||
let res = crate::trap::trap_unimplemented_ret_value(
|
||||
fx,
|
||||
ret.layout(),
|
||||
"Index argument for `simd_extract` is not a constant",
|
||||
);
|
||||
ret.write_cvalue(fx, res);
|
||||
return;
|
||||
};
|
||||
|
||||
let idx = idx_const.val.try_to_bits(Size::from_bytes(4 /* u32*/)).unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
|
||||
|
@ -207,7 +217,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
assert_eq!(lane_count, ret_lane_count);
|
||||
|
||||
for lane in 0..lane_count {
|
||||
let lane = mir::Field::new(lane.try_into().unwrap());
|
||||
let lane = mir::Field::new(lane.into());
|
||||
let a_lane = a.value_field(fx, lane).load_scalar(fx);
|
||||
let b_lane = b.value_field(fx, lane).load_scalar(fx);
|
||||
let c_lane = c.value_field(fx, lane).load_scalar(fx);
|
||||
|
@ -228,11 +238,42 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
|||
simd_flt_binop!(fx, fmax(x, y) -> ret);
|
||||
};
|
||||
|
||||
simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
|
||||
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
|
||||
simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
|
||||
if lane_layout.ty.is_floating_point() {
|
||||
fx.bcx.ins().fadd(a, b)
|
||||
} else {
|
||||
fx.bcx.ins().iadd(a, b)
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
|
||||
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
|
||||
simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
|
||||
if lane_layout.ty.is_floating_point() {
|
||||
fx.bcx.ins().fmul(a, b)
|
||||
} else {
|
||||
fx.bcx.ins().imul(a, b)
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
simd_reduce_all, (c v) {
|
||||
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
|
||||
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().band(a, b));
|
||||
};
|
||||
|
||||
simd_reduce_any, (c v) {
|
||||
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
|
||||
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
|
||||
};
|
||||
|
||||
// simd_fabs
|
||||
// simd_saturating_add
|
||||
// simd_bitmask
|
||||
// simd_select
|
||||
// simd_reduce_add_{,un}ordered
|
||||
// simd_rem
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue