Auto merge of #3492 - eduardosm:intrinsics-x86-avx2, r=oli-obk
Implement LLVM x86 AVX2 intrinsics
This commit is contained in:
commit
c1073fb36e
8 changed files with 2474 additions and 257 deletions
1
src/tools/miri/clippy.toml
Normal file
1
src/tools/miri/clippy.toml
Normal file
|
@ -0,0 +1 @@
|
||||||
|
arithmetic-side-effects-allowed = ["rustc_target::abi::Size"]
|
|
@ -7,7 +7,8 @@ use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op,
|
bin_op_simd_float_all, conditional_dot_product, convert_float_to_int, horizontal_bin_op,
|
||||||
round_all, test_bits_masked, test_high_bits_masked, unary_op_ps, FloatBinOp, FloatUnaryOp,
|
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
|
||||||
|
FloatBinOp, FloatUnaryOp,
|
||||||
};
|
};
|
||||||
use crate::*;
|
use crate::*;
|
||||||
use shims::foreign_items::EmulateForeignItemResult;
|
use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
@ -347,71 +348,3 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
Ok(EmulateForeignItemResult::NeedsJumping)
|
Ok(EmulateForeignItemResult::NeedsJumping)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Conditionally loads from `ptr` according the high bit of each
|
|
||||||
/// element of `mask`. `ptr` does not need to be aligned.
|
|
||||||
fn mask_load<'tcx>(
|
|
||||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
|
||||||
ptr: &OpTy<'tcx, Provenance>,
|
|
||||||
mask: &OpTy<'tcx, Provenance>,
|
|
||||||
dest: &MPlaceTy<'tcx, Provenance>,
|
|
||||||
) -> InterpResult<'tcx, ()> {
|
|
||||||
let (mask, mask_len) = this.operand_to_simd(mask)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(dest_len, mask_len);
|
|
||||||
|
|
||||||
let mask_item_size = mask.layout.field(this, 0).size;
|
|
||||||
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
|
|
||||||
|
|
||||||
let ptr = this.read_pointer(ptr)?;
|
|
||||||
for i in 0..dest_len {
|
|
||||||
let mask = this.project_index(&mask, i)?;
|
|
||||||
let dest = this.project_index(&dest, i)?;
|
|
||||||
|
|
||||||
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
|
|
||||||
// Size * u64 is implemented as always checked
|
|
||||||
#[allow(clippy::arithmetic_side_effects)]
|
|
||||||
let ptr = ptr.wrapping_offset(dest.layout.size * i, &this.tcx);
|
|
||||||
// Unaligned copy, which is what we want.
|
|
||||||
this.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
|
|
||||||
} else {
|
|
||||||
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Conditionally stores into `ptr` according the high bit of each
|
|
||||||
/// element of `mask`. `ptr` does not need to be aligned.
|
|
||||||
fn mask_store<'tcx>(
|
|
||||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
|
||||||
ptr: &OpTy<'tcx, Provenance>,
|
|
||||||
mask: &OpTy<'tcx, Provenance>,
|
|
||||||
value: &OpTy<'tcx, Provenance>,
|
|
||||||
) -> InterpResult<'tcx, ()> {
|
|
||||||
let (mask, mask_len) = this.operand_to_simd(mask)?;
|
|
||||||
let (value, value_len) = this.operand_to_simd(value)?;
|
|
||||||
|
|
||||||
assert_eq!(value_len, mask_len);
|
|
||||||
|
|
||||||
let mask_item_size = mask.layout.field(this, 0).size;
|
|
||||||
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
|
|
||||||
|
|
||||||
let ptr = this.read_pointer(ptr)?;
|
|
||||||
for i in 0..value_len {
|
|
||||||
let mask = this.project_index(&mask, i)?;
|
|
||||||
let value = this.project_index(&value, i)?;
|
|
||||||
|
|
||||||
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
|
|
||||||
// Size * u64 is implemented as always checked
|
|
||||||
#[allow(clippy::arithmetic_side_effects)]
|
|
||||||
let ptr = ptr.wrapping_offset(value.layout.size * i, &this.tcx);
|
|
||||||
// Unaligned copy, which is what we want.
|
|
||||||
this.mem_copy(value.ptr(), ptr, value.layout.size, /*nonoverlapping*/ true)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
444
src/tools/miri/src/shims/x86/avx2.rs
Normal file
444
src/tools/miri/src/shims/x86/avx2.rs
Normal file
|
@ -0,0 +1,444 @@
|
||||||
|
use crate::rustc_middle::ty::layout::LayoutOf as _;
|
||||||
|
use rustc_middle::mir;
|
||||||
|
use rustc_middle::ty::Ty;
|
||||||
|
use rustc_span::Symbol;
|
||||||
|
use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
horizontal_bin_op, int_abs, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
|
||||||
|
packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd, ShiftOp,
|
||||||
|
};
|
||||||
|
use crate::*;
|
||||||
|
use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
|
||||||
|
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
|
||||||
|
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
|
crate::MiriInterpCxExt<'mir, 'tcx>
|
||||||
|
{
|
||||||
|
fn emulate_x86_avx2_intrinsic(
|
||||||
|
&mut self,
|
||||||
|
link_name: Symbol,
|
||||||
|
abi: Abi,
|
||||||
|
args: &[OpTy<'tcx, Provenance>],
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, EmulateForeignItemResult> {
|
||||||
|
let this = self.eval_context_mut();
|
||||||
|
this.expect_target_feature_for_intrinsic(link_name, "avx2")?;
|
||||||
|
// Prefix should have already been checked.
|
||||||
|
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx2.").unwrap();
|
||||||
|
|
||||||
|
match unprefixed_name {
|
||||||
|
// Used to implement the _mm256_abs_epi{8,16,32} functions.
|
||||||
|
// Calculates the absolute value of packed 8/16/32-bit integers.
|
||||||
|
"pabs.b" | "pabs.w" | "pabs.d" => {
|
||||||
|
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
int_abs(this, op, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_h{add,adds,sub}_epi{16,32} functions.
|
||||||
|
// Horizontally add / add with saturation / subtract adjacent 16/32-bit
|
||||||
|
// integer values in `left` and `right`.
|
||||||
|
"phadd.w" | "phadd.sw" | "phadd.d" | "phsub.w" | "phsub.sw" | "phsub.d" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (which, saturating) = match unprefixed_name {
|
||||||
|
"phadd.w" | "phadd.d" => (mir::BinOp::Add, false),
|
||||||
|
"phadd.sw" => (mir::BinOp::Add, true),
|
||||||
|
"phsub.w" | "phsub.d" => (mir::BinOp::Sub, false),
|
||||||
|
"phsub.sw" => (mir::BinOp::Sub, true),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
horizontal_bin_op(this, which, saturating, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement `_mm{,_mask}_{i32,i64}gather_{epi32,epi64,pd,ps}` functions
|
||||||
|
// Gathers elements from `slice` using `offsets * scale` as indices.
|
||||||
|
// When the highest bit of the corresponding element of `mask` is 0,
|
||||||
|
// the value is copied from `src` instead.
|
||||||
|
"gather.d.d" | "gather.d.d.256" | "gather.d.q" | "gather.d.q.256" | "gather.q.d"
|
||||||
|
| "gather.q.d.256" | "gather.q.q" | "gather.q.q.256" | "gather.d.pd"
|
||||||
|
| "gather.d.pd.256" | "gather.q.pd" | "gather.q.pd.256" | "gather.d.ps"
|
||||||
|
| "gather.d.ps.256" | "gather.q.ps" | "gather.q.ps.256" => {
|
||||||
|
let [src, slice, offsets, mask, scale] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
assert_eq!(dest.layout, src.layout);
|
||||||
|
|
||||||
|
let (src, _) = this.operand_to_simd(src)?;
|
||||||
|
let (offsets, offsets_len) = this.operand_to_simd(offsets)?;
|
||||||
|
let (mask, mask_len) = this.operand_to_simd(mask)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
// There are cases like dest: i32x4, offsets: i64x2
|
||||||
|
let actual_len = dest_len.min(offsets_len);
|
||||||
|
|
||||||
|
assert_eq!(dest_len, mask_len);
|
||||||
|
|
||||||
|
let mask_item_size = mask.layout.field(this, 0).size;
|
||||||
|
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
|
||||||
|
|
||||||
|
let scale = this.read_scalar(scale)?.to_i8()?;
|
||||||
|
if !matches!(scale, 1 | 2 | 4 | 8) {
|
||||||
|
throw_unsup_format!("invalid gather scale {scale}");
|
||||||
|
}
|
||||||
|
let scale = i64::from(scale);
|
||||||
|
|
||||||
|
let slice = this.read_pointer(slice)?;
|
||||||
|
for i in 0..actual_len {
|
||||||
|
let mask = this.project_index(&mask, i)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
|
||||||
|
let offset = this.project_index(&offsets, i)?;
|
||||||
|
let offset =
|
||||||
|
i64::try_from(this.read_scalar(&offset)?.to_int(offset.layout.size)?)
|
||||||
|
.unwrap();
|
||||||
|
let ptr = slice
|
||||||
|
.wrapping_signed_offset(offset.checked_mul(scale).unwrap(), &this.tcx);
|
||||||
|
// Unaligned copy, which is what we want.
|
||||||
|
this.mem_copy(
|
||||||
|
ptr,
|
||||||
|
dest.ptr(),
|
||||||
|
dest.layout.size,
|
||||||
|
/*nonoverlapping*/ true,
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
this.copy_op(&this.project_index(&src, i)?, &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i in actual_len..dest_len {
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_madd_epi16 function.
|
||||||
|
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
|
||||||
|
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
|
||||||
|
// intermediate 32-bit integers, and pack the results in `dest`.
|
||||||
|
"pmadd.wd" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(left_len, right_len);
|
||||||
|
assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let j1 = i.checked_mul(2).unwrap();
|
||||||
|
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
|
||||||
|
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
|
||||||
|
|
||||||
|
let j2 = j1.checked_add(1).unwrap();
|
||||||
|
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
|
||||||
|
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
|
||||||
|
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
// Multiplications are i16*i16->i32, which will not overflow.
|
||||||
|
let mul1 = i32::from(left1).checked_mul(right1.into()).unwrap();
|
||||||
|
let mul2 = i32::from(left2).checked_mul(right2.into()).unwrap();
|
||||||
|
// However, this addition can overflow in the most extreme case
|
||||||
|
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
|
||||||
|
let res = mul1.wrapping_add(mul2);
|
||||||
|
|
||||||
|
this.write_scalar(Scalar::from_i32(res), &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_maddubs_epi16 function.
|
||||||
|
// Multiplies packed 8-bit unsigned integers from `left` and packed
|
||||||
|
// signed 8-bit integers from `right` into 16-bit signed integers. Then,
|
||||||
|
// the saturating sum of the products with indices `2*i` and `2*i+1`
|
||||||
|
// produces the output at index `i`.
|
||||||
|
"pmadd.ub.sw" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(left_len, right_len);
|
||||||
|
assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let j1 = i.checked_mul(2).unwrap();
|
||||||
|
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_u8()?;
|
||||||
|
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i8()?;
|
||||||
|
|
||||||
|
let j2 = j1.checked_add(1).unwrap();
|
||||||
|
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_u8()?;
|
||||||
|
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i8()?;
|
||||||
|
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
// Multiplication of a u8 and an i8 into an i16 cannot overflow.
|
||||||
|
let mul1 = i16::from(left1).checked_mul(right1.into()).unwrap();
|
||||||
|
let mul2 = i16::from(left2).checked_mul(right2.into()).unwrap();
|
||||||
|
let res = mul1.saturating_add(mul2);
|
||||||
|
|
||||||
|
this.write_scalar(Scalar::from_i16(res), &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm_maskload_epi32, _mm_maskload_epi64,
|
||||||
|
// _mm256_maskload_epi32 and _mm256_maskload_epi64 functions.
|
||||||
|
// For the element `i`, if the high bit of the `i`-th element of `mask`
|
||||||
|
// is one, it is loaded from `ptr.wrapping_add(i)`, otherwise zero is
|
||||||
|
// loaded.
|
||||||
|
"maskload.d" | "maskload.q" | "maskload.d.256" | "maskload.q.256" => {
|
||||||
|
let [ptr, mask] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
mask_load(this, ptr, mask, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm_maskstore_epi32, _mm_maskstore_epi64,
|
||||||
|
// _mm256_maskstore_epi32 and _mm256_maskstore_epi64 functions.
|
||||||
|
// For the element `i`, if the high bit of the element `i`-th of `mask`
|
||||||
|
// is one, it is stored into `ptr.wapping_add(i)`.
|
||||||
|
// Unlike SSE2's _mm_maskmoveu_si128, these are not non-temporal stores.
|
||||||
|
"maskstore.d" | "maskstore.q" | "maskstore.d.256" | "maskstore.q.256" => {
|
||||||
|
let [ptr, mask, value] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
mask_store(this, ptr, mask, value)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_mpsadbw_epu8 function.
|
||||||
|
// Compute the sum of absolute differences of quadruplets of unsigned
|
||||||
|
// 8-bit integers in `left` and `right`, and store the 16-bit results
|
||||||
|
// in `right`. Quadruplets are selected from `left` and `right` with
|
||||||
|
// offsets specified in `imm`.
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8
|
||||||
|
"mpsadbw" => {
|
||||||
|
let [left, right, imm] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
mpsadbw(this, left, right, imm, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_mulhrs_epi16 function.
|
||||||
|
// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||||
|
// product to the 18 most significant bits by right-shifting, and then
|
||||||
|
// divides the 18-bit value by 2 (rounding to nearest) by first adding
|
||||||
|
// 1 and then taking the bits `1..=16`.
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16
|
||||||
|
"pmul.hr.sw" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
pmulhrsw(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_packs_epi16 function.
|
||||||
|
// Converts two 16-bit integer vectors to a single 8-bit integer
|
||||||
|
// vector with signed saturation.
|
||||||
|
"packsswb" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
packsswb(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_packs_epi32 function.
|
||||||
|
// Converts two 32-bit integer vectors to a single 16-bit integer
|
||||||
|
// vector with signed saturation.
|
||||||
|
"packssdw" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
packssdw(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_packus_epi16 function.
|
||||||
|
// Converts two 16-bit signed integer vectors to a single 8-bit
|
||||||
|
// unsigned integer vector with saturation.
|
||||||
|
"packuswb" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
packuswb(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_packus_epi32 function.
|
||||||
|
// Concatenates two 32-bit signed integer vectors and converts
|
||||||
|
// the result to a 16-bit unsigned integer vector with saturation.
|
||||||
|
"packusdw" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
packusdw(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_permutevar8x32_epi32 and
|
||||||
|
// _mm256_permutevar8x32_ps function.
|
||||||
|
// Shuffles `left` using the three low bits of each element of `right`
|
||||||
|
// as indices.
|
||||||
|
"permd" | "permps" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, left_len);
|
||||||
|
assert_eq!(dest_len, right_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u32()?;
|
||||||
|
let left = this.project_index(&left, (right & 0b111).into())?;
|
||||||
|
|
||||||
|
this.copy_op(&left, &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_permute2x128_si256 function.
|
||||||
|
// Shuffles 128-bit blocks of `a` and `b` using `imm` as pattern.
|
||||||
|
"vperm2i128" => {
|
||||||
|
let [left, right, imm] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
assert_eq!(left.layout.size.bits(), 256);
|
||||||
|
assert_eq!(right.layout.size.bits(), 256);
|
||||||
|
assert_eq!(dest.layout.size.bits(), 256);
|
||||||
|
|
||||||
|
// Transmute to `[i128; 2]`
|
||||||
|
|
||||||
|
let array_layout =
|
||||||
|
this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.i128, 2))?;
|
||||||
|
let left = left.transmute(array_layout, this)?;
|
||||||
|
let right = right.transmute(array_layout, this)?;
|
||||||
|
let dest = dest.transmute(array_layout, this)?;
|
||||||
|
|
||||||
|
let imm = this.read_scalar(imm)?.to_u8()?;
|
||||||
|
|
||||||
|
for i in 0..2 {
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
let src = match (imm >> i.checked_mul(4).unwrap()) & 0b11 {
|
||||||
|
0 => this.project_index(&left, 0)?,
|
||||||
|
1 => this.project_index(&left, 1)?,
|
||||||
|
2 => this.project_index(&right, 0)?,
|
||||||
|
3 => this.project_index(&right, 1)?,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
this.copy_op(&src, &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_sad_epu8 function.
|
||||||
|
// Compute the absolute differences of packed unsigned 8-bit integers
|
||||||
|
// in `left` and `right`, then horizontally sum each consecutive 8
|
||||||
|
// differences to produce four unsigned 16-bit integers, and pack
|
||||||
|
// these unsigned 16-bit integers in the low 16 bits of 64-bit elements
|
||||||
|
// in `dest`.
|
||||||
|
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8
|
||||||
|
"psad.bw" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(left_len, right_len);
|
||||||
|
assert_eq!(left_len, dest_len.checked_mul(8).unwrap());
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
let mut acc: u16 = 0;
|
||||||
|
for j in 0..8 {
|
||||||
|
let src_index = i.checked_mul(8).unwrap().checked_add(j).unwrap();
|
||||||
|
|
||||||
|
let left = this.project_index(&left, src_index)?;
|
||||||
|
let left = this.read_scalar(&left)?.to_u8()?;
|
||||||
|
|
||||||
|
let right = this.project_index(&right, src_index)?;
|
||||||
|
let right = this.read_scalar(&right)?.to_u8()?;
|
||||||
|
|
||||||
|
acc = acc.checked_add(left.abs_diff(right).into()).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.write_scalar(Scalar::from_u64(acc.into()), &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_shuffle_epi8 intrinsic.
|
||||||
|
// Shuffles bytes from `left` using `right` as pattern.
|
||||||
|
// Each 128-bit block is shuffled independently.
|
||||||
|
"pshuf.b" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, left_len);
|
||||||
|
assert_eq!(dest_len, right_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
let res = if right & 0x80 == 0 {
|
||||||
|
// Shuffle each 128-bit (16-byte) block independently.
|
||||||
|
let j = u64::from(right % 16).checked_add(i & !15).unwrap();
|
||||||
|
this.read_scalar(&this.project_index(&left, j)?)?
|
||||||
|
} else {
|
||||||
|
// If the highest bit in `right` is 1, write zero.
|
||||||
|
Scalar::from_u8(0)
|
||||||
|
};
|
||||||
|
|
||||||
|
this.write_scalar(res, &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_sign_epi{8,16,32} functions.
|
||||||
|
// Negates elements from `left` when the corresponding element in
|
||||||
|
// `right` is negative. If an element from `right` is zero, zero
|
||||||
|
// is writen to the corresponding output element.
|
||||||
|
// Basically, we multiply `left` with `right.signum()`.
|
||||||
|
"psign.b" | "psign.w" | "psign.d" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
psign(this, left, right, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm256_{sll,srl,sra}_epi{16,32,64} functions
|
||||||
|
// (except _mm256_sra_epi64, which is not available in AVX2).
|
||||||
|
// Shifts N-bit packed integers in left by the amount in right.
|
||||||
|
// `right` is as 128-bit vector. but it is interpreted as a single
|
||||||
|
// 64-bit integer (remaining bits are ignored).
|
||||||
|
// For logic shifts, when right is larger than N - 1, zero is produced.
|
||||||
|
// For arithmetic shifts, when right is larger than N - 1, the sign bit
|
||||||
|
// is copied to remaining bits.
|
||||||
|
"psll.w" | "psrl.w" | "psra.w" | "psll.d" | "psrl.d" | "psra.d" | "psll.q"
|
||||||
|
| "psrl.q" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let which = match unprefixed_name {
|
||||||
|
"psll.w" | "psll.d" | "psll.q" => ShiftOp::Left,
|
||||||
|
"psrl.w" | "psrl.d" | "psrl.q" => ShiftOp::RightLogic,
|
||||||
|
"psra.w" | "psra.d" => ShiftOp::RightArith,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
shift_simd_by_scalar(this, left, right, which, dest)?;
|
||||||
|
}
|
||||||
|
// Used to implement the _mm{,256}_{sllv,srlv,srav}_epi{32,64} functions
|
||||||
|
// (except _mm{,256}_srav_epi64, which are not available in AVX2).
|
||||||
|
"psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" | "psrlv.d" | "psrlv.d.256"
|
||||||
|
| "psrlv.q" | "psrlv.q.256" | "psrav.d" | "psrav.d.256" => {
|
||||||
|
let [left, right] =
|
||||||
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
|
let which = match unprefixed_name {
|
||||||
|
"psllv.d" | "psllv.d.256" | "psllv.q" | "psllv.q.256" => ShiftOp::Left,
|
||||||
|
"psrlv.d" | "psrlv.d.256" | "psrlv.q" | "psrlv.q.256" => ShiftOp::RightLogic,
|
||||||
|
"psrav.d" | "psrav.d.256" => ShiftOp::RightArith,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
shift_simd_by_simd(this, left, right, which, dest)?;
|
||||||
|
}
|
||||||
|
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
||||||
|
}
|
||||||
|
Ok(EmulateForeignItemResult::NeedsJumping)
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,6 +14,7 @@ use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
|
||||||
mod aesni;
|
mod aesni;
|
||||||
mod avx;
|
mod avx;
|
||||||
|
mod avx2;
|
||||||
mod sse;
|
mod sse;
|
||||||
mod sse2;
|
mod sse2;
|
||||||
mod sse3;
|
mod sse3;
|
||||||
|
@ -136,6 +137,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
this, link_name, abi, args, dest,
|
this, link_name, abi, args, dest,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
name if name.starts_with("avx2.") => {
|
||||||
|
return avx2::EvalContextExt::emulate_x86_avx2_intrinsic(
|
||||||
|
this, link_name, abi, args, dest,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
||||||
}
|
}
|
||||||
|
@ -482,7 +488,7 @@ enum ShiftOp {
|
||||||
///
|
///
|
||||||
/// For logic shifts, when right is larger than BITS - 1, zero is produced.
|
/// For logic shifts, when right is larger than BITS - 1, zero is produced.
|
||||||
/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign
|
/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign
|
||||||
/// bit is copied to remaining bits.
|
/// bit is copied to all bits.
|
||||||
fn shift_simd_by_scalar<'tcx>(
|
fn shift_simd_by_scalar<'tcx>(
|
||||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
left: &OpTy<'tcx, Provenance>,
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
@ -534,6 +540,61 @@ fn shift_simd_by_scalar<'tcx>(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shifts each element of `left` by the corresponding element of `right`.
|
||||||
|
///
|
||||||
|
/// For logic shifts, when right is larger than BITS - 1, zero is produced.
|
||||||
|
/// For arithmetic right-shifts, when right is larger than BITS - 1, the sign
|
||||||
|
/// bit is copied to all bits.
|
||||||
|
fn shift_simd_by_simd<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
which: ShiftOp,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, left_len);
|
||||||
|
assert_eq!(dest_len, right_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let left = this.read_scalar(&this.project_index(&left, i)?)?;
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, i)?)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
// It is ok to saturate the value to u32::MAX because any value
|
||||||
|
// above BITS - 1 will produce the same result.
|
||||||
|
let shift = u32::try_from(right.to_uint(dest.layout.size)?).unwrap_or(u32::MAX);
|
||||||
|
|
||||||
|
let res = match which {
|
||||||
|
ShiftOp::Left => {
|
||||||
|
let left = left.to_uint(dest.layout.size)?;
|
||||||
|
let res = left.checked_shl(shift).unwrap_or(0);
|
||||||
|
// `truncate` is needed as left-shift can make the absolute value larger.
|
||||||
|
Scalar::from_uint(dest.layout.size.truncate(res), dest.layout.size)
|
||||||
|
}
|
||||||
|
ShiftOp::RightLogic => {
|
||||||
|
let left = left.to_uint(dest.layout.size)?;
|
||||||
|
let res = left.checked_shr(shift).unwrap_or(0);
|
||||||
|
// No `truncate` needed as right-shift can only make the absolute value smaller.
|
||||||
|
Scalar::from_uint(res, dest.layout.size)
|
||||||
|
}
|
||||||
|
ShiftOp::RightArith => {
|
||||||
|
let left = left.to_int(dest.layout.size)?;
|
||||||
|
// On overflow, copy the sign bit to the remaining bits
|
||||||
|
let res = left.checked_shr(shift).unwrap_or(left >> 127);
|
||||||
|
// No `truncate` needed as right-shift can only make the absolute value smaller.
|
||||||
|
Scalar::from_int(res, dest.layout.size)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
this.write_scalar(res, &dest)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts
|
/// Takes a 128-bit vector, transmutes it to `[u64; 2]` and extracts
|
||||||
/// the first value.
|
/// the first value.
|
||||||
fn extract_first_u64<'tcx>(
|
fn extract_first_u64<'tcx>(
|
||||||
|
@ -664,6 +725,33 @@ fn convert_float_to_int<'tcx>(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculates absolute value of integers in `op` and stores the result in `dest`.
|
||||||
|
///
|
||||||
|
/// In case of overflow (when the operand is the minimum value), the operation
|
||||||
|
/// will wrap around.
|
||||||
|
fn int_abs<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
op: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (op, op_len) = this.operand_to_simd(op)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(op_len, dest_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
// Converting to a host "i128" works since the input is always signed.
|
||||||
|
let res = op.to_int(dest.layout.size)?.unsigned_abs();
|
||||||
|
|
||||||
|
this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Splits `op` (which must be a SIMD vector) into 128-bit chuncks.
|
/// Splits `op` (which must be a SIMD vector) into 128-bit chuncks.
|
||||||
///
|
///
|
||||||
/// Returns a tuple where:
|
/// Returns a tuple where:
|
||||||
|
@ -874,3 +962,316 @@ fn test_high_bits_masked<'tcx>(
|
||||||
|
|
||||||
Ok((direct, negated))
|
Ok((direct, negated))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Conditionally loads from `ptr` according the high bit of each
|
||||||
|
/// element of `mask`. `ptr` does not need to be aligned.
|
||||||
|
fn mask_load<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
ptr: &OpTy<'tcx, Provenance>,
|
||||||
|
mask: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (mask, mask_len) = this.operand_to_simd(mask)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, mask_len);
|
||||||
|
|
||||||
|
let mask_item_size = mask.layout.field(this, 0).size;
|
||||||
|
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
|
||||||
|
|
||||||
|
let ptr = this.read_pointer(ptr)?;
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let mask = this.project_index(&mask, i)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
|
||||||
|
let ptr = ptr.wrapping_offset(dest.layout.size * i, &this.tcx);
|
||||||
|
// Unaligned copy, which is what we want.
|
||||||
|
this.mem_copy(ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
|
||||||
|
} else {
|
||||||
|
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Conditionally stores into `ptr` according the high bit of each
|
||||||
|
/// element of `mask`. `ptr` does not need to be aligned.
|
||||||
|
fn mask_store<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
ptr: &OpTy<'tcx, Provenance>,
|
||||||
|
mask: &OpTy<'tcx, Provenance>,
|
||||||
|
value: &OpTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (mask, mask_len) = this.operand_to_simd(mask)?;
|
||||||
|
let (value, value_len) = this.operand_to_simd(value)?;
|
||||||
|
|
||||||
|
assert_eq!(value_len, mask_len);
|
||||||
|
|
||||||
|
let mask_item_size = mask.layout.field(this, 0).size;
|
||||||
|
let high_bit_offset = mask_item_size.bits().checked_sub(1).unwrap();
|
||||||
|
|
||||||
|
let ptr = this.read_pointer(ptr)?;
|
||||||
|
for i in 0..value_len {
|
||||||
|
let mask = this.project_index(&mask, i)?;
|
||||||
|
let value = this.project_index(&value, i)?;
|
||||||
|
|
||||||
|
if this.read_scalar(&mask)?.to_uint(mask_item_size)? >> high_bit_offset != 0 {
|
||||||
|
let ptr = ptr.wrapping_offset(value.layout.size * i, &this.tcx);
|
||||||
|
// Unaligned copy, which is what we want.
|
||||||
|
this.mem_copy(value.ptr(), ptr, value.layout.size, /*nonoverlapping*/ true)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the sum of absolute differences of quadruplets of unsigned
|
||||||
|
/// 8-bit integers in `left` and `right`, and store the 16-bit results
|
||||||
|
/// in `right`. Quadruplets are selected from `left` and `right` with
|
||||||
|
/// offsets specified in `imm`.
|
||||||
|
///
|
||||||
|
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16>
|
||||||
|
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8>
|
||||||
|
///
|
||||||
|
/// Each 128-bit chunk is treated independently (i.e., the value for
|
||||||
|
/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
|
||||||
|
/// 128-bit chunks of `left` and `right`).
|
||||||
|
fn mpsadbw<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
imm: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
assert_eq!(left.layout, right.layout);
|
||||||
|
assert_eq!(left.layout.size, dest.layout.size);
|
||||||
|
|
||||||
|
let (num_chunks, op_items_per_chunk, left) = split_simd_to_128bit_chunks(this, left)?;
|
||||||
|
let (_, _, right) = split_simd_to_128bit_chunks(this, right)?;
|
||||||
|
let (_, dest_items_per_chunk, dest) = split_simd_to_128bit_chunks(this, dest)?;
|
||||||
|
|
||||||
|
assert_eq!(op_items_per_chunk, dest_items_per_chunk.checked_mul(2).unwrap());
|
||||||
|
|
||||||
|
let imm = this.read_scalar(imm)?.to_uint(imm.layout.size)?;
|
||||||
|
// Bit 2 of `imm` specifies the offset for indices of `left`.
|
||||||
|
// The offset is 0 when the bit is 0 or 4 when the bit is 1.
|
||||||
|
let left_offset = u64::try_from((imm >> 2) & 1).unwrap().checked_mul(4).unwrap();
|
||||||
|
// Bits 0..=1 of `imm` specify the offset for indices of
|
||||||
|
// `right` in blocks of 4 elements.
|
||||||
|
let right_offset = u64::try_from(imm & 0b11).unwrap().checked_mul(4).unwrap();
|
||||||
|
|
||||||
|
for i in 0..num_chunks {
|
||||||
|
let left = this.project_index(&left, i)?;
|
||||||
|
let right = this.project_index(&right, i)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
for j in 0..dest_items_per_chunk {
|
||||||
|
let left_offset = left_offset.checked_add(j).unwrap();
|
||||||
|
let mut res: u16 = 0;
|
||||||
|
for k in 0..4 {
|
||||||
|
let left = this
|
||||||
|
.read_scalar(&this.project_index(&left, left_offset.checked_add(k).unwrap())?)?
|
||||||
|
.to_u8()?;
|
||||||
|
let right = this
|
||||||
|
.read_scalar(
|
||||||
|
&this.project_index(&right, right_offset.checked_add(k).unwrap())?,
|
||||||
|
)?
|
||||||
|
.to_u8()?;
|
||||||
|
res = res.checked_add(left.abs_diff(right).into()).unwrap();
|
||||||
|
}
|
||||||
|
this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, j)?)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||||
|
/// product to the 18 most significant bits by right-shifting, and then
|
||||||
|
/// divides the 18-bit value by 2 (rounding to nearest) by first adding
|
||||||
|
/// 1 and then taking the bits `1..=16`.
|
||||||
|
///
|
||||||
|
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16>
|
||||||
|
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16>
|
||||||
|
fn pmulhrsw<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, left_len);
|
||||||
|
assert_eq!(dest_len, right_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
let res =
|
||||||
|
(i32::from(left).checked_mul(right.into()).unwrap() >> 14).checked_add(1).unwrap() >> 1;
|
||||||
|
|
||||||
|
// The result of this operation can overflow a signed 16-bit integer.
|
||||||
|
// When `left` and `right` are -0x8000, the result is 0x8000.
|
||||||
|
#[allow(clippy::cast_possible_truncation)]
|
||||||
|
let res = res as i16;
|
||||||
|
|
||||||
|
this.write_scalar(Scalar::from_i16(res), &dest)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pack_generic<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
f: impl Fn(Scalar<Provenance>) -> InterpResult<'tcx, Scalar<Provenance>>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
assert_eq!(left.layout, right.layout);
|
||||||
|
assert_eq!(left.layout.size, dest.layout.size);
|
||||||
|
|
||||||
|
let (num_chunks, op_items_per_chunk, left) = split_simd_to_128bit_chunks(this, left)?;
|
||||||
|
let (_, _, right) = split_simd_to_128bit_chunks(this, right)?;
|
||||||
|
let (_, dest_items_per_chunk, dest) = split_simd_to_128bit_chunks(this, dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_items_per_chunk, op_items_per_chunk.checked_mul(2).unwrap());
|
||||||
|
|
||||||
|
for i in 0..num_chunks {
|
||||||
|
let left = this.project_index(&left, i)?;
|
||||||
|
let right = this.project_index(&right, i)?;
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
|
||||||
|
for j in 0..op_items_per_chunk {
|
||||||
|
let left = this.read_scalar(&this.project_index(&left, j)?)?;
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, j)?)?;
|
||||||
|
let left_dest = this.project_index(&dest, j)?;
|
||||||
|
let right_dest =
|
||||||
|
this.project_index(&dest, j.checked_add(op_items_per_chunk).unwrap())?;
|
||||||
|
|
||||||
|
let left_res = f(left)?;
|
||||||
|
let right_res = f(right)?;
|
||||||
|
|
||||||
|
this.write_scalar(left_res, &left_dest)?;
|
||||||
|
this.write_scalar(right_res, &right_dest)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts two 16-bit integer vectors to a single 8-bit integer
|
||||||
|
/// vector with signed saturation.
|
||||||
|
///
|
||||||
|
/// Each 128-bit chunk is treated independently (i.e., the value for
|
||||||
|
/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
|
||||||
|
/// 128-bit chunks of `left` and `right`).
|
||||||
|
fn packsswb<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
pack_generic(this, left, right, dest, |op| {
|
||||||
|
let op = op.to_i16()?;
|
||||||
|
let res = i8::try_from(op).unwrap_or(if op < 0 { i8::MIN } else { i8::MAX });
|
||||||
|
Ok(Scalar::from_i8(res))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts two 16-bit signed integer vectors to a single 8-bit
|
||||||
|
/// unsigned integer vector with saturation.
|
||||||
|
///
|
||||||
|
/// Each 128-bit chunk is treated independently (i.e., the value for
|
||||||
|
/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
|
||||||
|
/// 128-bit chunks of `left` and `right`).
|
||||||
|
fn packuswb<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
pack_generic(this, left, right, dest, |op| {
|
||||||
|
let op = op.to_i16()?;
|
||||||
|
let res = u8::try_from(op).unwrap_or(if op < 0 { 0 } else { u8::MAX });
|
||||||
|
Ok(Scalar::from_u8(res))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts two 32-bit integer vectors to a single 16-bit integer
|
||||||
|
/// vector with signed saturation.
|
||||||
|
///
|
||||||
|
/// Each 128-bit chunk is treated independently (i.e., the value for
|
||||||
|
/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
|
||||||
|
/// 128-bit chunks of `left` and `right`).
|
||||||
|
fn packssdw<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
pack_generic(this, left, right, dest, |op| {
|
||||||
|
let op = op.to_i32()?;
|
||||||
|
let res = i16::try_from(op).unwrap_or(if op < 0 { i16::MIN } else { i16::MAX });
|
||||||
|
Ok(Scalar::from_i16(res))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts two 32-bit integer vectors to a single 16-bit integer
|
||||||
|
/// vector with unsigned saturation.
|
||||||
|
///
|
||||||
|
/// Each 128-bit chunk is treated independently (i.e., the value for
|
||||||
|
/// the is i-th 128-bit chunk of `dest` is calculated with the i-th
|
||||||
|
/// 128-bit chunks of `left` and `right`).
|
||||||
|
fn packusdw<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
pack_generic(this, left, right, dest, |op| {
|
||||||
|
let op = op.to_i32()?;
|
||||||
|
let res = u16::try_from(op).unwrap_or(if op < 0 { 0 } else { u16::MAX });
|
||||||
|
Ok(Scalar::from_u16(res))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Negates elements from `left` when the corresponding element in
|
||||||
|
/// `right` is negative. If an element from `right` is zero, zero
|
||||||
|
/// is writen to the corresponding output element.
|
||||||
|
/// In other words, multiplies `left` with `right.signum()`.
|
||||||
|
fn psign<'tcx>(
|
||||||
|
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||||
|
left: &OpTy<'tcx, Provenance>,
|
||||||
|
right: &OpTy<'tcx, Provenance>,
|
||||||
|
dest: &MPlaceTy<'tcx, Provenance>,
|
||||||
|
) -> InterpResult<'tcx, ()> {
|
||||||
|
let (left, left_len) = this.operand_to_simd(left)?;
|
||||||
|
let (right, right_len) = this.operand_to_simd(right)?;
|
||||||
|
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
||||||
|
|
||||||
|
assert_eq!(dest_len, left_len);
|
||||||
|
assert_eq!(dest_len, right_len);
|
||||||
|
|
||||||
|
for i in 0..dest_len {
|
||||||
|
let dest = this.project_index(&dest, i)?;
|
||||||
|
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||||
|
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_int(dest.layout.size)?;
|
||||||
|
|
||||||
|
let res = this.wrapping_binary_op(
|
||||||
|
mir::BinOp::Mul,
|
||||||
|
&left,
|
||||||
|
&ImmTy::from_int(right.signum(), dest.layout),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
this.write_immediate(*res, &dest)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
|
@ -3,8 +3,8 @@ use rustc_span::Symbol;
|
||||||
use rustc_target::spec::abi::Abi;
|
use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
bin_op_simd_float_all, bin_op_simd_float_first, convert_float_to_int, shift_simd_by_scalar,
|
bin_op_simd_float_all, bin_op_simd_float_first, convert_float_to_int, packssdw, packsswb,
|
||||||
FloatBinOp, ShiftOp,
|
packuswb, shift_simd_by_scalar, FloatBinOp, ShiftOp,
|
||||||
};
|
};
|
||||||
use crate::*;
|
use crate::*;
|
||||||
use shims::foreign_items::EmulateForeignItemResult;
|
use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
@ -176,29 +176,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
packsswb(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
// left and right are i16x8, dest is i8x16
|
|
||||||
assert_eq!(left_len, 8);
|
|
||||||
assert_eq!(right_len, 8);
|
|
||||||
assert_eq!(dest_len, 16);
|
|
||||||
|
|
||||||
for i in 0..left_len {
|
|
||||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
|
||||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
|
||||||
let left_dest = this.project_index(&dest, i)?;
|
|
||||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
|
||||||
|
|
||||||
let left_res =
|
|
||||||
i8::try_from(left).unwrap_or(if left < 0 { i8::MIN } else { i8::MAX });
|
|
||||||
let right_res =
|
|
||||||
i8::try_from(right).unwrap_or(if right < 0 { i8::MIN } else { i8::MAX });
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_i8(left_res), &left_dest)?;
|
|
||||||
this.write_scalar(Scalar::from_i8(right_res), &right_dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_packus_epi16 function.
|
// Used to implement the _mm_packus_epi16 function.
|
||||||
// Converts two 16-bit signed integer vectors to a single 8-bit
|
// Converts two 16-bit signed integer vectors to a single 8-bit
|
||||||
|
@ -207,28 +185,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
packuswb(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
// left and right are i16x8, dest is u8x16
|
|
||||||
assert_eq!(left_len, 8);
|
|
||||||
assert_eq!(right_len, 8);
|
|
||||||
assert_eq!(dest_len, 16);
|
|
||||||
|
|
||||||
for i in 0..left_len {
|
|
||||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
|
||||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
|
||||||
let left_dest = this.project_index(&dest, i)?;
|
|
||||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
|
||||||
|
|
||||||
let left_res = u8::try_from(left).unwrap_or(if left < 0 { 0 } else { u8::MAX });
|
|
||||||
let right_res =
|
|
||||||
u8::try_from(right).unwrap_or(if right < 0 { 0 } else { u8::MAX });
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_u8(left_res), &left_dest)?;
|
|
||||||
this.write_scalar(Scalar::from_u8(right_res), &right_dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_packs_epi32 function.
|
// Used to implement the _mm_packs_epi32 function.
|
||||||
// Converts two 32-bit integer vectors to a single 16-bit integer
|
// Converts two 32-bit integer vectors to a single 16-bit integer
|
||||||
|
@ -237,29 +194,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
packssdw(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
// left and right are i32x4, dest is i16x8
|
|
||||||
assert_eq!(left_len, 4);
|
|
||||||
assert_eq!(right_len, 4);
|
|
||||||
assert_eq!(dest_len, 8);
|
|
||||||
|
|
||||||
for i in 0..left_len {
|
|
||||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
|
|
||||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
|
|
||||||
let left_dest = this.project_index(&dest, i)?;
|
|
||||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
|
||||||
|
|
||||||
let left_res =
|
|
||||||
i16::try_from(left).unwrap_or(if left < 0 { i16::MIN } else { i16::MAX });
|
|
||||||
let right_res =
|
|
||||||
i16::try_from(right).unwrap_or(if right < 0 { i16::MIN } else { i16::MAX });
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_i16(left_res), &left_dest)?;
|
|
||||||
this.write_scalar(Scalar::from_i16(right_res), &right_dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement _mm_min_sd and _mm_max_sd functions.
|
// Used to implement _mm_min_sd and _mm_max_sd functions.
|
||||||
// Note that the semantics are a bit different from Rust simd_min
|
// Note that the semantics are a bit different from Rust simd_min
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use rustc_span::Symbol;
|
use rustc_span::Symbol;
|
||||||
use rustc_target::spec::abi::Abi;
|
use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
use super::{conditional_dot_product, round_all, round_first, test_bits_masked};
|
use super::{conditional_dot_product, mpsadbw, packusdw, round_all, round_first, test_bits_masked};
|
||||||
use crate::*;
|
use crate::*;
|
||||||
use shims::foreign_items::EmulateForeignItemResult;
|
use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
|
||||||
|
@ -68,27 +68,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
packusdw(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(left_len, right_len);
|
|
||||||
assert_eq!(dest_len, left_len.checked_mul(2).unwrap());
|
|
||||||
|
|
||||||
for i in 0..left_len {
|
|
||||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
|
|
||||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
|
|
||||||
let left_dest = this.project_index(&dest, i)?;
|
|
||||||
let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
|
|
||||||
|
|
||||||
let left_res =
|
|
||||||
u16::try_from(left).unwrap_or(if left < 0 { 0 } else { u16::MAX });
|
|
||||||
let right_res =
|
|
||||||
u16::try_from(right).unwrap_or(if right < 0 { 0 } else { u16::MAX });
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_u16(left_res), &left_dest)?;
|
|
||||||
this.write_scalar(Scalar::from_u16(right_res), &right_dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_dp_ps and _mm_dp_pd functions.
|
// Used to implement the _mm_dp_ps and _mm_dp_pd functions.
|
||||||
// Conditionally multiplies the packed floating-point elements in
|
// Conditionally multiplies the packed floating-point elements in
|
||||||
|
@ -176,40 +156,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right, imm] =
|
let [left, right, imm] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
mpsadbw(this, left, right, imm, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(left_len, right_len);
|
|
||||||
assert_eq!(left_len, dest_len.checked_mul(2).unwrap());
|
|
||||||
|
|
||||||
let imm = this.read_scalar(imm)?.to_u8()?;
|
|
||||||
// Bit 2 of `imm` specifies the offset for indices of `left`.
|
|
||||||
// The offset is 0 when the bit is 0 or 4 when the bit is 1.
|
|
||||||
let left_offset = u64::from((imm >> 2) & 1).checked_mul(4).unwrap();
|
|
||||||
// Bits 0..=1 of `imm` specify the offset for indices of
|
|
||||||
// `right` in blocks of 4 elements.
|
|
||||||
let right_offset = u64::from(imm & 0b11).checked_mul(4).unwrap();
|
|
||||||
|
|
||||||
for i in 0..dest_len {
|
|
||||||
let left_offset = left_offset.checked_add(i).unwrap();
|
|
||||||
let mut res: u16 = 0;
|
|
||||||
for j in 0..4 {
|
|
||||||
let left = this
|
|
||||||
.read_scalar(
|
|
||||||
&this.project_index(&left, left_offset.checked_add(j).unwrap())?,
|
|
||||||
)?
|
|
||||||
.to_u8()?;
|
|
||||||
let right = this
|
|
||||||
.read_scalar(
|
|
||||||
&this
|
|
||||||
.project_index(&right, right_offset.checked_add(j).unwrap())?,
|
|
||||||
)?
|
|
||||||
.to_u8()?;
|
|
||||||
res = res.checked_add(left.abs_diff(right).into()).unwrap();
|
|
||||||
}
|
|
||||||
this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, i)?)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_testz_si128, _mm_testc_si128
|
// Used to implement the _mm_testz_si128, _mm_testc_si128
|
||||||
// and _mm_testnzc_si128 functions.
|
// and _mm_testnzc_si128 functions.
|
||||||
|
|
|
@ -2,7 +2,7 @@ use rustc_middle::mir;
|
||||||
use rustc_span::Symbol;
|
use rustc_span::Symbol;
|
||||||
use rustc_target::spec::abi::Abi;
|
use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
use super::horizontal_bin_op;
|
use super::{horizontal_bin_op, int_abs, pmulhrsw, psign};
|
||||||
use crate::*;
|
use crate::*;
|
||||||
use shims::foreign_items::EmulateForeignItemResult;
|
use shims::foreign_items::EmulateForeignItemResult;
|
||||||
|
|
||||||
|
@ -28,20 +28,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
"pabs.b.128" | "pabs.w.128" | "pabs.d.128" => {
|
"pabs.b.128" | "pabs.w.128" | "pabs.d.128" => {
|
||||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (op, op_len) = this.operand_to_simd(op)?;
|
int_abs(this, op, dest)?;
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(op_len, dest_len);
|
|
||||||
|
|
||||||
for i in 0..dest_len {
|
|
||||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
|
||||||
let dest = this.project_index(&dest, i)?;
|
|
||||||
|
|
||||||
// Converting to a host "i128" works since the input is always signed.
|
|
||||||
let res = op.to_int(dest.layout.size)?.unsigned_abs();
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_shuffle_epi8 intrinsic.
|
// Used to implement the _mm_shuffle_epi8 intrinsic.
|
||||||
// Shuffles bytes from `left` using `right` as pattern.
|
// Shuffles bytes from `left` using `right` as pattern.
|
||||||
|
@ -136,30 +123,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
pmulhrsw(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(dest_len, left_len);
|
|
||||||
assert_eq!(dest_len, right_len);
|
|
||||||
|
|
||||||
for i in 0..dest_len {
|
|
||||||
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
|
|
||||||
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
|
|
||||||
let dest = this.project_index(&dest, i)?;
|
|
||||||
|
|
||||||
let res = (i32::from(left).checked_mul(right.into()).unwrap() >> 14)
|
|
||||||
.checked_add(1)
|
|
||||||
.unwrap()
|
|
||||||
>> 1;
|
|
||||||
|
|
||||||
// The result of this operation can overflow a signed 16-bit integer.
|
|
||||||
// When `left` and `right` are -0x8000, the result is 0x8000.
|
|
||||||
#[allow(clippy::cast_possible_truncation)]
|
|
||||||
let res = res as i16;
|
|
||||||
|
|
||||||
this.write_scalar(Scalar::from_i16(res), &dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Used to implement the _mm_sign_epi{8,16,32} functions.
|
// Used to implement the _mm_sign_epi{8,16,32} functions.
|
||||||
// Negates elements from `left` when the corresponding element in
|
// Negates elements from `left` when the corresponding element in
|
||||||
|
@ -170,28 +134,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
|
||||||
let [left, right] =
|
let [left, right] =
|
||||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||||
|
|
||||||
let (left, left_len) = this.operand_to_simd(left)?;
|
psign(this, left, right, dest)?;
|
||||||
let (right, right_len) = this.operand_to_simd(right)?;
|
|
||||||
let (dest, dest_len) = this.mplace_to_simd(dest)?;
|
|
||||||
|
|
||||||
assert_eq!(dest_len, left_len);
|
|
||||||
assert_eq!(dest_len, right_len);
|
|
||||||
|
|
||||||
for i in 0..dest_len {
|
|
||||||
let dest = this.project_index(&dest, i)?;
|
|
||||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
|
||||||
let right = this
|
|
||||||
.read_scalar(&this.project_index(&right, i)?)?
|
|
||||||
.to_int(dest.layout.size)?;
|
|
||||||
|
|
||||||
let res = this.wrapping_binary_op(
|
|
||||||
mir::BinOp::Mul,
|
|
||||||
&left,
|
|
||||||
&ImmTy::from_int(right.signum(), dest.layout),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
this.write_immediate(*res, &dest)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
_ => return Ok(EmulateForeignItemResult::NotSupported),
|
||||||
}
|
}
|
||||||
|
|
1613
src/tools/miri/tests/pass/intrinsics-x86-avx2.rs
Normal file
1613
src/tools/miri/tests/pass/intrinsics-x86-avx2.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue