Merge commit '3383cfbd35
' into sync-from-portable-simd-2025-01-18
This commit is contained in:
commit
1ff6c555bb
31 changed files with 865 additions and 388 deletions
|
@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
|
|||
license = "MIT OR Apache-2.0"
|
||||
|
||||
[features]
|
||||
default = ["as_crate"]
|
||||
default = ["as_crate", "std"]
|
||||
as_crate = []
|
||||
std = []
|
||||
all_lane_counts = []
|
||||
|
||||
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
|
||||
wasm-bindgen = "0.2"
|
||||
|
|
|
@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
|
|||
};
|
||||
}
|
||||
|
||||
supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
supported_lane_count!(
|
||||
3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
|
||||
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
|
||||
);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#![no_std]
|
||||
#![feature(
|
||||
const_refs_to_cell,
|
||||
const_mut_refs,
|
||||
const_eval_select,
|
||||
convert_float_to_int,
|
||||
core_intrinsics,
|
||||
decl_macro,
|
||||
|
@ -26,6 +25,7 @@
|
|||
all(target_arch = "arm", target_feature = "v7"),
|
||||
feature(stdarch_arm_neon_intrinsics)
|
||||
)]
|
||||
#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
|
||||
#![cfg_attr(
|
||||
any(target_arch = "powerpc", target_arch = "powerpc64"),
|
||||
feature(stdarch_powerpc)
|
||||
|
|
|
@ -308,48 +308,6 @@ where
|
|||
Self(mask_impl::Mask::from_bitmask_integer(bitmask))
|
||||
}
|
||||
|
||||
/// Creates a bitmask vector from a mask.
|
||||
///
|
||||
/// Each bit is set if the corresponding element in the mask is `true`.
|
||||
/// The remaining bits are unset.
|
||||
///
|
||||
/// The bits are packed into the first N bits of the vector:
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::mask32x8;
|
||||
/// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]);
|
||||
/// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new integer and does not mutate the original value"]
|
||||
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
|
||||
self.0.to_bitmask_vector()
|
||||
}
|
||||
|
||||
/// Creates a mask from a bitmask vector.
|
||||
///
|
||||
/// For each bit, if it is set, the corresponding element in the mask is set to `true`.
|
||||
///
|
||||
/// The bits are packed into the first N bits of the vector:
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::{mask32x8, u8x8};
|
||||
/// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]);
|
||||
/// assert_eq!(
|
||||
/// mask32x8::from_bitmask_vector(bitmask),
|
||||
/// mask32x8::from_array([true, false, true, false, false, false, true, false]),
|
||||
/// );
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
|
||||
Self(mask_impl::Mask::from_bitmask_vector(bitmask))
|
||||
}
|
||||
|
||||
/// Finds the index of the first set element.
|
||||
///
|
||||
/// ```
|
||||
|
|
|
@ -122,23 +122,6 @@ where
|
|||
unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
|
||||
let mut bitmask = Simd::splat(0);
|
||||
bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
|
||||
bitmask
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
|
||||
let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
|
||||
let len = bytes.as_ref().len();
|
||||
bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
|
||||
Self(bytes, PhantomData)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_bitmask_integer(self) -> u64 {
|
||||
let mut bitmask = [0u8; 8];
|
||||
|
|
|
@ -140,62 +140,6 @@ where
|
|||
unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
|
||||
let mut bitmask = Simd::splat(0);
|
||||
|
||||
// Safety: Bytes is the right size array
|
||||
unsafe {
|
||||
// Compute the bitmask
|
||||
let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
|
||||
core::intrinsics::simd::simd_bitmask(self.0);
|
||||
|
||||
// LLVM assumes bit order should match endianness
|
||||
if cfg!(target_endian = "big") {
|
||||
for x in bytes.as_mut() {
|
||||
*x = x.reverse_bits()
|
||||
}
|
||||
if N % 8 > 0 {
|
||||
bytes.as_mut()[N / 8] >>= 8 - N % 8;
|
||||
}
|
||||
}
|
||||
|
||||
bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
|
||||
}
|
||||
|
||||
bitmask
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
|
||||
let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
|
||||
|
||||
// Safety: Bytes is the right size array
|
||||
unsafe {
|
||||
let len = bytes.as_ref().len();
|
||||
bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
|
||||
|
||||
// LLVM assumes bit order should match endianness
|
||||
if cfg!(target_endian = "big") {
|
||||
for x in bytes.as_mut() {
|
||||
*x = x.reverse_bits();
|
||||
}
|
||||
if N % 8 > 0 {
|
||||
bytes.as_mut()[N / 8] >>= 8 - N % 8;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the regular mask
|
||||
Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask(
|
||||
bytes,
|
||||
Self::splat(true).to_int(),
|
||||
Self::splat(false).to_int(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
|
||||
where
|
||||
|
@ -283,7 +227,7 @@ where
|
|||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
pub fn all(self) -> bool {
|
||||
// Safety: use `self` as an integer vector
|
||||
unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }
|
||||
|
|
|
@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
|
|||
( $lhs:ident,
|
||||
$rhs:ident,
|
||||
{ const PANIC_ZERO: &'static str = $zero:literal;
|
||||
$simd_call:ident
|
||||
$simd_call:ident, $op:tt
|
||||
},
|
||||
$int:ident ) => {
|
||||
if $rhs.simd_eq(Simd::splat(0 as _)).any() {
|
||||
|
@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
|
|||
// Nice base case to make it easy to const-fold away the other branch.
|
||||
$rhs
|
||||
};
|
||||
// Safety: $lhs and rhs are vectors
|
||||
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
|
||||
|
||||
// aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
|
||||
// these operations aren't vectorized on aarch64 anyway
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
{
|
||||
let mut out = Simd::splat(0 as _);
|
||||
for i in 0..Self::LEN {
|
||||
out[i] = $lhs[i] $op rhs[i];
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
{
|
||||
// Safety: $lhs and rhs are vectors
|
||||
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -205,14 +220,14 @@ for_base_ops! {
|
|||
impl Div::div {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to divide by zero";
|
||||
simd_div
|
||||
simd_div, /
|
||||
}
|
||||
}
|
||||
|
||||
impl Rem::rem {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
|
||||
simd_rem
|
||||
simd_rem, %
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ pub trait SimdPartialEq {
|
|||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn simd_eq(self, other: Self) -> Self::Mask;
|
||||
|
||||
/// Test if each element is equal to the corresponding element in `other`.
|
||||
/// Test if each element is not equal to the corresponding element in `other`.
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn simd_ne(self, other: Self) -> Self::Mask;
|
||||
}
|
||||
|
|
|
@ -255,6 +255,7 @@ macro_rules! impl_trait {
|
|||
type Bits = Simd<$bits_ty, N>;
|
||||
type Cast<T: SimdElement> = Simd<T, N>;
|
||||
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn cast<T: SimdCast>(self) -> Self::Cast<T>
|
||||
{
|
||||
|
@ -262,6 +263,33 @@ macro_rules! impl_trait {
|
|||
unsafe { core::intrinsics::simd::simd_as(self) }
|
||||
}
|
||||
|
||||
// https://github.com/llvm/llvm-project/issues/94694
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn cast<T: SimdCast>(self) -> Self::Cast<T>
|
||||
{
|
||||
const { assert!(N <= 64) };
|
||||
if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
|
||||
// Safety: supported types are guaranteed by SimdCast
|
||||
unsafe { core::intrinsics::simd::simd_as(self) }
|
||||
} else if N < 4 {
|
||||
let x = self.resize::<4>(Default::default()).cast();
|
||||
x.resize::<N>(x[0])
|
||||
} else if N < 8 {
|
||||
let x = self.resize::<8>(Default::default()).cast();
|
||||
x.resize::<N>(x[0])
|
||||
} else if N < 16 {
|
||||
let x = self.resize::<16>(Default::default()).cast();
|
||||
x.resize::<N>(x[0])
|
||||
} else if N < 32 {
|
||||
let x = self.resize::<32>(Default::default()).cast();
|
||||
x.resize::<N>(x[0])
|
||||
} else {
|
||||
let x = self.resize::<64>(Default::default()).cast();
|
||||
x.resize::<N>(x[0])
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
|
||||
unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
|
||||
|
@ -391,7 +419,7 @@ macro_rules! impl_trait {
|
|||
self.as_array().iter().sum()
|
||||
} else {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) }
|
||||
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, -0.) }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{
|
||||
cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
|
||||
cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
|
||||
SupportedLaneCount,
|
||||
};
|
||||
|
||||
|
@ -70,11 +70,27 @@ pub trait SimdInt: Copy + Sealed {
|
|||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::prelude::*;
|
||||
/// use core::i32::{MIN, MAX};
|
||||
/// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
|
||||
/// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
|
||||
/// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
|
||||
/// ```
|
||||
fn abs(self) -> Self;
|
||||
|
||||
/// Lanewise absolute difference.
|
||||
/// Every element becomes the absolute difference of `self` and `second`.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::prelude::*;
|
||||
/// use core::i32::{MIN, MAX};
|
||||
/// let a = Simd::from_array([MIN, MAX, 100, -100]);
|
||||
/// let b = Simd::from_array([MAX, MIN, -80, -120]);
|
||||
/// assert_eq!(a.abs_diff(b), Simd::from_array([u32::MAX, u32::MAX, 180, 20]));
|
||||
/// ```
|
||||
fn abs_diff(self, second: Self) -> Self::Unsigned;
|
||||
|
||||
/// Lanewise saturating absolute value, implemented in Rust.
|
||||
/// As abs(), except the MIN value becomes MAX instead of itself.
|
||||
///
|
||||
|
@ -203,6 +219,12 @@ pub trait SimdInt: Copy + Sealed {
|
|||
/// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
|
||||
fn reverse_bits(self) -> Self;
|
||||
|
||||
/// Returns the number of ones in the binary representation of each element.
|
||||
fn count_ones(self) -> Self::Unsigned;
|
||||
|
||||
/// Returns the number of zeros in the binary representation of each element.
|
||||
fn count_zeros(self) -> Self::Unsigned;
|
||||
|
||||
/// Returns the number of leading zeros in the binary representation of each element.
|
||||
fn leading_zeros(self) -> Self::Unsigned;
|
||||
|
||||
|
@ -259,6 +281,13 @@ macro_rules! impl_trait {
|
|||
(self^m) - m
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff(self, second: Self) -> Self::Unsigned {
|
||||
let max = self.simd_max(second);
|
||||
let min = self.simd_min(second);
|
||||
(max - min).cast()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn saturating_abs(self) -> Self {
|
||||
// arith shift for -1 or 0 mask based on sign bit, giving 2s complement
|
||||
|
@ -344,6 +373,16 @@ macro_rules! impl_trait {
|
|||
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn count_ones(self) -> Self::Unsigned {
|
||||
self.cast::<$unsigned>().count_ones()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn count_zeros(self) -> Self::Unsigned {
|
||||
self.cast::<$unsigned>().count_zeros()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn leading_zeros(self) -> Self::Unsigned {
|
||||
self.cast::<$unsigned>().leading_zeros()
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use super::sealed::Sealed;
|
||||
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
|
||||
use crate::simd::{cmp::SimdOrd, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
|
||||
|
||||
/// Operations on SIMD vectors of unsigned integers.
|
||||
pub trait SimdUint: Copy + Sealed {
|
||||
|
@ -57,6 +57,22 @@ pub trait SimdUint: Copy + Sealed {
|
|||
/// assert_eq!(sat, Simd::splat(0));
|
||||
fn saturating_sub(self, second: Self) -> Self;
|
||||
|
||||
/// Lanewise absolute difference.
|
||||
/// Every element becomes the absolute difference of `self` and `second`.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::prelude::*;
|
||||
/// use core::u32::MAX;
|
||||
/// let a = Simd::from_array([0, MAX, 100, 20]);
|
||||
/// let b = Simd::from_array([MAX, 0, 80, 200]);
|
||||
/// assert_eq!(a.abs_diff(b), Simd::from_array([MAX, MAX, 20, 180]));
|
||||
/// ```
|
||||
fn abs_diff(self, second: Self) -> Self;
|
||||
|
||||
/// Returns the sum of the elements of the vector, with wrapping addition.
|
||||
fn reduce_sum(self) -> Self::Scalar;
|
||||
|
||||
|
@ -85,6 +101,12 @@ pub trait SimdUint: Copy + Sealed {
|
|||
/// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
|
||||
fn reverse_bits(self) -> Self;
|
||||
|
||||
/// Returns the number of ones in the binary representation of each element.
|
||||
fn count_ones(self) -> Self;
|
||||
|
||||
/// Returns the number of zeros in the binary representation of each element.
|
||||
fn count_zeros(self) -> Self;
|
||||
|
||||
/// Returns the number of leading zeros in the binary representation of each element.
|
||||
fn leading_zeros(self) -> Self;
|
||||
|
||||
|
@ -138,6 +160,13 @@ macro_rules! impl_trait {
|
|||
unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_diff(self, second: Self) -> Self {
|
||||
let max = self.simd_max(second);
|
||||
let min = self.simd_min(second);
|
||||
max - min
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reduce_sum(self) -> Self::Scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
|
@ -192,6 +221,17 @@ macro_rules! impl_trait {
|
|||
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn count_ones(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { core::intrinsics::simd::simd_ctpop(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn count_zeros(self) -> Self {
|
||||
(!self).count_ones()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn leading_zeros(self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
|
|
|
@ -42,6 +42,19 @@ pub trait SimdConstPtr: Copy + Sealed {
|
|||
/// Equivalent to calling [`pointer::addr`] on each element.
|
||||
fn addr(self) -> Self::Usize;
|
||||
|
||||
/// Converts an address to a pointer without giving it any provenance.
|
||||
///
|
||||
/// Without provenance, this pointer is not associated with any actual allocation. Such a
|
||||
/// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
|
||||
/// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
|
||||
/// are little more than a usize address in disguise.
|
||||
///
|
||||
/// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
|
||||
/// previously exposed provenance.
|
||||
///
|
||||
/// Equivalent to calling [`core::ptr::without_provenance`] on each element.
|
||||
fn without_provenance(addr: Self::Usize) -> Self;
|
||||
|
||||
/// Creates a new pointer with the given address.
|
||||
///
|
||||
/// This performs the same operation as a cast, but copies the *address-space* and
|
||||
|
@ -118,6 +131,14 @@ where
|
|||
unsafe { core::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn without_provenance(addr: Self::Usize) -> Self {
|
||||
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
|
||||
// SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
|
||||
// provenance).
|
||||
unsafe { core::mem::transmute_copy(&addr) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn with_addr(self, addr: Self::Usize) -> Self {
|
||||
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
|
||||
|
|
|
@ -39,6 +39,19 @@ pub trait SimdMutPtr: Copy + Sealed {
|
|||
/// Equivalent to calling [`pointer::addr`] on each element.
|
||||
fn addr(self) -> Self::Usize;
|
||||
|
||||
/// Converts an address to a pointer without giving it any provenance.
|
||||
///
|
||||
/// Without provenance, this pointer is not associated with any actual allocation. Such a
|
||||
/// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
|
||||
/// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
|
||||
/// are little more than a usize address in disguise.
|
||||
///
|
||||
/// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
|
||||
/// previously exposed provenance.
|
||||
///
|
||||
/// Equivalent to calling [`core::ptr::without_provenance`] on each element.
|
||||
fn without_provenance(addr: Self::Usize) -> Self;
|
||||
|
||||
/// Creates a new pointer with the given address.
|
||||
///
|
||||
/// This performs the same operation as a cast, but copies the *address-space* and
|
||||
|
@ -115,6 +128,14 @@ where
|
|||
unsafe { core::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn without_provenance(addr: Self::Usize) -> Self {
|
||||
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
|
||||
// SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
|
||||
// provenance).
|
||||
unsafe { core::mem::transmute_copy(&addr) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn with_addr(self, addr: Self::Usize) -> Self {
|
||||
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
|
||||
|
|
|
@ -155,8 +155,7 @@ pub trait Swizzle<const N: usize> {
|
|||
|
||||
/// Creates a new mask from the elements of `mask`.
|
||||
///
|
||||
/// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
|
||||
/// `first` and `second`.
|
||||
/// Element `i` of the output is `mask[Self::INDEX[i]]`.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original inputs"]
|
||||
fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
|
||||
|
@ -260,6 +259,50 @@ where
|
|||
Rotate::<OFFSET>::swizzle(self)
|
||||
}
|
||||
|
||||
/// Shifts the vector elements to the left by `OFFSET`, filling in with
|
||||
/// `padding` from the right.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn shift_elements_left<const OFFSET: usize>(self, padding: T) -> Self {
|
||||
struct Shift<const OFFSET: usize>;
|
||||
|
||||
impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
|
||||
const INDEX: [usize; N] = const {
|
||||
let mut index = [N; N];
|
||||
let mut i = 0;
|
||||
while i + OFFSET < N {
|
||||
index[i] = i + OFFSET;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
};
|
||||
}
|
||||
|
||||
Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
|
||||
}
|
||||
|
||||
/// Shifts the vector elements to the right by `OFFSET`, filling in with
|
||||
/// `padding` from the left.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn shift_elements_right<const OFFSET: usize>(self, padding: T) -> Self {
|
||||
struct Shift<const OFFSET: usize>;
|
||||
|
||||
impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
|
||||
const INDEX: [usize; N] = const {
|
||||
let mut index = [N; N];
|
||||
let mut i = OFFSET;
|
||||
while i < N {
|
||||
index[i] = i - OFFSET;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
};
|
||||
}
|
||||
|
||||
Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
|
||||
}
|
||||
|
||||
/// Interleave two vectors.
|
||||
///
|
||||
/// The resulting vectors contain elements taken alternatively from `self` and `other`, first
|
||||
|
@ -320,7 +363,9 @@ where
|
|||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core::simd::Simd;
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::Simd;
|
||||
/// let a = Simd::from_array([0, 4, 1, 5]);
|
||||
/// let b = Simd::from_array([2, 6, 3, 7]);
|
||||
/// let (x, y) = a.deinterleave(b);
|
||||
|
@ -391,4 +436,210 @@ where
|
|||
}
|
||||
Resize::<N>::concat_swizzle(self, Simd::splat(value))
|
||||
}
|
||||
|
||||
/// Extract a vector from another vector.
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::u32x4;
|
||||
/// let x = u32x4::from_array([0, 1, 2, 3]);
|
||||
/// assert_eq!(x.extract::<1, 2>().to_array(), [1, 2]);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN>
|
||||
where
|
||||
LaneCount<LEN>: SupportedLaneCount,
|
||||
{
|
||||
struct Extract<const N: usize, const START: usize>;
|
||||
impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> {
|
||||
const INDEX: [usize; LEN] = const {
|
||||
assert!(START + LEN <= N, "index out of bounds");
|
||||
let mut index = [0; LEN];
|
||||
let mut i = 0;
|
||||
while i < LEN {
|
||||
index[i] = START + i;
|
||||
i += 1;
|
||||
}
|
||||
index
|
||||
};
|
||||
}
|
||||
Extract::<N, START>::swizzle(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, const N: usize> Mask<T, N>
|
||||
where
|
||||
T: MaskElement,
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
/// Reverse the order of the elements in the mask.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn reverse(self) -> Self {
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe { Self::from_int_unchecked(self.to_int().reverse()) }
|
||||
}
|
||||
|
||||
/// Rotates the mask such that the first `OFFSET` elements of the slice move to the end
|
||||
/// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`,
|
||||
/// the element previously at index `OFFSET` will become the first element in the slice.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) }
|
||||
}
|
||||
|
||||
/// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to
|
||||
/// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`,
|
||||
/// the element previously at index `self.len() - OFFSET` will become the first element in the slice.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) }
|
||||
}
|
||||
|
||||
/// Shifts the mask elements to the left by `OFFSET`, filling in with
|
||||
/// `padding` from the right.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original inputs"]
|
||||
pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self {
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe {
|
||||
Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding {
|
||||
T::TRUE
|
||||
} else {
|
||||
T::FALSE
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts the mask elements to the right by `OFFSET`, filling in with
|
||||
/// `padding` from the left.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original inputs"]
|
||||
pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self {
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe {
|
||||
Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding {
|
||||
T::TRUE
|
||||
} else {
|
||||
T::FALSE
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Interleave two masks.
|
||||
///
|
||||
/// The resulting masks contain elements taken alternatively from `self` and `other`, first
|
||||
/// filling the first result, and then the second.
|
||||
///
|
||||
/// The reverse of this operation is [`Mask::deinterleave`].
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::mask32x4;
|
||||
/// let a = mask32x4::from_array([false, true, false, true]);
|
||||
/// let b = mask32x4::from_array([false, false, true, true]);
|
||||
/// let (x, y) = a.interleave(b);
|
||||
/// assert_eq!(x.to_array(), [false, false, true, false]);
|
||||
/// assert_eq!(y.to_array(), [false, true, true, true]);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn interleave(self, other: Self) -> (Self, Self) {
|
||||
let (lo, hi) = self.to_int().interleave(other.to_int());
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) }
|
||||
}
|
||||
|
||||
/// Deinterleave two masks.
|
||||
///
|
||||
/// The first result takes every other element of `self` and then `other`, starting with
|
||||
/// the first element.
|
||||
///
|
||||
/// The second result takes every other element of `self` and then `other`, starting with
|
||||
/// the second element.
|
||||
///
|
||||
/// The reverse of this operation is [`Mask::interleave`].
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::mask32x4;
|
||||
/// let a = mask32x4::from_array([false, true, false, true]);
|
||||
/// let b = mask32x4::from_array([false, false, true, true]);
|
||||
/// let (x, y) = a.deinterleave(b);
|
||||
/// assert_eq!(x.to_array(), [false, false, false, true]);
|
||||
/// assert_eq!(y.to_array(), [true, true, false, true]);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn deinterleave(self, other: Self) -> (Self, Self) {
|
||||
let (even, odd) = self.to_int().deinterleave(other.to_int());
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe {
|
||||
(
|
||||
Self::from_int_unchecked(even),
|
||||
Self::from_int_unchecked(odd),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Resize a mask.
|
||||
///
|
||||
/// If `M` > `N`, extends the length of a mask, setting the new elements to `value`.
|
||||
/// If `M` < `N`, truncates the mask to the first `M` elements.
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::mask32x4;
|
||||
/// let x = mask32x4::from_array([false, true, true, false]);
|
||||
/// assert_eq!(x.resize::<8>(true).to_array(), [false, true, true, false, true, true, true, true]);
|
||||
/// assert_eq!(x.resize::<2>(true).to_array(), [false, true]);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M>
|
||||
where
|
||||
LaneCount<M>: SupportedLaneCount,
|
||||
{
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe {
|
||||
Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value {
|
||||
T::TRUE
|
||||
} else {
|
||||
T::FALSE
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract a vector from another vector.
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
|
||||
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
|
||||
/// # use simd::mask32x4;
|
||||
/// let x = mask32x4::from_array([false, true, true, false]);
|
||||
/// assert_eq!(x.extract::<1, 2>().to_array(), [true, true]);
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original inputs"]
|
||||
pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN>
|
||||
where
|
||||
LaneCount<LEN>: SupportedLaneCount,
|
||||
{
|
||||
// Safety: swizzles are safe for masks
|
||||
unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) }
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,15 +59,40 @@ where
|
|||
target_endian = "little"
|
||||
))]
|
||||
16 => transize(vqtbl1q_u8, self, idxs),
|
||||
#[cfg(all(
|
||||
target_arch = "arm",
|
||||
target_feature = "v7",
|
||||
target_feature = "neon",
|
||||
target_endian = "little"
|
||||
))]
|
||||
16 => transize(armv7_neon_swizzle_u8x16, self, idxs),
|
||||
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
|
||||
32 => transize(avx2_pshufb, self, idxs),
|
||||
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
|
||||
32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self),
|
||||
// Notable absence: avx512bw shuffle
|
||||
// If avx512bw is available, odds of avx512vbmi are good
|
||||
// FIXME: initial AVX512VBMI variant didn't actually pass muster
|
||||
// #[cfg(target_feature = "avx512vbmi")]
|
||||
// 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs),
|
||||
32 => {
|
||||
// Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
|
||||
let swizzler = |bytes, idxs| {
|
||||
let mask = x86::_mm256_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
|
||||
idxs,
|
||||
Simd::<u8, 32>::splat(N as u8).into(),
|
||||
);
|
||||
x86::_mm256_maskz_permutexvar_epi8(mask, idxs, bytes)
|
||||
};
|
||||
transize(swizzler, self, idxs)
|
||||
}
|
||||
// Notable absence: avx512bw pshufb shuffle
|
||||
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
|
||||
64 => {
|
||||
// Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
|
||||
let swizzler = |bytes, idxs| {
|
||||
let mask = x86::_mm512_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
|
||||
idxs,
|
||||
Simd::<u8, 64>::splat(N as u8).into(),
|
||||
);
|
||||
x86::_mm512_maskz_permutexvar_epi8(mask, idxs, bytes)
|
||||
};
|
||||
transize(swizzler, self, idxs)
|
||||
}
|
||||
_ => {
|
||||
let mut array = [0; N];
|
||||
for (i, k) in idxs.to_array().into_iter().enumerate() {
|
||||
|
@ -82,6 +107,28 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// armv7 neon supports swizzling `u8x16` by swizzling two u8x8 blocks
|
||||
/// with a u8x8x2 lookup table.
|
||||
///
|
||||
/// # Safety
|
||||
/// This requires armv7 neon to work
|
||||
#[cfg(all(
|
||||
target_arch = "arm",
|
||||
target_feature = "v7",
|
||||
target_feature = "neon",
|
||||
target_endian = "little"
|
||||
))]
|
||||
unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> {
|
||||
use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8};
|
||||
// SAFETY: Caller promised arm neon support
|
||||
unsafe {
|
||||
let bytes = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into()));
|
||||
let lo = vtbl2_u8(bytes, vget_low_u8(idxs.into()));
|
||||
let hi = vtbl2_u8(bytes, vget_high_u8(idxs.into()));
|
||||
vcombine_u8(lo, hi).into()
|
||||
}
|
||||
}
|
||||
|
||||
/// "vpshufb like it was meant to be" on AVX2
|
||||
///
|
||||
/// # Safety
|
||||
|
|
|
@ -99,7 +99,7 @@ use crate::simd::{
|
|||
// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
|
||||
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
|
||||
// causes rustc to emit illegal LLVM IR in some cases.
|
||||
#[repr(simd)]
|
||||
#[repr(simd, packed)]
|
||||
pub struct Simd<T, const N: usize>([T; N])
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
|
@ -144,14 +144,32 @@ where
|
|||
/// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn splat(value: T) -> Self {
|
||||
// This is preferred over `[value; N]`, since it's explicitly a splat:
|
||||
// https://github.com/rust-lang/rust/issues/97804
|
||||
struct Splat;
|
||||
impl<const N: usize> Swizzle<N> for Splat {
|
||||
const INDEX: [usize; N] = [0; N];
|
||||
#[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
|
||||
pub const fn splat(value: T) -> Self {
|
||||
const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N>
|
||||
where
|
||||
T: SimdElement,
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
Simd::from_array([value; N])
|
||||
}
|
||||
Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
|
||||
|
||||
fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N>
|
||||
where
|
||||
T: SimdElement,
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
// This is preferred over `[value; N]`, since it's explicitly a splat:
|
||||
// https://github.com/rust-lang/rust/issues/97804
|
||||
struct Splat;
|
||||
impl<const N: usize> Swizzle<N> for Splat {
|
||||
const INDEX: [usize; N] = [0; N];
|
||||
}
|
||||
|
||||
Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
|
||||
}
|
||||
|
||||
core::intrinsics::const_eval_select((value,), splat_const, splat_rt)
|
||||
}
|
||||
|
||||
/// Returns an array reference containing the entire SIMD vector.
|
||||
|
@ -425,6 +443,9 @@ where
|
|||
///
|
||||
/// When the element is disabled, that memory location is not accessed and the corresponding
|
||||
/// value from `or` is passed through.
|
||||
///
|
||||
/// # Safety
|
||||
/// Enabled loads must not exceed the length of `slice`.
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub unsafe fn load_select_unchecked(
|
||||
|
@ -442,6 +463,9 @@ where
|
|||
///
|
||||
/// When the element is disabled, that memory location is not accessed and the corresponding
|
||||
/// value from `or` is passed through.
|
||||
///
|
||||
/// # Safety
|
||||
/// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`.
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub unsafe fn load_select_ptr(
|
||||
|
@ -924,6 +948,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
|
||||
impl<T, const N: usize> PartialOrd for Simd<T, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
|
@ -943,6 +968,7 @@ where
|
|||
{
|
||||
}
|
||||
|
||||
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
|
||||
impl<T, const N: usize> Ord for Simd<T, N>
|
||||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
|
@ -1195,6 +1221,7 @@ fn lane_indices<const N: usize>() -> Simd<usize, N>
|
|||
where
|
||||
LaneCount<N>: SupportedLaneCount,
|
||||
{
|
||||
#![allow(clippy::needless_range_loop)]
|
||||
let mut index = [0; N];
|
||||
for i in 0..N {
|
||||
index[i] = i;
|
||||
|
|
|
@ -29,3 +29,6 @@ mod arm;
|
|||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
mod powerpc;
|
||||
|
||||
#[cfg(target_arch = "loongarch64")]
|
||||
mod loongarch64;
|
||||
|
|
31
library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
vendored
Normal file
31
library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs
vendored
Normal file
|
@ -0,0 +1,31 @@
|
|||
use crate::simd::*;
|
||||
use core::arch::loongarch64::*;
|
||||
|
||||
from_transmute! { unsafe u8x16 => v16u8 }
|
||||
from_transmute! { unsafe u8x32 => v32u8 }
|
||||
from_transmute! { unsafe i8x16 => v16i8 }
|
||||
from_transmute! { unsafe i8x32 => v32i8 }
|
||||
|
||||
from_transmute! { unsafe u16x8 => v8u16 }
|
||||
from_transmute! { unsafe u16x16 => v16u16 }
|
||||
from_transmute! { unsafe i16x8 => v8i16 }
|
||||
from_transmute! { unsafe i16x16 => v16i16 }
|
||||
|
||||
from_transmute! { unsafe u32x4 => v4u32 }
|
||||
from_transmute! { unsafe u32x8 => v8u32 }
|
||||
from_transmute! { unsafe i32x4 => v4i32 }
|
||||
from_transmute! { unsafe i32x8 => v8i32 }
|
||||
from_transmute! { unsafe f32x4 => v4f32 }
|
||||
from_transmute! { unsafe f32x8 => v8f32 }
|
||||
|
||||
from_transmute! { unsafe u64x2 => v2u64 }
|
||||
from_transmute! { unsafe u64x4 => v4u64 }
|
||||
from_transmute! { unsafe i64x2 => v2i64 }
|
||||
from_transmute! { unsafe i64x4 => v4i64 }
|
||||
from_transmute! { unsafe f64x2 => v2f64 }
|
||||
from_transmute! { unsafe f64x4 => v4f64 }
|
||||
|
||||
from_transmute! { unsafe usizex2 => v2u64 }
|
||||
from_transmute! { unsafe usizex4 => v4u64 }
|
||||
from_transmute! { unsafe isizex2 => v2i64 }
|
||||
from_transmute! { unsafe isizex4 => v4i64 }
|
35
library/portable-simd/crates/core_simd/tests/layout.rs
Normal file
35
library/portable-simd/crates/core_simd/tests/layout.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
#![feature(portable_simd)]
|
||||
|
||||
macro_rules! layout_tests {
|
||||
{ $($mod:ident, $ty:ty,)* } => {
|
||||
$(
|
||||
mod $mod {
|
||||
test_helpers::test_lanes! {
|
||||
fn no_padding<const LANES: usize>() {
|
||||
assert_eq!(
|
||||
core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
|
||||
core::mem::size_of::<[$ty; LANES]>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
layout_tests! {
|
||||
i8, i8,
|
||||
i16, i16,
|
||||
i32, i32,
|
||||
i64, i64,
|
||||
isize, isize,
|
||||
u8, u8,
|
||||
u16, u16,
|
||||
u32, u32,
|
||||
u64, u64,
|
||||
usize, usize,
|
||||
f32, f32,
|
||||
f64, f64,
|
||||
mut_ptr, *mut (),
|
||||
const_ptr, *const (),
|
||||
}
|
|
@ -99,7 +99,6 @@ macro_rules! test_mask_api {
|
|||
assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
|
||||
}
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[test]
|
||||
fn roundtrip_bitmask_conversion_odd() {
|
||||
let values = [
|
||||
|
@ -134,48 +133,6 @@ macro_rules! test_mask_api {
|
|||
cast_impl::<i64>();
|
||||
cast_impl::<isize>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn roundtrip_bitmask_vector_conversion() {
|
||||
use core_simd::simd::ToBytes;
|
||||
let values = [
|
||||
true, false, false, true, false, false, true, false,
|
||||
true, true, false, false, false, false, false, true,
|
||||
];
|
||||
let mask = Mask::<$type, 16>::from_array(values);
|
||||
let bitmask = mask.to_bitmask_vector();
|
||||
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
|
||||
assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
|
||||
// rust-lang/portable-simd#379
|
||||
#[test]
|
||||
fn roundtrip_bitmask_vector_conversion_small() {
|
||||
use core_simd::simd::ToBytes;
|
||||
let values = [
|
||||
true, false, true, true
|
||||
];
|
||||
let mask = Mask::<$type, 4>::from_array(values);
|
||||
let bitmask = mask.to_bitmask_vector();
|
||||
assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101);
|
||||
assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
|
||||
/* FIXME doesn't work with non-powers-of-two, yet
|
||||
// rust-lang/portable-simd#379
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[test]
|
||||
fn roundtrip_bitmask_vector_conversion_odd() {
|
||||
use core_simd::simd::ToBytes;
|
||||
let values = [
|
||||
true, false, true, false, true, true, false, false, false, true, true,
|
||||
];
|
||||
let mask = Mask::<$type, 11>::from_array(values);
|
||||
let bitmask = mask.to_bitmask_vector();
|
||||
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]);
|
||||
assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask);
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -216,6 +216,22 @@ macro_rules! impl_common_integer_tests {
|
|||
)
|
||||
}
|
||||
|
||||
fn count_ones<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&$vector::<LANES>::count_ones,
|
||||
&|x| x.count_ones() as _,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn count_zeros<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&$vector::<LANES>::count_zeros,
|
||||
&|x| x.count_zeros() as _,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn leading_zeros<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&$vector::<LANES>::leading_zeros,
|
||||
|
@ -307,6 +323,14 @@ macro_rules! impl_signed_tests {
|
|||
assert_eq!(a % b, Vector::<LANES>::splat(0));
|
||||
}
|
||||
|
||||
fn abs_diff<const LANES: usize>() {
|
||||
test_helpers::test_binary_elementwise(
|
||||
&Vector::<LANES>::abs_diff,
|
||||
&Scalar::abs_diff,
|
||||
&|_, _| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn simd_min<const LANES: usize>() {
|
||||
use core_simd::simd::cmp::SimdOrd;
|
||||
let a = Vector::<LANES>::splat(Scalar::MIN);
|
||||
|
@ -419,6 +443,14 @@ macro_rules! impl_unsigned_tests {
|
|||
&|_| true,
|
||||
);
|
||||
}
|
||||
|
||||
fn abs_diff<const LANES: usize>() {
|
||||
test_helpers::test_binary_elementwise(
|
||||
&Vector::<LANES>::abs_diff,
|
||||
&Scalar::abs_diff,
|
||||
&|_, _| true,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
|
||||
|
@ -495,6 +527,9 @@ macro_rules! impl_float_tests {
|
|||
}
|
||||
|
||||
fn is_normal<const LANES: usize>() {
|
||||
// Arm v7 Neon violates float opsem re: subnormals, see
|
||||
// https://github.com/rust-lang/portable-simd/issues/439
|
||||
#[cfg(not(target_arch = "arm"))]
|
||||
test_helpers::test_unary_mask_elementwise(
|
||||
&Vector::<LANES>::is_normal,
|
||||
&Scalar::is_normal,
|
||||
|
@ -503,6 +538,9 @@ macro_rules! impl_float_tests {
|
|||
}
|
||||
|
||||
fn is_subnormal<const LANES: usize>() {
|
||||
// Arm v7 Neon violates float opsem re: subnormals, see
|
||||
// https://github.com/rust-lang/portable-simd/issues/439
|
||||
#[cfg(not(target_arch = "arm"))]
|
||||
test_helpers::test_unary_mask_elementwise(
|
||||
&Vector::<LANES>::is_subnormal,
|
||||
&Scalar::is_subnormal,
|
||||
|
|
|
@ -48,6 +48,24 @@ fn rotate() {
|
|||
assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn shift() {
|
||||
let a = Simd::from_array([1, 2, 3, 4]);
|
||||
assert_eq!(a.shift_elements_left::<0>(0).to_array(), [1, 2, 3, 4]);
|
||||
assert_eq!(a.shift_elements_left::<1>(0).to_array(), [2, 3, 4, 0]);
|
||||
assert_eq!(a.shift_elements_left::<2>(9).to_array(), [3, 4, 9, 9]);
|
||||
assert_eq!(a.shift_elements_left::<3>(8).to_array(), [4, 8, 8, 8]);
|
||||
assert_eq!(a.shift_elements_left::<4>(7).to_array(), [7, 7, 7, 7]);
|
||||
assert_eq!(a.shift_elements_left::<5>(6).to_array(), [6, 6, 6, 6]);
|
||||
assert_eq!(a.shift_elements_right::<0>(0).to_array(), [1, 2, 3, 4]);
|
||||
assert_eq!(a.shift_elements_right::<1>(0).to_array(), [0, 1, 2, 3]);
|
||||
assert_eq!(a.shift_elements_right::<2>(-1).to_array(), [-1, -1, 1, 2]);
|
||||
assert_eq!(a.shift_elements_right::<3>(-2).to_array(), [-2, -2, -2, 1]);
|
||||
assert_eq!(a.shift_elements_right::<4>(-3).to_array(), [-3, -3, -3, -3]);
|
||||
assert_eq!(a.shift_elements_right::<5>(-4).to_array(), [-4, -4, -4, -4]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
|
||||
fn interleave() {
|
||||
|
|
|
@ -6,6 +6,3 @@ publish = false
|
|||
|
||||
[dependencies]
|
||||
proptest = { version = "0.10", default-features = false, features = ["alloc"] }
|
||||
|
||||
[features]
|
||||
all_lane_counts = []
|
||||
|
|
|
@ -539,32 +539,22 @@ macro_rules! test_lanes {
|
|||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
|
||||
lanes_1 1;
|
||||
lanes_2 2;
|
||||
lanes_4 4;
|
||||
);
|
||||
|
||||
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
|
||||
$crate::test_lanes_helper!(
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
|
||||
lanes_8 8;
|
||||
lanes_16 16;
|
||||
lanes_32 32;
|
||||
lanes_64 64;
|
||||
);
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
$crate::test_lanes_helper!(
|
||||
// test some odd and even non-power-of-2 lengths on miri
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
|
||||
// Cover an odd and an even non-power-of-2 length in Miri.
|
||||
// (Even non-power-of-2 vectors have alignment between element
|
||||
// and vector size, so we want to cover that case as well.)
|
||||
lanes_3 3;
|
||||
lanes_5 5;
|
||||
|
||||
lanes_6 6;
|
||||
);
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
|
||||
$crate::test_lanes_helper!(
|
||||
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
|
||||
lanes_4 4;
|
||||
lanes_5 5;
|
||||
|
||||
lanes_7 7;
|
||||
lanes_8 8;
|
||||
lanes_9 9;
|
||||
lanes_10 10;
|
||||
lanes_11 11;
|
||||
|
@ -572,52 +562,55 @@ macro_rules! test_lanes {
|
|||
lanes_13 13;
|
||||
lanes_14 14;
|
||||
lanes_15 15;
|
||||
lanes_16 16;
|
||||
lanes_17 17;
|
||||
lanes_18 18;
|
||||
lanes_19 19;
|
||||
lanes_20 20;
|
||||
lanes_21 21;
|
||||
lanes_22 22;
|
||||
lanes_23 23;
|
||||
//lanes_18 18;
|
||||
//lanes_19 19;
|
||||
//lanes_20 20;
|
||||
//lanes_21 21;
|
||||
//lanes_22 22;
|
||||
//lanes_23 23;
|
||||
lanes_24 24;
|
||||
lanes_25 25;
|
||||
lanes_26 26;
|
||||
lanes_27 27;
|
||||
lanes_28 28;
|
||||
lanes_29 29;
|
||||
lanes_30 30;
|
||||
lanes_31 31;
|
||||
lanes_33 33;
|
||||
lanes_34 34;
|
||||
lanes_35 35;
|
||||
lanes_36 36;
|
||||
lanes_37 37;
|
||||
lanes_38 38;
|
||||
lanes_39 39;
|
||||
lanes_40 40;
|
||||
lanes_41 41;
|
||||
lanes_42 42;
|
||||
lanes_43 43;
|
||||
lanes_44 44;
|
||||
lanes_45 45;
|
||||
lanes_46 46;
|
||||
//lanes_25 25;
|
||||
//lanes_26 26;
|
||||
//lanes_27 27;
|
||||
//lanes_28 28;
|
||||
//lanes_29 29;
|
||||
//lanes_30 30;
|
||||
//lanes_31 31;
|
||||
lanes_32 32;
|
||||
//lanes_33 33;
|
||||
//lanes_34 34;
|
||||
//lanes_35 35;
|
||||
//lanes_36 36;
|
||||
//lanes_37 37;
|
||||
//lanes_38 38;
|
||||
//lanes_39 39;
|
||||
//lanes_40 40;
|
||||
//lanes_41 41;
|
||||
//lanes_42 42;
|
||||
//lanes_43 43;
|
||||
//lanes_44 44;
|
||||
//lanes_45 45;
|
||||
//lanes_46 46;
|
||||
lanes_47 47;
|
||||
lanes_48 48;
|
||||
lanes_49 49;
|
||||
lanes_50 50;
|
||||
lanes_51 51;
|
||||
lanes_52 52;
|
||||
lanes_53 53;
|
||||
lanes_54 54;
|
||||
lanes_55 55;
|
||||
//lanes_48 48;
|
||||
//lanes_49 49;
|
||||
//lanes_50 50;
|
||||
//lanes_51 51;
|
||||
//lanes_52 52;
|
||||
//lanes_53 53;
|
||||
//lanes_54 54;
|
||||
//lanes_55 55;
|
||||
lanes_56 56;
|
||||
lanes_57 57;
|
||||
lanes_58 58;
|
||||
lanes_59 59;
|
||||
lanes_60 60;
|
||||
lanes_61 61;
|
||||
lanes_62 62;
|
||||
//lanes_58 58;
|
||||
//lanes_59 59;
|
||||
//lanes_60 60;
|
||||
//lanes_61 61;
|
||||
//lanes_62 62;
|
||||
lanes_63 63;
|
||||
lanes_64 64;
|
||||
);
|
||||
}
|
||||
)*
|
||||
|
@ -639,36 +632,24 @@ macro_rules! test_lanes_panic {
|
|||
core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
|
||||
$body
|
||||
|
||||
// test some odd and even non-power-of-2 lengths on miri
|
||||
$crate::test_lanes_helper!(
|
||||
#[should_panic];
|
||||
lanes_1 1;
|
||||
lanes_2 2;
|
||||
lanes_4 4;
|
||||
);
|
||||
|
||||
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
|
||||
$crate::test_lanes_helper!(
|
||||
#[should_panic];
|
||||
lanes_8 8;
|
||||
lanes_16 16;
|
||||
lanes_32 32;
|
||||
lanes_64 64;
|
||||
);
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
$crate::test_lanes_helper!(
|
||||
// test some odd and even non-power-of-2 lengths on miri
|
||||
#[should_panic];
|
||||
lanes_3 3;
|
||||
lanes_5 5;
|
||||
|
||||
lanes_6 6;
|
||||
);
|
||||
|
||||
#[cfg(feature = "all_lane_counts")]
|
||||
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
|
||||
$crate::test_lanes_helper!(
|
||||
#[should_panic];
|
||||
lanes_4 4;
|
||||
lanes_5 5;
|
||||
|
||||
lanes_7 7;
|
||||
lanes_8 8;
|
||||
lanes_9 9;
|
||||
lanes_10 10;
|
||||
lanes_11 11;
|
||||
|
@ -676,52 +657,55 @@ macro_rules! test_lanes_panic {
|
|||
lanes_13 13;
|
||||
lanes_14 14;
|
||||
lanes_15 15;
|
||||
lanes_16 16;
|
||||
lanes_17 17;
|
||||
lanes_18 18;
|
||||
lanes_19 19;
|
||||
lanes_20 20;
|
||||
lanes_21 21;
|
||||
lanes_22 22;
|
||||
lanes_23 23;
|
||||
//lanes_18 18;
|
||||
//lanes_19 19;
|
||||
//lanes_20 20;
|
||||
//lanes_21 21;
|
||||
//lanes_22 22;
|
||||
//lanes_23 23;
|
||||
lanes_24 24;
|
||||
lanes_25 25;
|
||||
lanes_26 26;
|
||||
lanes_27 27;
|
||||
lanes_28 28;
|
||||
lanes_29 29;
|
||||
lanes_30 30;
|
||||
lanes_31 31;
|
||||
lanes_33 33;
|
||||
lanes_34 34;
|
||||
lanes_35 35;
|
||||
lanes_36 36;
|
||||
lanes_37 37;
|
||||
lanes_38 38;
|
||||
lanes_39 39;
|
||||
lanes_40 40;
|
||||
lanes_41 41;
|
||||
lanes_42 42;
|
||||
lanes_43 43;
|
||||
lanes_44 44;
|
||||
lanes_45 45;
|
||||
lanes_46 46;
|
||||
//lanes_25 25;
|
||||
//lanes_26 26;
|
||||
//lanes_27 27;
|
||||
//lanes_28 28;
|
||||
//lanes_29 29;
|
||||
//lanes_30 30;
|
||||
//lanes_31 31;
|
||||
lanes_32 32;
|
||||
//lanes_33 33;
|
||||
//lanes_34 34;
|
||||
//lanes_35 35;
|
||||
//lanes_36 36;
|
||||
//lanes_37 37;
|
||||
//lanes_38 38;
|
||||
//lanes_39 39;
|
||||
//lanes_40 40;
|
||||
//lanes_41 41;
|
||||
//lanes_42 42;
|
||||
//lanes_43 43;
|
||||
//lanes_44 44;
|
||||
//lanes_45 45;
|
||||
//lanes_46 46;
|
||||
lanes_47 47;
|
||||
lanes_48 48;
|
||||
lanes_49 49;
|
||||
lanes_50 50;
|
||||
lanes_51 51;
|
||||
lanes_52 52;
|
||||
lanes_53 53;
|
||||
lanes_54 54;
|
||||
lanes_55 55;
|
||||
//lanes_48 48;
|
||||
//lanes_49 49;
|
||||
//lanes_50 50;
|
||||
//lanes_51 51;
|
||||
//lanes_52 52;
|
||||
//lanes_53 53;
|
||||
//lanes_54 54;
|
||||
//lanes_55 55;
|
||||
lanes_56 56;
|
||||
lanes_57 57;
|
||||
lanes_58 58;
|
||||
lanes_59 59;
|
||||
lanes_60 60;
|
||||
lanes_61 61;
|
||||
lanes_62 62;
|
||||
//lanes_58 58;
|
||||
//lanes_59 59;
|
||||
//lanes_60 60;
|
||||
//lanes_61 61;
|
||||
//lanes_62 62;
|
||||
lanes_63 63;
|
||||
lanes_64 64;
|
||||
);
|
||||
}
|
||||
)*
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue