Merge pull request #142 from rust-lang/feature/traits
Combine LanesAtMost32 and SimdArray into a single trait "Vector" Attempts to fix some unresolved questions in #139 regarding `SimdArray` having a generic parameter. In particular, this made it not appropriate for replacing `LanesAtMost32`. Additionally, it made it impossible to use in a context where you otherwise don't know the lane count, e.g. `impl Vector`. An unfortunate side effect of this change is that scatter/gather no longer work in the trait (nor does anything else that references the lane count in a type. This requires the super-unstable `const_evaluatable_checked` feature). I also threw in the change from `as_slice` to `as_array` as discussed in zulip, and fixes #51.
This commit is contained in:
commit
82e3405efe
24 changed files with 637 additions and 820 deletions
|
@ -1,253 +0,0 @@
|
|||
use crate::intrinsics;
|
||||
use crate::masks::*;
|
||||
use crate::vector::ptr::{SimdConstPtr, SimdMutPtr};
|
||||
use crate::vector::*;
|
||||
|
||||
/// A representation of a vector as an "array" with indices, implementing
|
||||
/// operations applicable to any vector type based solely on "having lanes",
|
||||
/// and describing relationships between vector and scalar types.
|
||||
pub trait SimdArray<const LANES: usize>: crate::LanesAtMost32
|
||||
where
|
||||
SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: Sized,
|
||||
{
|
||||
/// The scalar type in every lane of this vector type.
|
||||
type Scalar: Copy + Sized;
|
||||
/// The number of lanes for this vector.
|
||||
const LANES: usize = LANES;
|
||||
|
||||
/// Generates a SIMD vector with the same value in every lane.
|
||||
#[must_use]
|
||||
fn splat(val: Self::Scalar) -> Self;
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// If an index is out of bounds, that lane instead selects the value from the "or" vector.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self {
|
||||
Self::gather_select(slice, MaskSize::splat(true), idxs, or)
|
||||
}
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds indices instead use the default value for that lane (0).
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self
|
||||
where
|
||||
Self::Scalar: Default,
|
||||
{
|
||||
Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default()))
|
||||
}
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds or masked indices instead select the value from the "or" vector.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
|
||||
/// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_select(
|
||||
slice: &[Self::Scalar],
|
||||
mask: MaskSize<LANES>,
|
||||
idxs: SimdUsize<LANES>,
|
||||
or: Self,
|
||||
) -> Self {
|
||||
let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
|
||||
let base_ptr = SimdConstPtr::splat(slice.as_ptr());
|
||||
// Ferris forgive me, I have done pointer arithmetic here.
|
||||
let ptrs = base_ptr.wrapping_add(idxs);
|
||||
// SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
|
||||
unsafe { intrinsics::simd_gather(or, ptrs, mask) }
|
||||
}
|
||||
|
||||
/// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds indices are not written.
|
||||
/// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
|
||||
/// let vals = SimdI32::from_array([-27, 82, -41, 124]);
|
||||
///
|
||||
/// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
|
||||
/// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
|
||||
/// ```
|
||||
#[inline]
|
||||
fn scatter(self, slice: &mut [Self::Scalar], idxs: SimdUsize<LANES>) {
|
||||
self.scatter_select(slice, MaskSize::splat(true), idxs)
|
||||
}
|
||||
|
||||
/// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds or masked indices are not written.
|
||||
/// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
|
||||
/// let vals = SimdI32::from_array([-27, 82, -41, 124]);
|
||||
/// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
|
||||
///
|
||||
/// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
|
||||
/// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
|
||||
/// ```
|
||||
#[inline]
|
||||
fn scatter_select(
|
||||
self,
|
||||
slice: &mut [Self::Scalar],
|
||||
mask: MaskSize<LANES>,
|
||||
idxs: SimdUsize<LANES>,
|
||||
) {
|
||||
// We must construct our scatter mask before we derive a pointer!
|
||||
let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
|
||||
// SAFETY: This block works with *mut T derived from &mut 'a [T],
|
||||
// which means it is delicate in Rust's borrowing model, circa 2021:
|
||||
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
|
||||
// Even though this block is largely safe methods, it must be almost exactly this way
|
||||
// to prevent invalidating the raw ptrs while they're live.
|
||||
// Thus, entering this block requires all values to use being already ready:
|
||||
// 0. idxs we want to write to, which are used to construct the mask.
|
||||
// 1. mask, which depends on an initial &'a [T] and the idxs.
|
||||
// 2. actual values to scatter (self).
|
||||
// 3. &mut [T] which will become our base ptr.
|
||||
unsafe {
|
||||
// Now Entering ☢️ *mut T Zone
|
||||
let base_ptr = SimdMutPtr::splat(slice.as_mut_ptr());
|
||||
// Ferris forgive me, I have done pointer arithmetic here.
|
||||
let ptrs = base_ptr.wrapping_add(idxs);
|
||||
// The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
|
||||
intrinsics::simd_scatter(self, ptrs, mask)
|
||||
// Cleared ☢️ *mut T Zone
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_simdarray_for {
|
||||
($simd:ident {type Scalar = $scalar:ident;}) => {
|
||||
impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
|
||||
where SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
type Scalar = $scalar;
|
||||
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn splat(val: Self::Scalar) -> Self {
|
||||
[val; LANES].into()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($simd:ident $impl:tt) => {
|
||||
impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
|
||||
where SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: crate::LanesAtMost32,
|
||||
$impl
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdUsize {
|
||||
type Scalar = usize;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdIsize {
|
||||
type Scalar = isize;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI8 {
|
||||
type Scalar = i8;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI16 {
|
||||
type Scalar = i16;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI32 {
|
||||
type Scalar = i32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI64 {
|
||||
type Scalar = i64;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU8 {
|
||||
type Scalar = u8;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU16 {
|
||||
type Scalar = u16;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU32 {
|
||||
type Scalar = u32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU64 {
|
||||
type Scalar = u64;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdF32 {
|
||||
type Scalar = f32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdF64 {
|
||||
type Scalar = f64;
|
||||
}
|
||||
}
|
|
@ -1,13 +1,11 @@
|
|||
use crate::LanesAtMost32;
|
||||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
macro_rules! implement_mask_ops {
|
||||
{ $($vector:ident => $mask:ident ($inner_ty:ident),)* } => {
|
||||
$(
|
||||
impl<const LANES: usize> crate::$vector<LANES>
|
||||
where
|
||||
crate::$vector<LANES>: LanesAtMost32,
|
||||
crate::$inner_ty<LANES>: LanesAtMost32,
|
||||
crate::$mask<LANES>: crate::Mask,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
/// Test if each lane is equal to the corresponding lane in `other`.
|
||||
#[inline]
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
|
||||
macro_rules! impl_vector {
|
||||
{ $name:ident, $type:ty } => {
|
||||
impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
/// Construct a SIMD vector by setting all lanes to the given value.
|
||||
pub const fn splat(value: $type) -> Self {
|
||||
Self([value; LANES])
|
||||
}
|
||||
|
||||
/// Returns a slice containing the entire SIMD vector.
|
||||
pub const fn as_slice(&self) -> &[$type] {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Returns a mutable slice containing the entire SIMD vector.
|
||||
pub fn as_mut_slice(&mut self) -> &mut [$type] {
|
||||
&mut self.0
|
||||
}
|
||||
|
||||
/// Converts an array to a SIMD vector.
|
||||
pub const fn from_array(array: [$type; LANES]) -> Self {
|
||||
Self(array)
|
||||
}
|
||||
|
||||
/// Converts a SIMD vector to an array.
|
||||
pub const fn to_array(self) -> [$type; LANES] {
|
||||
// workaround for rust-lang/rust#80108
|
||||
// TODO fix this
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
{
|
||||
let mut arr = [self.0[0]; LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
arr[i] = self.0[i];
|
||||
i += 1;
|
||||
}
|
||||
arr
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Copy for $name<LANES> where Self: crate::LanesAtMost32 {}
|
||||
|
||||
impl<const LANES: usize> Clone for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Default for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
Self::splat(<$type>::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> PartialEq for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// TODO use SIMD equality
|
||||
self.to_array() == other.to_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
// TODO use SIMD equalitya
|
||||
self.to_array().partial_cmp(other.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
// array references
|
||||
impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &[$type; LANES] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_mut(&mut self) -> &mut [$type; LANES] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
// slice references
|
||||
impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &[$type] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_mut(&mut self) -> &mut [$type] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
// vector/array conversion
|
||||
impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
fn from(array: [$type; LANES]) -> Self {
|
||||
Self(array)
|
||||
}
|
||||
}
|
||||
|
||||
impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where $name<LANES>: crate::LanesAtMost32 {
|
||||
fn from(vector: $name<LANES>) -> Self {
|
||||
vector.to_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl_shuffle_2pow_lanes!{ $name }
|
||||
}
|
||||
}
|
|
@ -35,7 +35,7 @@ macro_rules! impl_fmt_trait {
|
|||
$( // repeat trait
|
||||
impl<const LANES: usize> core::fmt::$trait for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
$format(self.as_ref(), f)
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
macro_rules! impl_traits {
|
||||
{ $type:ident } => {
|
||||
impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
|
@ -11,7 +13,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
|
@ -20,7 +22,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
|
@ -29,7 +31,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
|
|
43
crates/core_simd/src/lane_count.rs
Normal file
43
crates/core_simd/src/lane_count.rs
Normal file
|
@ -0,0 +1,43 @@
|
|||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
use sealed::Sealed;
|
||||
|
||||
/// A type representing a vector lane count.
|
||||
pub struct LaneCount<const LANES: usize>;
|
||||
|
||||
/// Helper trait for vector lane counts.
|
||||
pub trait SupportedLaneCount: Sealed {
|
||||
/// The bitmask representation of a mask.
|
||||
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
|
||||
|
||||
#[doc(hidden)]
|
||||
type IntBitMask;
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Sealed for LaneCount<LANES> {}
|
||||
|
||||
impl SupportedLaneCount for LaneCount<1> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<2> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<4> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<8> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<16> {
|
||||
type BitMask = [u8; 2];
|
||||
type IntBitMask = u16;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<32> {
|
||||
type BitMask = [u8; 4];
|
||||
type IntBitMask = u32;
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
/// Implemented for vectors that are supported by the implementation.
|
||||
pub trait LanesAtMost32: sealed::Sealed {
|
||||
#[doc(hidden)]
|
||||
type BitMask: Into<u64>;
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
|
||||
macro_rules! impl_for {
|
||||
{ $name:ident } => {
|
||||
impl<const LANES: usize> sealed::Sealed for $name<LANES>
|
||||
where
|
||||
$name<LANES>: LanesAtMost32,
|
||||
{}
|
||||
|
||||
impl LanesAtMost32 for $name<1> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<2> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<4> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<8> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<16> {
|
||||
type BitMask = u16;
|
||||
}
|
||||
impl LanesAtMost32 for $name<32> {
|
||||
type BitMask = u32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_for! { SimdU8 }
|
||||
impl_for! { SimdU16 }
|
||||
impl_for! { SimdU32 }
|
||||
impl_for! { SimdU64 }
|
||||
impl_for! { SimdUsize }
|
||||
|
||||
impl_for! { SimdI8 }
|
||||
impl_for! { SimdI16 }
|
||||
impl_for! { SimdI32 }
|
||||
impl_for! { SimdI64 }
|
||||
impl_for! { SimdIsize }
|
||||
|
||||
impl_for! { SimdF32 }
|
||||
impl_for! { SimdF64 }
|
|
@ -12,8 +12,6 @@
|
|||
#![unstable(feature = "portable_simd", issue = "86656")]
|
||||
//! Portable SIMD module.
|
||||
|
||||
#[macro_use]
|
||||
mod first;
|
||||
#[macro_use]
|
||||
mod permute;
|
||||
#[macro_use]
|
||||
|
@ -29,20 +27,16 @@ mod comparisons;
|
|||
mod fmt;
|
||||
mod intrinsics;
|
||||
mod iter;
|
||||
mod math;
|
||||
mod ops;
|
||||
mod round;
|
||||
mod vendor;
|
||||
|
||||
mod math;
|
||||
|
||||
mod lanes_at_most_32;
|
||||
pub use lanes_at_most_32::LanesAtMost32;
|
||||
mod lane_count;
|
||||
pub use lane_count::*;
|
||||
|
||||
mod masks;
|
||||
pub use masks::*;
|
||||
|
||||
mod vector;
|
||||
pub use vector::*;
|
||||
|
||||
mod array;
|
||||
pub use array::SimdArray;
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
|
||||
#[cfg_attr(
|
||||
not(all(target_arch = "x86_64", target_feature = "avx512f")),
|
||||
path = "full_masks.rs"
|
||||
path = "masks/full_masks.rs"
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(target_arch = "x86_64", target_feature = "avx512f"),
|
||||
path = "bitmask.rs"
|
||||
path = "masks/bitmask.rs"
|
||||
)]
|
||||
mod mask_impl;
|
||||
|
||||
use crate::{LanesAtMost32, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
|
||||
use crate::{SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
|
@ -20,12 +20,12 @@ mod sealed {
|
|||
|
||||
/// Helper trait for mask types.
|
||||
pub trait Mask: sealed::Sealed {
|
||||
/// The bitmask representation of a mask.
|
||||
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
|
||||
/// The number of lanes for this mask.
|
||||
const LANES: usize;
|
||||
|
||||
// TODO remove this when rustc intrinsics are more flexible
|
||||
#[doc(hidden)]
|
||||
type IntBitMask;
|
||||
/// Generates a mask with the same value in every lane.
|
||||
#[must_use]
|
||||
fn splat(val: bool) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! define_opaque_mask {
|
||||
|
@ -38,45 +38,30 @@ macro_rules! define_opaque_mask {
|
|||
#[allow(non_camel_case_types)]
|
||||
pub struct $name<const LANES: usize>($inner_ty)
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask;
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount;
|
||||
|
||||
impl<const LANES: usize> sealed::Sealed for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
impl Mask for $name<1> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<2> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<4> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<8> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<16> {
|
||||
type BitMask = [u8; 2];
|
||||
type IntBitMask = u16;
|
||||
}
|
||||
impl Mask for $name<32> {
|
||||
type BitMask = [u8; 4];
|
||||
type IntBitMask = u32;
|
||||
|
||||
impl<const LANES: usize> Mask for $name<LANES>
|
||||
where
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
const LANES: usize = LANES;
|
||||
|
||||
#[inline]
|
||||
fn splat(value: bool) -> Self {
|
||||
Self::splat(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl_opaque_mask_reductions! { $name, $bits_ty }
|
||||
|
||||
impl<const LANES: usize> $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
/// Construct a mask by setting all lanes to the given value.
|
||||
pub fn splat(value: bool) -> Self {
|
||||
|
@ -175,21 +160,20 @@ macro_rules! define_opaque_mask {
|
|||
}
|
||||
|
||||
/// Convert this mask to a bitmask, with one bit set per lane.
|
||||
pub fn to_bitmask(self) -> <Self as Mask>::BitMask {
|
||||
self.0.to_bitmask::<Self>()
|
||||
pub fn to_bitmask(self) -> <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask {
|
||||
self.0.to_bitmask()
|
||||
}
|
||||
|
||||
/// Convert a bitmask to a mask.
|
||||
pub fn from_bitmask(bitmask: <Self as Mask>::BitMask) -> Self {
|
||||
Self(<$inner_ty>::from_bitmask::<Self>(bitmask))
|
||||
pub fn from_bitmask(bitmask: <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
|
||||
Self(<$inner_ty>::from_bitmask(bitmask))
|
||||
}
|
||||
}
|
||||
|
||||
// vector/array conversion
|
||||
impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(array: [bool; LANES]) -> Self {
|
||||
Self::from_array(array)
|
||||
|
@ -198,8 +182,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(vector: $name<LANES>) -> Self {
|
||||
vector.to_array()
|
||||
|
@ -208,14 +191,12 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> Copy for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<const LANES: usize> Clone for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
|
@ -225,8 +206,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> Default for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
|
@ -236,8 +216,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> PartialEq for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
|
@ -247,8 +226,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
|
@ -258,8 +236,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::fmt::Debug for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
f.debug_list()
|
||||
|
@ -270,8 +247,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -282,8 +258,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -294,8 +269,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -306,8 +280,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -318,8 +291,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -330,8 +302,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -342,8 +313,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -354,8 +324,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -366,8 +335,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -378,8 +346,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::Not for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -390,8 +357,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: Self) {
|
||||
|
@ -401,8 +367,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: bool) {
|
||||
|
@ -412,8 +377,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: Self) {
|
||||
|
@ -423,8 +387,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: bool) {
|
||||
|
@ -434,8 +397,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitxor_assign(&mut self, rhs: Self) {
|
||||
|
@ -445,8 +407,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitxor_assign(&mut self, rhs: bool) {
|
||||
|
@ -460,7 +421,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 8-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask8<const LANES: usize>(mask_impl::Mask8<Self, LANES>);
|
||||
struct Mask8<const LANES: usize>(mask_impl::Mask8<LANES>);
|
||||
@bits SimdI8
|
||||
}
|
||||
|
||||
|
@ -468,7 +429,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 16-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask16<const LANES: usize>(mask_impl::Mask16<Self, LANES>);
|
||||
struct Mask16<const LANES: usize>(mask_impl::Mask16<LANES>);
|
||||
@bits SimdI16
|
||||
}
|
||||
|
||||
|
@ -476,7 +437,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 32-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask32<const LANES: usize>(mask_impl::Mask32<Self, LANES>);
|
||||
struct Mask32<const LANES: usize>(mask_impl::Mask32<LANES>);
|
||||
@bits SimdI32
|
||||
}
|
||||
|
||||
|
@ -484,7 +445,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 64-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask64<const LANES: usize>(mask_impl::Mask64<Self, LANES>);
|
||||
struct Mask64<const LANES: usize>(mask_impl::Mask64<LANES>);
|
||||
@bits SimdI64
|
||||
}
|
||||
|
||||
|
@ -492,7 +453,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` pointer-width elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct MaskSize<const LANES: usize>(mask_impl::MaskSize<Self, LANES>);
|
||||
struct MaskSize<const LANES: usize>(mask_impl::MaskSize<LANES>);
|
||||
@bits SimdIsize
|
||||
}
|
||||
|
||||
|
@ -555,10 +516,7 @@ macro_rules! impl_from {
|
|||
$(
|
||||
impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
$from<LANES>: Mask,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $from<LANES>) -> Self {
|
||||
Self(value.0.into())
|
|
@ -1,50 +1,81 @@
|
|||
use crate::Mask;
|
||||
use core::marker::PhantomData;
|
||||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
/// Helper trait for limiting int conversion types
|
||||
pub trait ConvertToInt {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
|
||||
/// A mask where each lane is represented by a single bit.
|
||||
#[repr(transparent)]
|
||||
pub struct BitMask<T: Mask, const LANES: usize>(T::BitMask, PhantomData<[(); LANES]>);
|
||||
pub struct BitMask<const LANES: usize>(<LaneCount<LANES> as SupportedLaneCount>::BitMask)
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount;
|
||||
|
||||
impl<T: Mask, const LANES: usize> Copy for BitMask<T, LANES> {}
|
||||
impl<const LANES: usize> Copy for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Clone for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> Clone for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialEq for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> PartialEq for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.as_ref() == other.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialOrd for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> PartialOrd for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
self.0.as_ref().partial_cmp(other.0.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Eq for BitMask<T, LANES> {}
|
||||
impl<const LANES: usize> Eq for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Ord for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> Ord for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
|
||||
self.0.as_ref().cmp(other.0.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
||||
impl<const LANES: usize> BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
pub fn splat(value: bool) -> Self {
|
||||
let mut mask = T::BitMask::default();
|
||||
let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
|
||||
if value {
|
||||
mask.as_mut().fill(u8::MAX)
|
||||
} else {
|
||||
|
@ -53,12 +84,12 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
if LANES % 8 > 0 {
|
||||
*mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
|
||||
}
|
||||
Self(mask, PhantomData)
|
||||
Self(mask)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
|
||||
(self.0.as_ref()[lane / 8] >> lane % 8) & 0x1 > 0
|
||||
(self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -72,7 +103,8 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
V: ConvertToInt + Default + core::ops::Not<Output = V>,
|
||||
{
|
||||
unsafe {
|
||||
let mask: T::IntBitMask = core::mem::transmute_copy(&self);
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
core::mem::transmute_copy(&self);
|
||||
crate::intrinsics::simd_select_bitmask(mask, !V::default(), V::default())
|
||||
}
|
||||
}
|
||||
|
@ -80,33 +112,29 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
#[inline]
|
||||
pub unsafe fn from_int_unchecked<V>(value: V) -> Self
|
||||
where
|
||||
V: crate::LanesAtMost32,
|
||||
V: crate::Vector,
|
||||
{
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::IntBitMask>(),
|
||||
core::mem::size_of::<T::BitMask>()
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(
|
||||
),
|
||||
core::mem::size_of::<
|
||||
<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask,
|
||||
>(),
|
||||
);
|
||||
let mask: T::IntBitMask = crate::intrinsics::simd_bitmask(value);
|
||||
Self(core::mem::transmute_copy(&mask), PhantomData)
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
crate::intrinsics::simd_bitmask(value);
|
||||
Self(core::mem::transmute_copy(&mask))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_bitmask<U: Mask>(self) -> U::BitMask {
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::BitMask>(),
|
||||
core::mem::size_of::<U::BitMask>()
|
||||
);
|
||||
unsafe { core::mem::transmute_copy(&self.0) }
|
||||
pub fn to_bitmask(self) -> <LaneCount<LANES> as SupportedLaneCount>::BitMask {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_bitmask<U: Mask>(bitmask: U::BitMask) -> Self {
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::BitMask>(),
|
||||
core::mem::size_of::<U::BitMask>()
|
||||
);
|
||||
unsafe { core::mem::transmute_copy(&bitmask) }
|
||||
pub fn from_bitmask(bitmask: <LaneCount<LANES> as SupportedLaneCount>::BitMask) -> Self {
|
||||
Self(bitmask)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -120,9 +148,10 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitAnd for BitMask<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitAnd for BitMask<LANES>
|
||||
where
|
||||
T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
<LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -134,9 +163,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitOr for BitMask<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitOr for BitMask<LANES>
|
||||
where
|
||||
T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
<LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -148,7 +178,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> core::ops::BitXor for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitxor(mut self, rhs: Self) -> Self::Output {
|
||||
|
@ -159,7 +192,10 @@ impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> core::ops::Not for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn not(mut self) -> Self::Output {
|
||||
|
@ -173,31 +209,8 @@ impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
pub type Mask8<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask16<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask32<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask64<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type MaskSize<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
|
||||
macro_rules! impl_from {
|
||||
{ $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
|
||||
$(
|
||||
impl<const LANES: usize> From<$from<crate::$from<LANES>, LANES>> for $to<crate::$to<LANES>, LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$from<LANES>: crate::Mask,
|
||||
crate::$to<LANES>: crate::Mask,
|
||||
{
|
||||
fn from(value: $from<crate::$from<LANES>, LANES>) -> Self {
|
||||
unsafe { core::mem::transmute_copy(&value) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
impl_from! { Mask8 (SimdI8) => Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize) }
|
||||
impl_from! { Mask16 (SimdI16) => Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8) }
|
||||
impl_from! { Mask32 (SimdI32) => Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16) }
|
||||
impl_from! { Mask64 (SimdI64) => MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32) }
|
||||
impl_from! { MaskSize (SimdIsize) => Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64) }
|
||||
pub type Mask8<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask16<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask32<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask64<const LANES: usize> = BitMask<LANES>;
|
||||
pub type MaskSize<const LANES: usize> = BitMask<LANES>;
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
//! Masks that take up full SIMD vector registers.
|
||||
|
||||
use crate::Mask;
|
||||
use core::marker::PhantomData;
|
||||
|
||||
macro_rules! define_mask {
|
||||
{
|
||||
$(#[$attr:meta])*
|
||||
|
@ -12,20 +9,20 @@ macro_rules! define_mask {
|
|||
} => {
|
||||
$(#[$attr])*
|
||||
#[repr(transparent)]
|
||||
pub struct $name<T: Mask, const $lanes: usize>(crate::$type<$lanes2>, PhantomData<T>)
|
||||
pub struct $name<const $lanes: usize>(crate::$type<$lanes>)
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32;
|
||||
crate::LaneCount<$lanes>: crate::SupportedLaneCount;
|
||||
|
||||
impl_full_mask_reductions! { $name, $type }
|
||||
|
||||
impl<T: Mask, const LANES: usize> Copy for $name<T, LANES>
|
||||
impl<const LANES: usize> Copy for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Clone for $name<T, LANES>
|
||||
impl<const LANES: usize> Clone for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
|
@ -33,41 +30,41 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialEq for $name<T, LANES>
|
||||
impl<const LANES: usize> PartialEq for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0 == other.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialOrd for $name<T, LANES>
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
self.0.partial_cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Eq for $name<T, LANES>
|
||||
impl<const LANES: usize> Eq for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Ord for $name<T, LANES>
|
||||
impl<const LANES: usize> Ord for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
|
||||
self.0.cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> $name<T, LANES>
|
||||
impl<const LANES: usize> $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
pub fn splat(value: bool) -> Self {
|
||||
Self(
|
||||
|
@ -78,7 +75,6 @@ macro_rules! define_mask {
|
|||
0
|
||||
}
|
||||
),
|
||||
PhantomData,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -103,16 +99,19 @@ macro_rules! define_mask {
|
|||
|
||||
#[inline]
|
||||
pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
|
||||
Self(value, PhantomData)
|
||||
Self(value)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_bitmask<U: crate::Mask>(self) -> U::BitMask {
|
||||
pub fn to_bitmask(self) -> <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask {
|
||||
unsafe {
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
|
||||
let mask: U::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
|
||||
let mut bitmask: U::BitMask = core::mem::transmute_copy(&mask);
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
|
||||
);
|
||||
let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
|
||||
let mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask = core::mem::transmute_copy(&bitmask);
|
||||
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
|
@ -128,7 +127,7 @@ macro_rules! define_mask {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_bitmask<U: crate::Mask>(mut bitmask: U::BitMask) -> Self {
|
||||
pub fn from_bitmask(mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
|
||||
unsafe {
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
|
@ -139,9 +138,12 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
|
||||
let bitmask: U::IntBitMask = core::mem::transmute_copy(&bitmask);
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
|
||||
);
|
||||
let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = core::mem::transmute_copy(&bitmask);
|
||||
|
||||
Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
|
||||
bitmask,
|
||||
|
@ -152,56 +154,56 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::convert::From<$name<T, LANES>> for crate::$type<LANES>
|
||||
impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $name<T, LANES>) -> Self {
|
||||
fn from(value: $name<LANES>) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitAnd for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitand(self, rhs: Self) -> Self {
|
||||
Self(self.0 & rhs.0, PhantomData)
|
||||
Self(self.0 & rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitOr for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitOr for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitor(self, rhs: Self) -> Self {
|
||||
Self(self.0 | rhs.0, PhantomData)
|
||||
Self(self.0 | rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitXor for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitXor for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitxor(self, rhs: Self) -> Self::Output {
|
||||
Self(self.0 ^ rhs.0, PhantomData)
|
||||
Self(self.0 ^ rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::Not for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::Not for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn not(self) -> Self::Output {
|
||||
Self(!self.0, PhantomData)
|
||||
Self(!self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -240,14 +242,11 @@ define_mask! {
|
|||
macro_rules! impl_from {
|
||||
{ $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
|
||||
$(
|
||||
impl<const LANES: usize, T, U> From<$from<T, LANES>> for $to<U, LANES>
|
||||
impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
T: crate::Mask,
|
||||
U: crate::Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $from<T, LANES>) -> Self {
|
||||
fn from(value: $from<LANES>) -> Self {
|
||||
let mut new = Self::splat(false);
|
||||
for i in 0..LANES {
|
||||
unsafe { new.set_unchecked(i, value.test_unchecked(i)) }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
macro_rules! impl_uint_arith {
|
||||
($(($name:ident, $n:ident)),+) => {
|
||||
$( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
$( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
|
||||
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
|
@ -44,7 +44,7 @@ macro_rules! impl_uint_arith {
|
|||
|
||||
macro_rules! impl_int_arith {
|
||||
($(($name:ident, $n:ident)),+) => {
|
||||
$( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
$( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
|
||||
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
|
|
|
@@ -1,4 +1,4 @@
-use crate::LanesAtMost32;
+use crate::{LaneCount, SupportedLaneCount};

 /// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
 fn invalid_shift_rhs<T>(rhs: T) -> bool
@@ -16,7 +16,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
@@ -26,7 +26,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
@@ -36,7 +36,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -48,7 +48,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<$rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -60,7 +60,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -75,7 +75,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             $(#[$attrs:meta])*
             fn $fn:ident(&mut $self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) $body:tt
@@ -83,7 +83,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body
@@ -91,7 +91,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: &$rhs_arg_ty) {
@@ -104,7 +104,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
             fn $fn:ident($self_tok:ident) -> Self::Output $body:tt
@@ -112,7 +112,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
             fn $fn($self_tok) -> Self::Output $body
@@ -120,7 +120,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait>::Output;
             fn $fn($self_tok) -> Self::Output {
@@ -167,7 +167,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Not for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn not(self) -> Self::Output {
@@ -181,7 +181,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Neg for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn neg(self) -> Self::Output {
@@ -194,7 +194,7 @@ macro_rules! impl_op {
     { impl Index for $type:ident, $scalar:ty } => {
         impl<I, const LANES: usize> core::ops::Index<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             type Output = I::Output;
@@ -206,7 +206,7 @@ macro_rules! impl_op {

         impl<I, const LANES: usize> core::ops::IndexMut<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             fn index_mut(&mut self, index: I) -> &mut Self::Output {
@@ -221,7 +221,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -237,7 +237,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -251,7 +251,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<crate::$type<LANES>> for $scalar
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$type<LANES>;
@@ -265,7 +265,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: Self) {
@@ -279,7 +279,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: $scalar) {
@@ -325,13 +325,13 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn div(self, rhs: Self) -> Self::Output {
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .any(|x| *x == 0)
                     {
@@ -340,8 +340,8 @@ macro_rules! impl_unsigned_int_ops {

                     // Guards for div(MIN, -1),
                     // this check only applies to signed ints
-                    if <$scalar>::MIN != 0 && self.as_slice().iter()
-                        .zip(rhs.as_slice().iter())
+                    if <$scalar>::MIN != 0 && self.as_array().iter()
+                        .zip(rhs.as_array().iter())
                         .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                         panic!("attempt to divide with overflow");
                     }
@@ -353,7 +353,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -363,7 +363,7 @@ macro_rules! impl_unsigned_int_ops {
                         panic!("attempt to divide by zero");
                     }
                     if <$scalar>::MIN != 0 &&
-                        self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                        self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                         rhs == -1 as _ {
                         panic!("attempt to divide with overflow");
                     }
@@ -376,7 +376,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<crate::$vector<LANES>> for $scalar
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$vector<LANES>;
@@ -390,7 +390,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::DivAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn div_assign(&mut self, rhs: Self) {
@@ -402,7 +402,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::DivAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn div_assign(&mut self, rhs: $scalar) {
@@ -415,13 +415,13 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn rem(self, rhs: Self) -> Self::Output {
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .any(|x| *x == 0)
                     {
@@ -430,8 +430,8 @@ macro_rules! impl_unsigned_int_ops {

                     // Guards for rem(MIN, -1)
                     // this branch applies the check only to signed ints
-                    if <$scalar>::MIN != 0 && self.as_slice().iter()
-                        .zip(rhs.as_slice().iter())
+                    if <$scalar>::MIN != 0 && self.as_array().iter()
+                        .zip(rhs.as_array().iter())
                         .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                         panic!("attempt to calculate the remainder with overflow");
                     }
@@ -443,7 +443,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -453,7 +453,7 @@ macro_rules! impl_unsigned_int_ops {
                         panic!("attempt to calculate the remainder with a divisor of zero");
                     }
                     if <$scalar>::MIN != 0 &&
-                        self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                        self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                         rhs == -1 as _ {
                         panic!("attempt to calculate the remainder with overflow");
                     }
@@ -466,7 +466,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<crate::$vector<LANES>> for $scalar
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$vector<LANES>;
@@ -480,7 +480,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::RemAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn rem_assign(&mut self, rhs: Self) {
@@ -492,7 +492,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::RemAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn rem_assign(&mut self, rhs: $scalar) {
@@ -505,14 +505,14 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shl<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn shl(self, rhs: Self) -> Self::Output {
                     // TODO there is probably a better way of doing this
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .copied()
                         .any(invalid_shift_rhs)
@@ -527,7 +527,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shl<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -546,7 +546,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShlAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shl_assign(&mut self, rhs: Self) {
@@ -558,7 +558,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShlAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shl_assign(&mut self, rhs: $scalar) {
@@ -570,14 +570,14 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shr<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn shr(self, rhs: Self) -> Self::Output {
                     // TODO there is probably a better way of doing this
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .copied()
                         .any(invalid_shift_rhs)
@@ -592,7 +592,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shr<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -611,7 +611,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShrAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shr_assign(&mut self, rhs: Self) {
@@ -623,7 +623,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShrAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shr_assign(&mut self, rhs: $scalar) {

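As a usage sketch (not part of this commit): with the nightly `portable_simd` feature and the `core_simd` crate in scope, as in the doc examples elsewhere in this diff, the rewritten guards above make lanewise division divide normally when every divisor lane is nonzero and panic otherwise. Lane count and values are arbitrary.

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let a = SimdI32::<4>::from_array([8, 6, 4, 2]);
    let b = SimdI32::splat(2);
    // Lanewise division, as implemented by the Div impls above.
    assert_eq!(a / b, SimdI32::from_array([4, 3, 2, 1]));
    // A zero in any divisor lane would trigger the "attempt to divide by zero" panic:
    // let _ = a / SimdI32::from_array([2, 0, 2, 2]);
}
```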
@@ -2,7 +2,7 @@ macro_rules! impl_integer_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
@@ -56,7 +56,7 @@ macro_rules! impl_float_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {

             /// Horizontal add. Returns the sum of the lanes of the vector.
@@ -64,7 +64,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().sum()
+                    self.as_array().iter().sum()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
                 }
@@ -75,7 +75,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().product()
+                    self.as_array().iter().product()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
                 }
@@ -104,9 +104,9 @@ macro_rules! impl_float_reductions {

 macro_rules! impl_full_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
-        impl<T: crate::Mask, const LANES: usize> $name<T, LANES>
+        impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             pub fn any(self) -> bool {
@@ -125,8 +125,7 @@ macro_rules! impl_opaque_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
         impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            $name<LANES>: crate::Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns true if any lane is set, or false otherwise.
             #[inline]

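A brief usage sketch (not part of the commit) of the horizontal reductions named above, assuming the same nightly `portable_simd` feature and `core_simd` crate as the doc examples in this diff:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let v = SimdF32::<4>::from_array([1.0, 2.0, 3.0, 4.0]);
    // Horizontal add and multiply over the lanes, as implemented above.
    assert_eq!(v.horizontal_sum(), 10.0);
    assert_eq!(v.horizontal_product(), 24.0);
}
```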
@@ -5,7 +5,7 @@ macro_rules! implement {
         #[cfg(feature = "std")]
         impl<const LANES: usize> crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns the smallest integer greater than or equal to each lane.
             #[must_use = "method returns a new vector and does not mutate the original value"]
@@ -41,13 +41,7 @@ macro_rules! implement {
             pub fn fract(self) -> Self {
                 self - self.trunc()
             }
-        }
-
-        impl<const LANES: usize> crate::$type<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$int_type<LANES>: crate::LanesAtMost32,
-        {
             /// Rounds toward zero and converts to the same-width integer type, assuming that
             /// the value is finite and fits in that type.
             ///

@@ -14,12 +14,10 @@ macro_rules! impl_select {
         $mask:ident ($bits_ty:ident): $($type:ident),*
     } => {
         $(
-            impl<const LANES: usize> Sealed for crate::$type<LANES> where Self: crate::LanesAtMost32 {}
+            impl<const LANES: usize> Sealed for crate::$type<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
             impl<const LANES: usize> Select<crate::$mask<LANES>> for crate::$type<LANES>
             where
-                crate::$mask<LANES>: crate::Mask,
-                crate::$bits_ty<LANES>: crate::LanesAtMost32,
-                Self: crate::LanesAtMost32,
+                crate::LaneCount<LANES>: crate::SupportedLaneCount,
             {
                 #[doc(hidden)]
                 #[inline]
@@ -31,13 +29,12 @@ macro_rules! impl_select {

         impl<const LANES: usize> Sealed for crate::$mask<LANES>
         where
             Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}

         impl<const LANES: usize> Select<Self> for crate::$mask<LANES>
         where
             Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[doc(hidden)]
             #[inline]
@@ -48,8 +45,7 @@ macro_rules! impl_select {

         impl<const LANES: usize> crate::$mask<LANES>
         where
-            Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Choose lanes from two vectors.
             ///

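For orientation, a sketch of how the mask-driven lane choice described above ("Choose lanes from two vectors") is typically used. This is not part of the diff; the method name `select` and the `Mask32::from_array` constructor are assumptions and are not shown in the hunks above.

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let mask = Mask32::<4>::from_array([true, false, true, false]);
    let a = SimdI32::from_array([1, 2, 3, 4]);
    let b = SimdI32::from_array([-1, -2, -3, -4]);
    // Assumed signature: true lanes take values from `a`, false lanes from `b`.
    assert_eq!(mask.select(a, b), SimdI32::from_array([1, -2, 3, -4]));
}
```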
@@ -18,11 +18,14 @@ pub trait ToBytes: Sealed {
 macro_rules! impl_to_bytes {
     { $name:ident, $($int_width:literal -> $byte_width:literal),* } => {
         $(
-            impl Sealed for crate::$name<$int_width> where Self: crate::LanesAtMost32 {}
+            impl Sealed for crate::$name<$int_width>
+            where
+                crate::LaneCount<$int_width>: crate::SupportedLaneCount,
+            {}

             impl ToBytes for crate::$name<$int_width>
             where
-                Self: crate::LanesAtMost32,
-                crate::SimdU8<$byte_width>: crate::LanesAtMost32,
+                crate::LaneCount<$int_width>: crate::SupportedLaneCount,
             {
                 type Bytes = crate::SimdU8<$byte_width>;
                 fn to_bytes_impl(self) -> Self::Bytes {
@@ -36,7 +39,8 @@ macro_rules! impl_to_bytes {

         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: ToBytes + crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            Self: ToBytes,
         {
             /// Return the memory representation of this integer as a byte array in native byte
             /// order.

@@ -1,3 +1,6 @@
+#[macro_use]
+mod vector_impl;
+
 mod float;
 mod int;
 mod uint;
@@ -8,3 +11,22 @@ pub use uint::*;

 // Vectors of pointers are not for public use at the current time.
 pub(crate) mod ptr;
+
+mod sealed {
+    pub trait Sealed {}
+}
+
+/// A representation of a vector as an "array" with indices, implementing
+/// operations applicable to any vector type based solely on "having lanes",
+/// and describing relationships between vector and scalar types.
+pub trait Vector: sealed::Sealed {
+    /// The scalar type in every lane of this vector type.
+    type Scalar: Copy + Sized;
+
+    /// The number of lanes for this vector.
+    const LANES: usize;
+
+    /// Generates a SIMD vector with the same value in every lane.
+    #[must_use]
+    fn splat(val: Self::Scalar) -> Self;
+}

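One point of a lane-count-free trait is that code can be generic over any vector type without naming a concrete lane count. A minimal sketch (not part of the commit; the helper `zeroed` is hypothetical, and it assumes `Vector` is re-exported at the crate root alongside the vector types):

```
#![feature(portable_simd)]
use core_simd::*;

// Hypothetical helper: generic over any SIMD vector via the new `Vector` trait.
fn zeroed<V: Vector>() -> V
where
    V::Scalar: Default,
{
    V::splat(V::Scalar::default())
}

fn main() {
    let v: SimdU8<16> = zeroed();
    assert_eq!(v, SimdU8::splat(0));
}
```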
@@ -1,5 +1,7 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements inherent methods for a float vector `$name` containing multiple
 /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
 /// representation. Called from `define_float_vector!`.
@@ -10,8 +12,7 @@ macro_rules! impl_float_vector {

         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Raw transmutation to an unsigned integer vector type with the
             /// same size and number of lanes.
@@ -74,15 +75,7 @@ macro_rules! impl_float_vector {
             pub fn to_radians(self) -> Self {
                 self * Self::splat($type::to_radians(1.))
             }
-        }
-
-        impl<const LANES: usize> $name<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
-        {
             /// Returns true for each lane if it has a positive sign, including
             /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
             #[inline]
@@ -197,7 +190,7 @@ macro_rules! impl_float_vector {
 #[repr(simd)]
 pub struct SimdF32<const LANES: usize>([f32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
@@ -205,7 +198,7 @@ impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdF64<const LANES: usize>([f64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_float_vector! { SimdF64, f64, SimdU64, Mask64, SimdI64 }

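A small check of the degree-to-radian conversion kept above (illustrative only, not part of the commit; assumes the same nightly feature and crate as the doc examples in this diff):

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let degrees = SimdF32::<4>::splat(180.0);
    let radians = degrees.to_radians();
    // to_radians multiplies by f32::to_radians(1.0), as shown above.
    for x in radians.as_array() {
        assert!((x - core::f32::consts::PI).abs() < 1e-6);
    }
}
```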
@@ -1,36 +1,39 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_integer_vector {
     { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }

-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}

-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }

-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }

         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Returns true for each positive lane and false if it is zero or negative.
             #[inline]
@@ -63,7 +66,7 @@ macro_rules! impl_integer_vector {
 #[repr(simd)]
 pub struct SimdIsize<const LANES: usize>([isize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
@@ -71,7 +74,7 @@ impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
 #[repr(simd)]
 pub struct SimdI16<const LANES: usize>([i16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
@@ -79,7 +82,7 @@ impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
 #[repr(simd)]
 pub struct SimdI32<const LANES: usize>([i32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
@@ -87,7 +90,7 @@ impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdI64<const LANES: usize>([i64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
@@ -95,7 +98,7 @@ impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
 #[repr(simd)]
 pub struct SimdI8<const LANES: usize>([i8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI8, i8, Mask8, SimdI8 }

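Since `Ord` is implemented by comparing the underlying array (see `cmp` above), vector comparison is lexicographic by lane. A sketch, not part of the commit, under the same feature/crate assumptions as the doc examples:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let a = SimdI32::<4>::from_array([1, 2, 3, 4]);
    let b = SimdI32::<4>::from_array([1, 2, 4, 0]);
    // The third lane decides: 3 < 4, so `a` sorts before `b`.
    assert!(a < b);
}
```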
@@ -1,5 +1,5 @@
 //! Private implementation details of public gather/scatter APIs.
-use crate::SimdUsize;
+use crate::{LaneCount, SimdUsize, SupportedLaneCount};
 use core::mem;

 /// A vector of *const T.
@@ -9,7 +9,7 @@ pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);

 impl<T, const LANES: usize> SimdConstPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]
@@ -35,7 +35,7 @@ pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);

 impl<T, const LANES: usize> SimdMutPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]

@@ -1,28 +1,33 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }

-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}

-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }

-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }
     }
@@ -32,7 +37,7 @@ macro_rules! impl_unsigned_vector {
 #[repr(simd)]
 pub struct SimdUsize<const LANES: usize>([usize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdUsize, usize }
@@ -40,7 +45,7 @@ impl_unsigned_vector! { SimdUsize, usize }
 #[repr(simd)]
 pub struct SimdU16<const LANES: usize>([u16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU16, u16 }
@@ -48,7 +53,7 @@ impl_unsigned_vector! { SimdU16, u16 }
 #[repr(simd)]
 pub struct SimdU32<const LANES: usize>([u32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU32, u32 }
@@ -56,7 +61,7 @@ impl_unsigned_vector! { SimdU32, u32 }
 #[repr(simd)]
 pub struct SimdU64<const LANES: usize>([u64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU64, u64 }
@@ -64,7 +69,7 @@ impl_unsigned_vector! { SimdU64, u64 }
 #[repr(simd)]
 pub struct SimdU8<const LANES: usize>([u8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU8, u8 }

crates/core_simd/src/vector/vector_impl.rs (new file, 257 lines)
@@ -0,0 +1,257 @@
+/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
+macro_rules! impl_vector {
+    { $name:ident, $type:ty } => {
+        impl<const LANES: usize> crate::vector::sealed::Sealed for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {}
+
+        impl<const LANES: usize> crate::vector::Vector for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            type Scalar = $type;
+            const LANES: usize = LANES;
+
+            #[inline]
+            fn splat(val: Self::Scalar) -> Self {
+                Self::splat(val)
+            }
+        }
+
+        impl<const LANES: usize> $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            /// Construct a SIMD vector by setting all lanes to the given value.
+            pub const fn splat(value: $type) -> Self {
+                Self([value; LANES])
+            }
+
+            /// Returns an array reference containing the entire SIMD vector.
+            pub const fn as_array(&self) -> &[$type; LANES] {
+                &self.0
+            }
+
+            /// Returns a mutable array reference containing the entire SIMD vector.
+            pub fn as_mut_array(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+
+            /// Converts an array to a SIMD vector.
+            pub const fn from_array(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+
+            /// Converts a SIMD vector to an array.
+            pub const fn to_array(self) -> [$type; LANES] {
+                self.0
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            ///
+            /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or(slice: &[$type], idxs: crate::SimdUsize<LANES>, or: Self) -> Self {
+                Self::gather_select(slice, crate::MaskSize::splat(true), idxs, or)
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices instead use the default value for that lane (0).
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            ///
+            /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or_default(slice: &[$type], idxs: crate::SimdUsize<LANES>) -> Self {
+                Self::gather_or(slice, idxs, Self::splat(<$type>::default()))
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices instead select the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_select(
+                slice: &[$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+                or: Self,
+            ) -> Self
+            {
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                let base_ptr = crate::vector::ptr::SimdConstPtr::splat(slice.as_ptr());
+                // Ferris forgive me, I have done pointer arithmetic here.
+                let ptrs = base_ptr.wrapping_add(idxs);
+                // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+                unsafe { crate::intrinsics::simd_gather(or, ptrs, mask) }
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices are not written.
+            /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            ///
+            /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+            /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter(self, slice: &mut [$type], idxs: crate::SimdUsize<LANES>) {
+                self.scatter_select(slice, crate::MaskSize::splat(true), idxs)
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices are not written.
+            /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
+            /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter_select(
+                self,
+                slice: &mut [$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+            )
+            {
+                // We must construct our scatter mask before we derive a pointer!
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                // SAFETY: This block works with *mut T derived from &mut 'a [T],
+                // which means it is delicate in Rust's borrowing model, circa 2021:
+                // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+                // Even though this block is largely safe methods, it must be almost exactly this way
+                // to prevent invalidating the raw ptrs while they're live.
+                // Thus, entering this block requires all values to use being already ready:
+                // 0. idxs we want to write to, which are used to construct the mask.
+                // 1. mask, which depends on an initial &'a [T] and the idxs.
+                // 2. actual values to scatter (self).
+                // 3. &mut [T] which will become our base ptr.
+                unsafe {
+                    // Now Entering ☢️ *mut T Zone
+                    let base_ptr = crate::vector::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+                    // Ferris forgive me, I have done pointer arithmetic here.
+                    let ptrs = base_ptr.wrapping_add(idxs);
+                    // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+                    crate::intrinsics::simd_scatter(self, ptrs, mask)
+                    // Cleared ☢️ *mut T Zone
+                }
+            }
+        }
+
+        impl<const LANES: usize> Copy for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
+
+        impl<const LANES: usize> Clone for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn clone(&self) -> Self {
+                *self
+            }
+        }
+
+        impl<const LANES: usize> Default for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn default() -> Self {
+                Self::splat(<$type>::default())
+            }
+        }
+
+        impl<const LANES: usize> PartialEq for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                // TODO use SIMD equality
+                self.to_array() == other.to_array()
+            }
+        }
+
+        impl<const LANES: usize> PartialOrd for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+                // TODO use SIMD equalitya
+                self.to_array().partial_cmp(other.as_ref())
+            }
+        }
+
+        // array references
+        impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type; LANES] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+        }
+
+        // slice references
+        impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type] {
+                &mut self.0
+            }
+        }
+
+        // vector/array conversion
+        impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+        }
+
+        impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(vector: $name<LANES>) -> Self {
+                vector.to_array()
+            }
+        }
+
+        impl_shuffle_2pow_lanes!{ $name }
+    }
+}

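For reference, a short sketch (not part of the commit) of the array-oriented API introduced by `impl_vector!` above, covering `from_array`, `as_array`, `to_array`, and the `From` conversions; it assumes the nightly feature and crate used in the doc examples:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let v = SimdU32::<4>::from_array([1, 2, 3, 4]);
    // The array view that replaces the old as_slice().
    assert_eq!(v.as_array(), &[1, 2, 3, 4]);
    // Round-trip through the From impls shown above.
    let arr: [u32; 4] = v.into();
    assert_eq!(SimdU32::from(arr), v);
}
```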
@@ -335,23 +335,7 @@ macro_rules! test_lanes {

             fn implementation<const $lanes: usize>()
             where
-                core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::Mask8<$lanes>: core_simd::Mask,
-                core_simd::Mask16<$lanes>: core_simd::Mask,
-                core_simd::Mask32<$lanes>: core_simd::Mask,
-                core_simd::Mask64<$lanes>: core_simd::Mask,
-                core_simd::MaskSize<$lanes>: core_simd::Mask,
+                core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
             $body

             #[cfg(target_arch = "wasm32")]
@@ -409,23 +393,7 @@ macro_rules! test_lanes_panic {

             fn implementation<const $lanes: usize>()
             where
-                core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::Mask8<$lanes>: core_simd::Mask,
-                core_simd::Mask16<$lanes>: core_simd::Mask,
-                core_simd::Mask32<$lanes>: core_simd::Mask,
-                core_simd::Mask64<$lanes>: core_simd::Mask,
-                core_simd::MaskSize<$lanes>: core_simd::Mask,
+                core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
             $body

             #[test]