Merge pull request #142 from rust-lang/feature/traits
Combine LanesAtMost32 and SimdArray into a single trait "Vector" Attempts to fix some unresolved questions in #139 regarding `SimdArray` having a generic parameter. In particular, this made it not appropriate for replacing `LanesAtMost32`. Additionally, it made it impossible to use in a context where you otherwise don't know the lane count, e.g. `impl Vector`. An unfortunate side effect of this change is that scatter/gather no longer work in the trait (nor does anything else that references the lane count in a type. This requires the super-unstable `const_evaluatable_checked` feature). I also threw in the change from `as_slice` to `as_array` as discussed in zulip, and fixes #51.
This commit is contained in:
commit
82e3405efe
24 changed files with 637 additions and 820 deletions
|
@ -1,253 +0,0 @@
|
|||
use crate::intrinsics;
|
||||
use crate::masks::*;
|
||||
use crate::vector::ptr::{SimdConstPtr, SimdMutPtr};
|
||||
use crate::vector::*;
|
||||
|
||||
/// A representation of a vector as an "array" with indices, implementing
|
||||
/// operations applicable to any vector type based solely on "having lanes",
|
||||
/// and describing relationships between vector and scalar types.
|
||||
pub trait SimdArray<const LANES: usize>: crate::LanesAtMost32
|
||||
where
|
||||
SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: Sized,
|
||||
{
|
||||
/// The scalar type in every lane of this vector type.
|
||||
type Scalar: Copy + Sized;
|
||||
/// The number of lanes for this vector.
|
||||
const LANES: usize = LANES;
|
||||
|
||||
/// Generates a SIMD vector with the same value in every lane.
|
||||
#[must_use]
|
||||
fn splat(val: Self::Scalar) -> Self;
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// If an index is out of bounds, that lane instead selects the value from the "or" vector.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_or(slice: &[Self::Scalar], idxs: SimdUsize<LANES>, or: Self) -> Self {
|
||||
Self::gather_select(slice, MaskSize::splat(true), idxs, or)
|
||||
}
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds indices instead use the default value for that lane (0).
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_or_default(slice: &[Self::Scalar], idxs: SimdUsize<LANES>) -> Self
|
||||
where
|
||||
Self::Scalar: Default,
|
||||
{
|
||||
Self::gather_or(slice, idxs, Self::splat(Self::Scalar::default()))
|
||||
}
|
||||
|
||||
/// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds or masked indices instead select the value from the "or" vector.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
|
||||
/// let alt = SimdI32::from_array([-5, -4, -3, -2]);
|
||||
/// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
|
||||
///
|
||||
/// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
|
||||
/// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
|
||||
/// ```
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn gather_select(
|
||||
slice: &[Self::Scalar],
|
||||
mask: MaskSize<LANES>,
|
||||
idxs: SimdUsize<LANES>,
|
||||
or: Self,
|
||||
) -> Self {
|
||||
let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
|
||||
let base_ptr = SimdConstPtr::splat(slice.as_ptr());
|
||||
// Ferris forgive me, I have done pointer arithmetic here.
|
||||
let ptrs = base_ptr.wrapping_add(idxs);
|
||||
// SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
|
||||
unsafe { intrinsics::simd_gather(or, ptrs, mask) }
|
||||
}
|
||||
|
||||
/// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds indices are not written.
|
||||
/// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
|
||||
/// let vals = SimdI32::from_array([-27, 82, -41, 124]);
|
||||
///
|
||||
/// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
|
||||
/// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
|
||||
/// ```
|
||||
#[inline]
|
||||
fn scatter(self, slice: &mut [Self::Scalar], idxs: SimdUsize<LANES>) {
|
||||
self.scatter_select(slice, MaskSize::splat(true), idxs)
|
||||
}
|
||||
|
||||
/// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
|
||||
/// Out-of-bounds or masked indices are not written.
|
||||
/// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # use core_simd::*;
|
||||
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
|
||||
/// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
|
||||
/// let vals = SimdI32::from_array([-27, 82, -41, 124]);
|
||||
/// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
|
||||
///
|
||||
/// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
|
||||
/// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
|
||||
/// ```
|
||||
#[inline]
|
||||
fn scatter_select(
|
||||
self,
|
||||
slice: &mut [Self::Scalar],
|
||||
mask: MaskSize<LANES>,
|
||||
idxs: SimdUsize<LANES>,
|
||||
) {
|
||||
// We must construct our scatter mask before we derive a pointer!
|
||||
let mask = (mask & idxs.lanes_lt(SimdUsize::splat(slice.len()))).to_int();
|
||||
// SAFETY: This block works with *mut T derived from &mut 'a [T],
|
||||
// which means it is delicate in Rust's borrowing model, circa 2021:
|
||||
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
|
||||
// Even though this block is largely safe methods, it must be almost exactly this way
|
||||
// to prevent invalidating the raw ptrs while they're live.
|
||||
// Thus, entering this block requires all values to use being already ready:
|
||||
// 0. idxs we want to write to, which are used to construct the mask.
|
||||
// 1. mask, which depends on an initial &'a [T] and the idxs.
|
||||
// 2. actual values to scatter (self).
|
||||
// 3. &mut [T] which will become our base ptr.
|
||||
unsafe {
|
||||
// Now Entering ☢️ *mut T Zone
|
||||
let base_ptr = SimdMutPtr::splat(slice.as_mut_ptr());
|
||||
// Ferris forgive me, I have done pointer arithmetic here.
|
||||
let ptrs = base_ptr.wrapping_add(idxs);
|
||||
// The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
|
||||
intrinsics::simd_scatter(self, ptrs, mask)
|
||||
// Cleared ☢️ *mut T Zone
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_simdarray_for {
|
||||
($simd:ident {type Scalar = $scalar:ident;}) => {
|
||||
impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
|
||||
where SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: crate::LanesAtMost32,
|
||||
{
|
||||
type Scalar = $scalar;
|
||||
|
||||
#[must_use]
|
||||
#[inline]
|
||||
fn splat(val: Self::Scalar) -> Self {
|
||||
[val; LANES].into()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
($simd:ident $impl:tt) => {
|
||||
impl<const LANES: usize> SimdArray<LANES> for $simd<LANES>
|
||||
where SimdUsize<LANES>: crate::LanesAtMost32,
|
||||
SimdIsize<LANES>: crate::LanesAtMost32,
|
||||
MaskSize<LANES>: crate::Mask,
|
||||
Self: crate::LanesAtMost32,
|
||||
$impl
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdUsize {
|
||||
type Scalar = usize;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdIsize {
|
||||
type Scalar = isize;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI8 {
|
||||
type Scalar = i8;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI16 {
|
||||
type Scalar = i16;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI32 {
|
||||
type Scalar = i32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdI64 {
|
||||
type Scalar = i64;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU8 {
|
||||
type Scalar = u8;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU16 {
|
||||
type Scalar = u16;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU32 {
|
||||
type Scalar = u32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdU64 {
|
||||
type Scalar = u64;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdF32 {
|
||||
type Scalar = f32;
|
||||
}
|
||||
}
|
||||
|
||||
impl_simdarray_for! {
|
||||
SimdF64 {
|
||||
type Scalar = f64;
|
||||
}
|
||||
}
|
|
@ -1,13 +1,11 @@
|
|||
use crate::LanesAtMost32;
|
||||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
macro_rules! implement_mask_ops {
|
||||
{ $($vector:ident => $mask:ident ($inner_ty:ident),)* } => {
|
||||
$(
|
||||
impl<const LANES: usize> crate::$vector<LANES>
|
||||
where
|
||||
crate::$vector<LANES>: LanesAtMost32,
|
||||
crate::$inner_ty<LANES>: LanesAtMost32,
|
||||
crate::$mask<LANES>: crate::Mask,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
/// Test if each lane is equal to the corresponding lane in `other`.
|
||||
#[inline]
|
||||
|
|
|
@ -1,124 +0,0 @@
|
|||
/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
|
||||
macro_rules! impl_vector {
|
||||
{ $name:ident, $type:ty } => {
|
||||
impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
/// Construct a SIMD vector by setting all lanes to the given value.
|
||||
pub const fn splat(value: $type) -> Self {
|
||||
Self([value; LANES])
|
||||
}
|
||||
|
||||
/// Returns a slice containing the entire SIMD vector.
|
||||
pub const fn as_slice(&self) -> &[$type] {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Returns a mutable slice containing the entire SIMD vector.
|
||||
pub fn as_mut_slice(&mut self) -> &mut [$type] {
|
||||
&mut self.0
|
||||
}
|
||||
|
||||
/// Converts an array to a SIMD vector.
|
||||
pub const fn from_array(array: [$type; LANES]) -> Self {
|
||||
Self(array)
|
||||
}
|
||||
|
||||
/// Converts a SIMD vector to an array.
|
||||
pub const fn to_array(self) -> [$type; LANES] {
|
||||
// workaround for rust-lang/rust#80108
|
||||
// TODO fix this
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
{
|
||||
let mut arr = [self.0[0]; LANES];
|
||||
let mut i = 0;
|
||||
while i < LANES {
|
||||
arr[i] = self.0[i];
|
||||
i += 1;
|
||||
}
|
||||
arr
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Copy for $name<LANES> where Self: crate::LanesAtMost32 {}
|
||||
|
||||
impl<const LANES: usize> Clone for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Default for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
Self::splat(<$type>::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> PartialEq for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
// TODO use SIMD equality
|
||||
self.to_array() == other.to_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
// TODO use SIMD equalitya
|
||||
self.to_array().partial_cmp(other.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
// array references
|
||||
impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &[$type; LANES] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_mut(&mut self) -> &mut [$type; LANES] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
// slice references
|
||||
impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &[$type] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
#[inline]
|
||||
fn as_mut(&mut self) -> &mut [$type] {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
// vector/array conversion
|
||||
impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
fn from(array: [$type; LANES]) -> Self {
|
||||
Self(array)
|
||||
}
|
||||
}
|
||||
|
||||
impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where $name<LANES>: crate::LanesAtMost32 {
|
||||
fn from(vector: $name<LANES>) -> Self {
|
||||
vector.to_array()
|
||||
}
|
||||
}
|
||||
|
||||
impl_shuffle_2pow_lanes!{ $name }
|
||||
}
|
||||
}
|
|
@ -35,7 +35,7 @@ macro_rules! impl_fmt_trait {
|
|||
$( // repeat trait
|
||||
impl<const LANES: usize> core::fmt::$trait for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
$format(self.as_ref(), f)
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
macro_rules! impl_traits {
|
||||
{ $type:ident } => {
|
||||
impl<const LANES: usize> core::iter::Sum<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
|
@ -11,7 +13,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<const LANES: usize> core::iter::Product<Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
|
@ -20,7 +22,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<'a, const LANES: usize> core::iter::Sum<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn sum<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Add::add)
|
||||
|
@ -29,7 +31,7 @@ macro_rules! impl_traits {
|
|||
|
||||
impl<'a, const LANES: usize> core::iter::Product<&'a Self> for crate::$type<LANES>
|
||||
where
|
||||
Self: crate::LanesAtMost32,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn product<I: core::iter::Iterator<Item = &'a Self>>(iter: I) -> Self {
|
||||
iter.fold(Default::default(), core::ops::Mul::mul)
|
||||
|
|
43
crates/core_simd/src/lane_count.rs
Normal file
43
crates/core_simd/src/lane_count.rs
Normal file
|
@ -0,0 +1,43 @@
|
|||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
use sealed::Sealed;
|
||||
|
||||
/// A type representing a vector lane count.
|
||||
pub struct LaneCount<const LANES: usize>;
|
||||
|
||||
/// Helper trait for vector lane counts.
|
||||
pub trait SupportedLaneCount: Sealed {
|
||||
/// The bitmask representation of a mask.
|
||||
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
|
||||
|
||||
#[doc(hidden)]
|
||||
type IntBitMask;
|
||||
}
|
||||
|
||||
impl<const LANES: usize> Sealed for LaneCount<LANES> {}
|
||||
|
||||
impl SupportedLaneCount for LaneCount<1> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<2> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<4> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<8> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<16> {
|
||||
type BitMask = [u8; 2];
|
||||
type IntBitMask = u16;
|
||||
}
|
||||
impl SupportedLaneCount for LaneCount<32> {
|
||||
type BitMask = [u8; 4];
|
||||
type IntBitMask = u32;
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
/// Implemented for vectors that are supported by the implementation.
|
||||
pub trait LanesAtMost32: sealed::Sealed {
|
||||
#[doc(hidden)]
|
||||
type BitMask: Into<u64>;
|
||||
}
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
|
||||
macro_rules! impl_for {
|
||||
{ $name:ident } => {
|
||||
impl<const LANES: usize> sealed::Sealed for $name<LANES>
|
||||
where
|
||||
$name<LANES>: LanesAtMost32,
|
||||
{}
|
||||
|
||||
impl LanesAtMost32 for $name<1> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<2> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<4> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<8> {
|
||||
type BitMask = u8;
|
||||
}
|
||||
impl LanesAtMost32 for $name<16> {
|
||||
type BitMask = u16;
|
||||
}
|
||||
impl LanesAtMost32 for $name<32> {
|
||||
type BitMask = u32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl_for! { SimdU8 }
|
||||
impl_for! { SimdU16 }
|
||||
impl_for! { SimdU32 }
|
||||
impl_for! { SimdU64 }
|
||||
impl_for! { SimdUsize }
|
||||
|
||||
impl_for! { SimdI8 }
|
||||
impl_for! { SimdI16 }
|
||||
impl_for! { SimdI32 }
|
||||
impl_for! { SimdI64 }
|
||||
impl_for! { SimdIsize }
|
||||
|
||||
impl_for! { SimdF32 }
|
||||
impl_for! { SimdF64 }
|
|
@ -12,8 +12,6 @@
|
|||
#![unstable(feature = "portable_simd", issue = "86656")]
|
||||
//! Portable SIMD module.
|
||||
|
||||
#[macro_use]
|
||||
mod first;
|
||||
#[macro_use]
|
||||
mod permute;
|
||||
#[macro_use]
|
||||
|
@ -29,20 +27,16 @@ mod comparisons;
|
|||
mod fmt;
|
||||
mod intrinsics;
|
||||
mod iter;
|
||||
mod math;
|
||||
mod ops;
|
||||
mod round;
|
||||
mod vendor;
|
||||
|
||||
mod math;
|
||||
|
||||
mod lanes_at_most_32;
|
||||
pub use lanes_at_most_32::LanesAtMost32;
|
||||
mod lane_count;
|
||||
pub use lane_count::*;
|
||||
|
||||
mod masks;
|
||||
pub use masks::*;
|
||||
|
||||
mod vector;
|
||||
pub use vector::*;
|
||||
|
||||
mod array;
|
||||
pub use array::SimdArray;
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
|
||||
#[cfg_attr(
|
||||
not(all(target_arch = "x86_64", target_feature = "avx512f")),
|
||||
path = "full_masks.rs"
|
||||
path = "masks/full_masks.rs"
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(target_arch = "x86_64", target_feature = "avx512f"),
|
||||
path = "bitmask.rs"
|
||||
path = "masks/bitmask.rs"
|
||||
)]
|
||||
mod mask_impl;
|
||||
|
||||
use crate::{LanesAtMost32, SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
|
||||
use crate::{SimdI16, SimdI32, SimdI64, SimdI8, SimdIsize};
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
|
@ -20,12 +20,12 @@ mod sealed {
|
|||
|
||||
/// Helper trait for mask types.
|
||||
pub trait Mask: sealed::Sealed {
|
||||
/// The bitmask representation of a mask.
|
||||
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
|
||||
/// The number of lanes for this mask.
|
||||
const LANES: usize;
|
||||
|
||||
// TODO remove this when rustc intrinsics are more flexible
|
||||
#[doc(hidden)]
|
||||
type IntBitMask;
|
||||
/// Generates a mask with the same value in every lane.
|
||||
#[must_use]
|
||||
fn splat(val: bool) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! define_opaque_mask {
|
||||
|
@ -38,45 +38,30 @@ macro_rules! define_opaque_mask {
|
|||
#[allow(non_camel_case_types)]
|
||||
pub struct $name<const LANES: usize>($inner_ty)
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask;
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount;
|
||||
|
||||
impl<const LANES: usize> sealed::Sealed for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
impl Mask for $name<1> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<2> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<4> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<8> {
|
||||
type BitMask = [u8; 1];
|
||||
type IntBitMask = u8;
|
||||
}
|
||||
impl Mask for $name<16> {
|
||||
type BitMask = [u8; 2];
|
||||
type IntBitMask = u16;
|
||||
}
|
||||
impl Mask for $name<32> {
|
||||
type BitMask = [u8; 4];
|
||||
type IntBitMask = u32;
|
||||
|
||||
impl<const LANES: usize> Mask for $name<LANES>
|
||||
where
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
const LANES: usize = LANES;
|
||||
|
||||
#[inline]
|
||||
fn splat(value: bool) -> Self {
|
||||
Self::splat(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl_opaque_mask_reductions! { $name, $bits_ty }
|
||||
|
||||
impl<const LANES: usize> $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
/// Construct a mask by setting all lanes to the given value.
|
||||
pub fn splat(value: bool) -> Self {
|
||||
|
@ -175,21 +160,20 @@ macro_rules! define_opaque_mask {
|
|||
}
|
||||
|
||||
/// Convert this mask to a bitmask, with one bit set per lane.
|
||||
pub fn to_bitmask(self) -> <Self as Mask>::BitMask {
|
||||
self.0.to_bitmask::<Self>()
|
||||
pub fn to_bitmask(self) -> <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask {
|
||||
self.0.to_bitmask()
|
||||
}
|
||||
|
||||
/// Convert a bitmask to a mask.
|
||||
pub fn from_bitmask(bitmask: <Self as Mask>::BitMask) -> Self {
|
||||
Self(<$inner_ty>::from_bitmask::<Self>(bitmask))
|
||||
pub fn from_bitmask(bitmask: <crate::LaneCount<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
|
||||
Self(<$inner_ty>::from_bitmask(bitmask))
|
||||
}
|
||||
}
|
||||
|
||||
// vector/array conversion
|
||||
impl<const LANES: usize> From<[bool; LANES]> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(array: [bool; LANES]) -> Self {
|
||||
Self::from_array(array)
|
||||
|
@ -198,8 +182,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl <const LANES: usize> From<$name<LANES>> for [bool; LANES]
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(vector: $name<LANES>) -> Self {
|
||||
vector.to_array()
|
||||
|
@ -208,14 +191,12 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> Copy for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<const LANES: usize> Clone for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
|
@ -225,8 +206,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> Default for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
|
@ -236,8 +216,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> PartialEq for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
|
@ -247,8 +226,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
|
@ -258,8 +236,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::fmt::Debug for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: crate::LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
f.debug_list()
|
||||
|
@ -270,8 +247,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -282,8 +258,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -294,8 +269,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAnd<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -306,8 +280,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -318,8 +291,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -330,8 +302,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOr<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -342,8 +313,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -354,8 +324,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -366,8 +335,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXor<$name<LANES>> for bool
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
$name<LANES>: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -378,8 +346,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::Not for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = $name<LANES>;
|
||||
#[inline]
|
||||
|
@ -390,8 +357,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAndAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: Self) {
|
||||
|
@ -401,8 +367,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitAndAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, rhs: bool) {
|
||||
|
@ -412,8 +377,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOrAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: Self) {
|
||||
|
@ -423,8 +387,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitOrAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, rhs: bool) {
|
||||
|
@ -434,8 +397,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXorAssign for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitxor_assign(&mut self, rhs: Self) {
|
||||
|
@ -445,8 +407,7 @@ macro_rules! define_opaque_mask {
|
|||
|
||||
impl<const LANES: usize> core::ops::BitXorAssign<bool> for $name<LANES>
|
||||
where
|
||||
$bits_ty<LANES>: LanesAtMost32,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn bitxor_assign(&mut self, rhs: bool) {
|
||||
|
@ -460,7 +421,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 8-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask8<const LANES: usize>(mask_impl::Mask8<Self, LANES>);
|
||||
struct Mask8<const LANES: usize>(mask_impl::Mask8<LANES>);
|
||||
@bits SimdI8
|
||||
}
|
||||
|
||||
|
@ -468,7 +429,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 16-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask16<const LANES: usize>(mask_impl::Mask16<Self, LANES>);
|
||||
struct Mask16<const LANES: usize>(mask_impl::Mask16<LANES>);
|
||||
@bits SimdI16
|
||||
}
|
||||
|
||||
|
@ -476,7 +437,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 32-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask32<const LANES: usize>(mask_impl::Mask32<Self, LANES>);
|
||||
struct Mask32<const LANES: usize>(mask_impl::Mask32<LANES>);
|
||||
@bits SimdI32
|
||||
}
|
||||
|
||||
|
@ -484,7 +445,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` 64-bit elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct Mask64<const LANES: usize>(mask_impl::Mask64<Self, LANES>);
|
||||
struct Mask64<const LANES: usize>(mask_impl::Mask64<LANES>);
|
||||
@bits SimdI64
|
||||
}
|
||||
|
||||
|
@ -492,7 +453,7 @@ define_opaque_mask! {
|
|||
/// Mask for vectors with `LANES` pointer-width elements.
|
||||
///
|
||||
/// The layout of this type is unspecified.
|
||||
struct MaskSize<const LANES: usize>(mask_impl::MaskSize<Self, LANES>);
|
||||
struct MaskSize<const LANES: usize>(mask_impl::MaskSize<LANES>);
|
||||
@bits SimdIsize
|
||||
}
|
||||
|
||||
|
@ -555,10 +516,7 @@ macro_rules! impl_from {
|
|||
$(
|
||||
impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
$from<LANES>: Mask,
|
||||
Self: Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $from<LANES>) -> Self {
|
||||
Self(value.0.into())
|
|
@ -1,50 +1,81 @@
|
|||
use crate::Mask;
|
||||
use core::marker::PhantomData;
|
||||
use crate::{LaneCount, SupportedLaneCount};
|
||||
|
||||
/// Helper trait for limiting int conversion types
|
||||
pub trait ConvertToInt {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where Self: crate::LanesAtMost32 {}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI8<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI16<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI32<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdI64<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
impl<const LANES: usize> ConvertToInt for crate::SimdIsize<LANES> where
|
||||
LaneCount<LANES>: SupportedLaneCount
|
||||
{
|
||||
}
|
||||
|
||||
/// A mask where each lane is represented by a single bit.
|
||||
#[repr(transparent)]
|
||||
pub struct BitMask<T: Mask, const LANES: usize>(T::BitMask, PhantomData<[(); LANES]>);
|
||||
pub struct BitMask<const LANES: usize>(<LaneCount<LANES> as SupportedLaneCount>::BitMask)
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount;
|
||||
|
||||
impl<T: Mask, const LANES: usize> Copy for BitMask<T, LANES> {}
|
||||
impl<const LANES: usize> Copy for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Clone for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> Clone for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialEq for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> PartialEq for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.as_ref() == other.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialOrd for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> PartialOrd for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
self.0.as_ref().partial_cmp(other.0.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Eq for BitMask<T, LANES> {}
|
||||
impl<const LANES: usize> Eq for BitMask<LANES> where LaneCount<LANES>: SupportedLaneCount {}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Ord for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> Ord for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
|
||||
self.0.as_ref().cmp(other.0.as_ref())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
||||
impl<const LANES: usize> BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
pub fn splat(value: bool) -> Self {
|
||||
let mut mask = T::BitMask::default();
|
||||
let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
|
||||
if value {
|
||||
mask.as_mut().fill(u8::MAX)
|
||||
} else {
|
||||
|
@ -53,12 +84,12 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
if LANES % 8 > 0 {
|
||||
*mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
|
||||
}
|
||||
Self(mask, PhantomData)
|
||||
Self(mask)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
|
||||
(self.0.as_ref()[lane / 8] >> lane % 8) & 0x1 > 0
|
||||
(self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -72,7 +103,8 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
V: ConvertToInt + Default + core::ops::Not<Output = V>,
|
||||
{
|
||||
unsafe {
|
||||
let mask: T::IntBitMask = core::mem::transmute_copy(&self);
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
core::mem::transmute_copy(&self);
|
||||
crate::intrinsics::simd_select_bitmask(mask, !V::default(), V::default())
|
||||
}
|
||||
}
|
||||
|
@ -80,33 +112,29 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
#[inline]
|
||||
pub unsafe fn from_int_unchecked<V>(value: V) -> Self
|
||||
where
|
||||
V: crate::LanesAtMost32,
|
||||
V: crate::Vector,
|
||||
{
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::IntBitMask>(),
|
||||
core::mem::size_of::<T::BitMask>()
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(
|
||||
),
|
||||
core::mem::size_of::<
|
||||
<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask,
|
||||
>(),
|
||||
);
|
||||
let mask: T::IntBitMask = crate::intrinsics::simd_bitmask(value);
|
||||
Self(core::mem::transmute_copy(&mask), PhantomData)
|
||||
let mask: <LaneCount<LANES> as SupportedLaneCount>::IntBitMask =
|
||||
crate::intrinsics::simd_bitmask(value);
|
||||
Self(core::mem::transmute_copy(&mask))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_bitmask<U: Mask>(self) -> U::BitMask {
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::BitMask>(),
|
||||
core::mem::size_of::<U::BitMask>()
|
||||
);
|
||||
unsafe { core::mem::transmute_copy(&self.0) }
|
||||
pub fn to_bitmask(self) -> <LaneCount<LANES> as SupportedLaneCount>::BitMask {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_bitmask<U: Mask>(bitmask: U::BitMask) -> Self {
|
||||
assert_eq!(
|
||||
core::mem::size_of::<T::BitMask>(),
|
||||
core::mem::size_of::<U::BitMask>()
|
||||
);
|
||||
unsafe { core::mem::transmute_copy(&bitmask) }
|
||||
pub fn from_bitmask(bitmask: <LaneCount<LANES> as SupportedLaneCount>::BitMask) -> Self {
|
||||
Self(bitmask)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -120,9 +148,10 @@ impl<T: Mask, const LANES: usize> BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitAnd for BitMask<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitAnd for BitMask<LANES>
|
||||
where
|
||||
T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
<LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -134,9 +163,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitOr for BitMask<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitOr for BitMask<LANES>
|
||||
where
|
||||
T::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
<LaneCount<LANES> as SupportedLaneCount>::BitMask: Default + AsRef<[u8]> + AsMut<[u8]>,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
|
@ -148,7 +178,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> core::ops::BitXor for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitxor(mut self, rhs: Self) -> Self::Output {
|
||||
|
@ -159,7 +192,10 @@ impl<T: Mask, const LANES: usize> core::ops::BitXor for BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
|
||||
impl<const LANES: usize> core::ops::Not for BitMask<LANES>
|
||||
where
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn not(mut self) -> Self::Output {
|
||||
|
@ -173,31 +209,8 @@ impl<T: Mask, const LANES: usize> core::ops::Not for BitMask<T, LANES> {
|
|||
}
|
||||
}
|
||||
|
||||
pub type Mask8<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask16<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask32<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type Mask64<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
pub type MaskSize<T, const LANES: usize> = BitMask<T, LANES>;
|
||||
|
||||
macro_rules! impl_from {
|
||||
{ $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
|
||||
$(
|
||||
impl<const LANES: usize> From<$from<crate::$from<LANES>, LANES>> for $to<crate::$to<LANES>, LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$from<LANES>: crate::Mask,
|
||||
crate::$to<LANES>: crate::Mask,
|
||||
{
|
||||
fn from(value: $from<crate::$from<LANES>, LANES>) -> Self {
|
||||
unsafe { core::mem::transmute_copy(&value) }
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
impl_from! { Mask8 (SimdI8) => Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize) }
|
||||
impl_from! { Mask16 (SimdI16) => Mask32 (SimdI32), Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8) }
|
||||
impl_from! { Mask32 (SimdI32) => Mask64 (SimdI64), MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16) }
|
||||
impl_from! { Mask64 (SimdI64) => MaskSize (SimdIsize), Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32) }
|
||||
impl_from! { MaskSize (SimdIsize) => Mask8 (SimdI8), Mask16 (SimdI16), Mask32 (SimdI32), Mask64 (SimdI64) }
|
||||
pub type Mask8<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask16<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask32<const LANES: usize> = BitMask<LANES>;
|
||||
pub type Mask64<const LANES: usize> = BitMask<LANES>;
|
||||
pub type MaskSize<const LANES: usize> = BitMask<LANES>;
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
//! Masks that take up full SIMD vector registers.
|
||||
|
||||
use crate::Mask;
|
||||
use core::marker::PhantomData;
|
||||
|
||||
macro_rules! define_mask {
|
||||
{
|
||||
$(#[$attr:meta])*
|
||||
|
@ -12,20 +9,20 @@ macro_rules! define_mask {
|
|||
} => {
|
||||
$(#[$attr])*
|
||||
#[repr(transparent)]
|
||||
pub struct $name<T: Mask, const $lanes: usize>(crate::$type<$lanes2>, PhantomData<T>)
|
||||
pub struct $name<const $lanes: usize>(crate::$type<$lanes>)
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32;
|
||||
crate::LaneCount<$lanes>: crate::SupportedLaneCount;
|
||||
|
||||
impl_full_mask_reductions! { $name, $type }
|
||||
|
||||
impl<T: Mask, const LANES: usize> Copy for $name<T, LANES>
|
||||
impl<const LANES: usize> Copy for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Clone for $name<T, LANES>
|
||||
impl<const LANES: usize> Clone for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
|
@ -33,41 +30,41 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialEq for $name<T, LANES>
|
||||
impl<const LANES: usize> PartialEq for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0 == other.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> PartialOrd for $name<T, LANES>
|
||||
impl<const LANES: usize> PartialOrd for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
|
||||
self.0.partial_cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Eq for $name<T, LANES>
|
||||
impl<const LANES: usize> Eq for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{}
|
||||
|
||||
impl<T: Mask, const LANES: usize> Ord for $name<T, LANES>
|
||||
impl<const LANES: usize> Ord for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
|
||||
self.0.cmp(&other.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> $name<T, LANES>
|
||||
impl<const LANES: usize> $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
pub fn splat(value: bool) -> Self {
|
||||
Self(
|
||||
|
@ -78,7 +75,6 @@ macro_rules! define_mask {
|
|||
0
|
||||
}
|
||||
),
|
||||
PhantomData,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -103,16 +99,19 @@ macro_rules! define_mask {
|
|||
|
||||
#[inline]
|
||||
pub unsafe fn from_int_unchecked(value: crate::$type<LANES>) -> Self {
|
||||
Self(value, PhantomData)
|
||||
Self(value)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to_bitmask<U: crate::Mask>(self) -> U::BitMask {
|
||||
pub fn to_bitmask(self) -> <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask {
|
||||
unsafe {
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
|
||||
let mask: U::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
|
||||
let mut bitmask: U::BitMask = core::mem::transmute_copy(&mask);
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
|
||||
);
|
||||
let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = crate::intrinsics::simd_bitmask(self.0);
|
||||
let mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask = core::mem::transmute_copy(&bitmask);
|
||||
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
|
@ -128,7 +127,7 @@ macro_rules! define_mask {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_bitmask<U: crate::Mask>(mut bitmask: U::BitMask) -> Self {
|
||||
pub fn from_bitmask(mut bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask) -> Self {
|
||||
unsafe {
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
|
@ -139,9 +138,12 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO remove the transmute when rustc is more flexible
|
||||
assert_eq!(core::mem::size_of::<U::IntBitMask>(), core::mem::size_of::<U::BitMask>());
|
||||
let bitmask: U::IntBitMask = core::mem::transmute_copy(&bitmask);
|
||||
// TODO remove the transmute when rustc can use arrays of u8 as bitmasks
|
||||
assert_eq!(
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::BitMask>(),
|
||||
core::mem::size_of::<<crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask>(),
|
||||
);
|
||||
let bitmask: <crate::LaneCount::<LANES> as crate::SupportedLaneCount>::IntBitMask = core::mem::transmute_copy(&bitmask);
|
||||
|
||||
Self::from_int_unchecked(crate::intrinsics::simd_select_bitmask(
|
||||
bitmask,
|
||||
|
@ -152,56 +154,56 @@ macro_rules! define_mask {
|
|||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::convert::From<$name<T, LANES>> for crate::$type<LANES>
|
||||
impl<const LANES: usize> core::convert::From<$name<LANES>> for crate::$type<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $name<T, LANES>) -> Self {
|
||||
fn from(value: $name<LANES>) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitAnd for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitAnd for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitand(self, rhs: Self) -> Self {
|
||||
Self(self.0 & rhs.0, PhantomData)
|
||||
Self(self.0 & rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitOr for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitOr for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitor(self, rhs: Self) -> Self {
|
||||
Self(self.0 | rhs.0, PhantomData)
|
||||
Self(self.0 | rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::BitXor for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::BitXor for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitxor(self, rhs: Self) -> Self::Output {
|
||||
Self(self.0 ^ rhs.0, PhantomData)
|
||||
Self(self.0 ^ rhs.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Mask, const LANES: usize> core::ops::Not for $name<T, LANES>
|
||||
impl<const LANES: usize> core::ops::Not for $name<LANES>
|
||||
where
|
||||
crate::$type<LANES>: crate::LanesAtMost32,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn not(self) -> Self::Output {
|
||||
Self(!self.0, PhantomData)
|
||||
Self(!self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -240,14 +242,11 @@ define_mask! {
|
|||
macro_rules! impl_from {
|
||||
{ $from:ident ($from_inner:ident) => $($to:ident ($to_inner:ident)),* } => {
|
||||
$(
|
||||
impl<const LANES: usize, T, U> From<$from<T, LANES>> for $to<U, LANES>
|
||||
impl<const LANES: usize> From<$from<LANES>> for $to<LANES>
|
||||
where
|
||||
crate::$from_inner<LANES>: crate::LanesAtMost32,
|
||||
crate::$to_inner<LANES>: crate::LanesAtMost32,
|
||||
T: crate::Mask,
|
||||
U: crate::Mask,
|
||||
crate::LaneCount<LANES>: crate::SupportedLaneCount,
|
||||
{
|
||||
fn from(value: $from<T, LANES>) -> Self {
|
||||
fn from(value: $from<LANES>) -> Self {
|
||||
let mut new = Self::splat(false);
|
||||
for i in 0..LANES {
|
||||
unsafe { new.set_unchecked(i, value.test_unchecked(i)) }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
macro_rules! impl_uint_arith {
|
||||
($(($name:ident, $n:ident)),+) => {
|
||||
$( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
$( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
|
||||
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
|
@ -44,7 +44,7 @@ macro_rules! impl_uint_arith {
|
|||
|
||||
macro_rules! impl_int_arith {
|
||||
($(($name:ident, $n:ident)),+) => {
|
||||
$( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
|
||||
$( impl<const LANES: usize> $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
|
||||
|
||||
/// Lanewise saturating add.
|
||||
///
|
||||
|
|
|
@@ -1,4 +1,4 @@
-use crate::LanesAtMost32;
+use crate::{LaneCount, SupportedLaneCount};

 /// Checks if the right-hand side argument of a left- or right-shift would cause overflow.
 fn invalid_shift_rhs<T>(rhs: T) -> bool
@@ -16,7 +16,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
@@ -26,7 +26,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
@@ -36,7 +36,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -48,7 +48,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<$rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -60,7 +60,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait<$rhs>>::Output;
@@ -75,7 +75,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident<$rhs:ty> for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             $(#[$attrs:meta])*
             fn $fn:ident(&mut $self_tok:ident, $rhs_arg:ident: $rhs_arg_ty:ty) $body:tt
@@ -83,7 +83,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait<$rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: $rhs_arg_ty) $body
@@ -91,7 +91,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait<&'_ $rhs> for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             $(#[$attrs])*
             fn $fn(&mut $self_tok, $rhs_arg: &$rhs_arg_ty) {
@@ -104,7 +104,7 @@ macro_rules! impl_ref_ops {
     {
         impl<const $lanes:ident: usize> core::ops::$trait:ident for $type:ty
         where
-            $($bound:path: LanesAtMost32,)*
+            LaneCount<$lanes2:ident>: SupportedLaneCount,
         {
             type Output = $output:ty;
             fn $fn:ident($self_tok:ident) -> Self::Output $body:tt
@@ -112,7 +112,7 @@ macro_rules! impl_ref_ops {
     } => {
         impl<const $lanes: usize> core::ops::$trait for $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = $output;
             fn $fn($self_tok) -> Self::Output $body
@@ -120,7 +120,7 @@ macro_rules! impl_ref_ops {

         impl<const $lanes: usize> core::ops::$trait for &'_ $type
         where
-            $($bound: LanesAtMost32,)*
+            LaneCount<$lanes2>: SupportedLaneCount,
         {
             type Output = <$type as core::ops::$trait>::Output;
             fn $fn($self_tok) -> Self::Output {
@@ -167,7 +167,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Not for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn not(self) -> Self::Output {
@@ -181,7 +181,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Neg for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
                 fn neg(self) -> Self::Output {
@@ -194,7 +194,7 @@ macro_rules! impl_op {
     { impl Index for $type:ident, $scalar:ty } => {
         impl<I, const LANES: usize> core::ops::Index<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             type Output = I::Output;
@@ -206,7 +206,7 @@ macro_rules! impl_op {

         impl<I, const LANES: usize> core::ops::IndexMut<I> for crate::$type<LANES>
         where
-            Self: LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
             I: core::slice::SliceIndex<[$scalar]>,
         {
             fn index_mut(&mut self, index: I) -> &mut Self::Output {
@@ -221,7 +221,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -237,7 +237,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -251,7 +251,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$trait<crate::$type<LANES>> for $scalar
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$type<LANES>;
@@ -265,7 +265,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<Self> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: Self) {
@@ -279,7 +279,7 @@ macro_rules! impl_op {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::$assign_trait<$scalar> for crate::$type<LANES>
             where
-                crate::$type<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn $assign_trait_fn(&mut self, rhs: $scalar) {
@@ -325,13 +325,13 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn div(self, rhs: Self) -> Self::Output {
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .any(|x| *x == 0)
                     {
@@ -340,8 +340,8 @@ macro_rules! impl_unsigned_int_ops {

                     // Guards for div(MIN, -1),
                     // this check only applies to signed ints
-                    if <$scalar>::MIN != 0 && self.as_slice().iter()
-                        .zip(rhs.as_slice().iter())
+                    if <$scalar>::MIN != 0 && self.as_array().iter()
+                        .zip(rhs.as_array().iter())
                         .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                         panic!("attempt to divide with overflow");
                     }
@@ -353,7 +353,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -363,7 +363,7 @@ macro_rules! impl_unsigned_int_ops {
                         panic!("attempt to divide by zero");
                     }
                     if <$scalar>::MIN != 0 &&
-                        self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                        self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                         rhs == -1 as _ {
                         panic!("attempt to divide with overflow");
                     }
@@ -376,7 +376,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Div<crate::$vector<LANES>> for $scalar
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$vector<LANES>;
@@ -390,7 +390,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::DivAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn div_assign(&mut self, rhs: Self) {
@@ -402,7 +402,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::DivAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn div_assign(&mut self, rhs: $scalar) {
@@ -415,13 +415,13 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn rem(self, rhs: Self) -> Self::Output {
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .any(|x| *x == 0)
                     {
@@ -430,8 +430,8 @@ macro_rules! impl_unsigned_int_ops {

                     // Guards for rem(MIN, -1)
                     // this branch applies the check only to signed ints
-                    if <$scalar>::MIN != 0 && self.as_slice().iter()
-                        .zip(rhs.as_slice().iter())
+                    if <$scalar>::MIN != 0 && self.as_array().iter()
+                        .zip(rhs.as_array().iter())
                         .any(|(x,y)| *x == <$scalar>::MIN && *y == -1 as _) {
                         panic!("attempt to calculate the remainder with overflow");
                     }
@@ -443,7 +443,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -453,7 +453,7 @@ macro_rules! impl_unsigned_int_ops {
                         panic!("attempt to calculate the remainder with a divisor of zero");
                     }
                     if <$scalar>::MIN != 0 &&
-                        self.as_slice().iter().any(|x| *x == <$scalar>::MIN) &&
+                        self.as_array().iter().any(|x| *x == <$scalar>::MIN) &&
                         rhs == -1 as _ {
                         panic!("attempt to calculate the remainder with overflow");
                     }
@@ -466,7 +466,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Rem<crate::$vector<LANES>> for $scalar
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = crate::$vector<LANES>;
@@ -480,7 +480,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::RemAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn rem_assign(&mut self, rhs: Self) {
@@ -492,7 +492,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::RemAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn rem_assign(&mut self, rhs: $scalar) {
@@ -505,14 +505,14 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shl<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn shl(self, rhs: Self) -> Self::Output {
                     // TODO there is probably a better way of doing this
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .copied()
                         .any(invalid_shift_rhs)
@@ -527,7 +527,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shl<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -546,7 +546,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShlAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shl_assign(&mut self, rhs: Self) {
@@ -558,7 +558,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShlAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shl_assign(&mut self, rhs: $scalar) {
@@ -570,14 +570,14 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shr<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;

                 #[inline]
                 fn shr(self, rhs: Self) -> Self::Output {
                     // TODO there is probably a better way of doing this
-                    if rhs.as_slice()
+                    if rhs.as_array()
                         .iter()
                         .copied()
                         .any(invalid_shift_rhs)
@@ -592,7 +592,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::Shr<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 type Output = Self;
@@ -611,7 +611,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShrAssign<Self> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shr_assign(&mut self, rhs: Self) {
@@ -623,7 +623,7 @@ macro_rules! impl_unsigned_int_ops {
         impl_ref_ops! {
             impl<const LANES: usize> core::ops::ShrAssign<$scalar> for crate::$vector<LANES>
             where
-                crate::$vector<LANES>: LanesAtMost32,
+                LaneCount<LANES>: SupportedLaneCount,
             {
                 #[inline]
                 fn shr_assign(&mut self, rhs: $scalar) {

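As a usage sketch (not part of this commit): with the nightly `portable_simd` feature and the `core_simd` crate in scope, as in the doc examples elsewhere in this diff, the rewritten guards above make lanewise division divide normally when every divisor lane is nonzero and panic otherwise. Lane count and values are arbitrary.

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let a = SimdI32::<4>::from_array([8, 6, 4, 2]);
    let b = SimdI32::splat(2);
    // Lanewise division, as implemented by the Div impls above.
    assert_eq!(a / b, SimdI32::from_array([4, 3, 2, 1]));
    // A zero in any divisor lane would trigger the "attempt to divide by zero" panic:
    // let _ = a / SimdI32::from_array([2, 0, 2, 2]);
}
```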
@@ -2,7 +2,7 @@ macro_rules! impl_integer_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
             #[inline]
@@ -56,7 +56,7 @@ macro_rules! impl_float_reductions {
     { $name:ident, $scalar:ty } => {
         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {

             /// Horizontal add. Returns the sum of the lanes of the vector.
@@ -64,7 +64,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().sum()
+                    self.as_array().iter().sum()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_add_ordered(self, 0.) }
                 }
@@ -75,7 +75,7 @@ macro_rules! impl_float_reductions {
             pub fn horizontal_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
                 if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_slice().iter().product()
+                    self.as_array().iter().product()
                 } else {
                     unsafe { crate::intrinsics::simd_reduce_mul_ordered(self, 1.) }
                 }
@@ -104,9 +104,9 @@ macro_rules! impl_float_reductions {

 macro_rules! impl_full_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
-        impl<T: crate::Mask, const LANES: usize> $name<T, LANES>
+        impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[inline]
             pub fn any(self) -> bool {
@@ -125,8 +125,7 @@ macro_rules! impl_opaque_mask_reductions {
     { $name:ident, $bits_ty:ident } => {
         impl<const LANES: usize> $name<LANES>
         where
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            $name<LANES>: crate::Mask,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns true if any lane is set, or false otherwise.
             #[inline]

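A brief usage sketch (not part of the commit) of the horizontal reductions named above, assuming the same nightly `portable_simd` feature and `core_simd` crate as the doc examples in this diff:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let v = SimdF32::<4>::from_array([1.0, 2.0, 3.0, 4.0]);
    // Horizontal add and multiply over the lanes, as implemented above.
    assert_eq!(v.horizontal_sum(), 10.0);
    assert_eq!(v.horizontal_product(), 24.0);
}
```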
@@ -5,7 +5,7 @@ macro_rules! implement {
         #[cfg(feature = "std")]
         impl<const LANES: usize> crate::$type<LANES>
         where
-            Self: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Returns the smallest integer greater than or equal to each lane.
             #[must_use = "method returns a new vector and does not mutate the original value"]
@@ -41,13 +41,7 @@ macro_rules! implement {
             pub fn fract(self) -> Self {
                 self - self.trunc()
             }
-        }
-
-        impl<const LANES: usize> crate::$type<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$int_type<LANES>: crate::LanesAtMost32,
-        {
             /// Rounds toward zero and converts to the same-width integer type, assuming that
             /// the value is finite and fits in that type.
             ///

@@ -14,12 +14,10 @@ macro_rules! impl_select {
         $mask:ident ($bits_ty:ident): $($type:ident),*
     } => {
         $(
-            impl<const LANES: usize> Sealed for crate::$type<LANES> where Self: crate::LanesAtMost32 {}
+            impl<const LANES: usize> Sealed for crate::$type<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
             impl<const LANES: usize> Select<crate::$mask<LANES>> for crate::$type<LANES>
             where
-                crate::$mask<LANES>: crate::Mask,
-                crate::$bits_ty<LANES>: crate::LanesAtMost32,
-                Self: crate::LanesAtMost32,
+                crate::LaneCount<LANES>: crate::SupportedLaneCount,
             {
                 #[doc(hidden)]
                 #[inline]
@@ -31,13 +29,12 @@ macro_rules! impl_select {

         impl<const LANES: usize> Sealed for crate::$mask<LANES>
         where
             Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {}

         impl<const LANES: usize> Select<Self> for crate::$mask<LANES>
         where
             Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             #[doc(hidden)]
             #[inline]
@@ -48,8 +45,7 @@ macro_rules! impl_select {

         impl<const LANES: usize> crate::$mask<LANES>
         where
-            Self: crate::Mask,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
         {
             /// Choose lanes from two vectors.
             ///

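For orientation, a sketch of how the mask-driven lane choice described above ("Choose lanes from two vectors") is typically used. This is not part of the diff; the method name `select` and the `Mask32::from_array` constructor are assumptions and are not shown in the hunks above.

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let mask = Mask32::<4>::from_array([true, false, true, false]);
    let a = SimdI32::from_array([1, 2, 3, 4]);
    let b = SimdI32::from_array([-1, -2, -3, -4]);
    // Assumed signature: true lanes take values from `a`, false lanes from `b`.
    assert_eq!(mask.select(a, b), SimdI32::from_array([1, -2, 3, -4]));
}
```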
@@ -18,11 +18,14 @@ pub trait ToBytes: Sealed {
 macro_rules! impl_to_bytes {
     { $name:ident, $($int_width:literal -> $byte_width:literal),* } => {
         $(
-            impl Sealed for crate::$name<$int_width> where Self: crate::LanesAtMost32 {}
+            impl Sealed for crate::$name<$int_width>
+            where
+                crate::LaneCount<$int_width>: crate::SupportedLaneCount,
+            {}

             impl ToBytes for crate::$name<$int_width>
             where
-                Self: crate::LanesAtMost32,
-                crate::SimdU8<$byte_width>: crate::LanesAtMost32,
+                crate::LaneCount<$int_width>: crate::SupportedLaneCount,
             {
                 type Bytes = crate::SimdU8<$byte_width>;
                 fn to_bytes_impl(self) -> Self::Bytes {
@@ -36,7 +39,8 @@ macro_rules! impl_to_bytes {

         impl<const LANES: usize> crate::$name<LANES>
         where
-            Self: ToBytes + crate::LanesAtMost32,
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+            Self: ToBytes,
         {
             /// Return the memory representation of this integer as a byte array in native byte
             /// order.

@@ -1,3 +1,6 @@
+#[macro_use]
+mod vector_impl;
+
 mod float;
 mod int;
 mod uint;
@@ -8,3 +11,22 @@ pub use uint::*;

 // Vectors of pointers are not for public use at the current time.
 pub(crate) mod ptr;
+
+mod sealed {
+    pub trait Sealed {}
+}
+
+/// A representation of a vector as an "array" with indices, implementing
+/// operations applicable to any vector type based solely on "having lanes",
+/// and describing relationships between vector and scalar types.
+pub trait Vector: sealed::Sealed {
+    /// The scalar type in every lane of this vector type.
+    type Scalar: Copy + Sized;
+
+    /// The number of lanes for this vector.
+    const LANES: usize;
+
+    /// Generates a SIMD vector with the same value in every lane.
+    #[must_use]
+    fn splat(val: Self::Scalar) -> Self;
+}

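One point of a lane-count-free trait is that code can be generic over any vector type without naming a concrete lane count. A minimal sketch (not part of the commit; the helper `zeroed` is hypothetical, and it assumes `Vector` is re-exported at the crate root alongside the vector types):

```
#![feature(portable_simd)]
use core_simd::*;

// Hypothetical helper: generic over any SIMD vector via the new `Vector` trait.
fn zeroed<V: Vector>() -> V
where
    V::Scalar: Default,
{
    V::splat(V::Scalar::default())
}

fn main() {
    let v: SimdU8<16> = zeroed();
    assert_eq!(v, SimdU8::splat(0));
}
```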
@@ -1,5 +1,7 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements inherent methods for a float vector `$name` containing multiple
 /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
 /// representation. Called from `define_float_vector!`.
@@ -10,8 +12,7 @@ macro_rules! impl_float_vector {

         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Raw transmutation to an unsigned integer vector type with the
             /// same size and number of lanes.
@@ -74,15 +75,7 @@ macro_rules! impl_float_vector {
             pub fn to_radians(self) -> Self {
                 self * Self::splat($type::to_radians(1.))
             }
-        }
-
-        impl<const LANES: usize> $name<LANES>
-        where
-            Self: crate::LanesAtMost32,
-            crate::$bits_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
-        {
             /// Returns true for each lane if it has a positive sign, including
             /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
             #[inline]
@@ -197,7 +190,7 @@ macro_rules! impl_float_vector {
 #[repr(simd)]
 pub struct SimdF32<const LANES: usize>([f32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
@@ -205,7 +198,7 @@ impl_float_vector! { SimdF32, f32, SimdU32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdF64<const LANES: usize>([f64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_float_vector! { SimdF64, f64, SimdU64, Mask64, SimdI64 }

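A small check of the degree-to-radian conversion kept above (illustrative only, not part of the commit; assumes the same nightly feature and crate as the doc examples in this diff):

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let degrees = SimdF32::<4>::splat(180.0);
    let radians = degrees.to_radians();
    // to_radians multiplies by f32::to_radians(1.0), as shown above.
    for x in radians.as_array() {
        assert!((x - core::f32::consts::PI).abs() < 1e-6);
    }
}
```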
@@ -1,36 +1,39 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_integer_vector {
     { $name:ident, $type:ty, $mask_ty:ident, $mask_impl_ty:ident } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }

-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}

-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }

-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }

         impl<const LANES: usize> $name<LANES>
         where
-            Self: crate::LanesAtMost32,
-            crate::$mask_impl_ty<LANES>: crate::LanesAtMost32,
-            crate::$mask_ty<LANES>: crate::Mask,
+            LaneCount<LANES>: SupportedLaneCount,
         {
             /// Returns true for each positive lane and false if it is zero or negative.
             #[inline]
@@ -63,7 +66,7 @@ macro_rules! impl_integer_vector {
 #[repr(simd)]
 pub struct SimdIsize<const LANES: usize>([isize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
@@ -71,7 +74,7 @@ impl_integer_vector! { SimdIsize, isize, MaskSize, SimdIsize }
 #[repr(simd)]
 pub struct SimdI16<const LANES: usize>([i16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
@@ -79,7 +82,7 @@ impl_integer_vector! { SimdI16, i16, Mask16, SimdI16 }
 #[repr(simd)]
 pub struct SimdI32<const LANES: usize>([i32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
@@ -87,7 +90,7 @@ impl_integer_vector! { SimdI32, i32, Mask32, SimdI32 }
 #[repr(simd)]
 pub struct SimdI64<const LANES: usize>([i64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
@@ -95,7 +98,7 @@ impl_integer_vector! { SimdI64, i64, Mask64, SimdI64 }
 #[repr(simd)]
 pub struct SimdI8<const LANES: usize>([i8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_integer_vector! { SimdI8, i8, Mask8, SimdI8 }

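Since `Ord` is implemented by comparing the underlying array (see `cmp` above), vector comparison is lexicographic by lane. A sketch, not part of the commit, under the same feature/crate assumptions as the doc examples:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let a = SimdI32::<4>::from_array([1, 2, 3, 4]);
    let b = SimdI32::<4>::from_array([1, 2, 4, 0]);
    // The third lane decides: 3 < 4, so `a` sorts before `b`.
    assert!(a < b);
}
```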
@@ -1,5 +1,5 @@
 //! Private implementation details of public gather/scatter APIs.
-use crate::SimdUsize;
+use crate::{LaneCount, SimdUsize, SupportedLaneCount};
 use core::mem;

 /// A vector of *const T.
@@ -9,7 +9,7 @@ pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);

 impl<T, const LANES: usize> SimdConstPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]
@@ -35,7 +35,7 @@ pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);

 impl<T, const LANES: usize> SimdMutPtr<T, LANES>
 where
-    SimdUsize<LANES>: crate::LanesAtMost32,
+    LaneCount<LANES>: SupportedLaneCount,
     T: Sized,
 {
     #[inline]

@@ -1,28 +1,33 @@
 #![allow(non_camel_case_types)]

+use crate::{LaneCount, SupportedLaneCount};
+
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_unsigned_vector {
     { $name:ident, $type:ty } => {
         impl_vector! { $name, $type }
         impl_integer_reductions! { $name, $type }

-        impl<const LANES: usize> Eq for $name<LANES> where Self: crate::LanesAtMost32 {}
+        impl<const LANES: usize> Eq for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {}

-        impl<const LANES: usize> Ord for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> Ord for $name<LANES> where LaneCount<LANES>: SupportedLaneCount {
             #[inline]
             fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                 // TODO use SIMD cmp
-                self.to_array().cmp(other.as_ref())
+                self.as_array().cmp(other.as_ref())
             }
         }

-        impl<const LANES: usize> core::hash::Hash for $name<LANES> where Self: crate::LanesAtMost32 {
+        impl<const LANES: usize> core::hash::Hash for $name<LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
             #[inline]
             fn hash<H>(&self, state: &mut H)
             where
                 H: core::hash::Hasher
             {
-                self.as_slice().hash(state)
+                self.as_array().hash(state)
             }
         }
     }
@@ -32,7 +37,7 @@ macro_rules! impl_unsigned_vector {
 #[repr(simd)]
 pub struct SimdUsize<const LANES: usize>([usize; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdUsize, usize }
@@ -40,7 +45,7 @@ impl_unsigned_vector! { SimdUsize, usize }
 #[repr(simd)]
 pub struct SimdU16<const LANES: usize>([u16; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU16, u16 }
@@ -48,7 +53,7 @@ impl_unsigned_vector! { SimdU16, u16 }
 #[repr(simd)]
 pub struct SimdU32<const LANES: usize>([u32; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU32, u32 }
@@ -56,7 +61,7 @@ impl_unsigned_vector! { SimdU32, u32 }
 #[repr(simd)]
 pub struct SimdU64<const LANES: usize>([u64; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU64, u64 }
@@ -64,7 +69,7 @@ impl_unsigned_vector! { SimdU64, u64 }
 #[repr(simd)]
 pub struct SimdU8<const LANES: usize>([u8; LANES])
 where
-    Self: crate::LanesAtMost32;
+    LaneCount<LANES>: SupportedLaneCount;

 impl_unsigned_vector! { SimdU8, u8 }

crates/core_simd/src/vector/vector_impl.rs (new file, 257 lines)
@@ -0,0 +1,257 @@
+/// Implements common traits on the specified vector `$name`, holding multiple `$lanes` of `$type`.
+macro_rules! impl_vector {
+    { $name:ident, $type:ty } => {
+        impl<const LANES: usize> crate::vector::sealed::Sealed for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {}
+
+        impl<const LANES: usize> crate::vector::Vector for $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            type Scalar = $type;
+            const LANES: usize = LANES;
+
+            #[inline]
+            fn splat(val: Self::Scalar) -> Self {
+                Self::splat(val)
+            }
+        }
+
+        impl<const LANES: usize> $name<LANES>
+        where
+            crate::LaneCount<LANES>: crate::SupportedLaneCount,
+        {
+            /// Construct a SIMD vector by setting all lanes to the given value.
+            pub const fn splat(value: $type) -> Self {
+                Self([value; LANES])
+            }
+
+            /// Returns an array reference containing the entire SIMD vector.
+            pub const fn as_array(&self) -> &[$type; LANES] {
+                &self.0
+            }
+
+            /// Returns a mutable array reference containing the entire SIMD vector.
+            pub fn as_mut_array(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+
+            /// Converts an array to a SIMD vector.
+            pub const fn from_array(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+
+            /// Converts a SIMD vector to an array.
+            pub const fn to_array(self) -> [$type; LANES] {
+                self.0
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            ///
+            /// let result = SimdI32::<4>::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or(slice: &[$type], idxs: crate::SimdUsize<LANES>, or: Self) -> Self {
+                Self::gather_select(slice, crate::MaskSize::splat(true), idxs, or)
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices instead use the default value for that lane (0).
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            ///
+            /// let result = SimdI32::<4>::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([0, 13, 10, 15]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_or_default(slice: &[$type], idxs: crate::SimdUsize<LANES>) -> Self {
+                Self::gather_or(slice, idxs, Self::splat(<$type>::default()))
+            }
+
+            /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices instead select the value from the "or" vector.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 5]);
+            /// let alt = SimdI32::from_array([-5, -4, -3, -2]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// let result = SimdI32::<4>::gather_select(&vec, mask, idxs, alt); // Note the lane that is out-of-bounds.
+            /// assert_eq!(result, SimdI32::from_array([-5, 13, 10, -2]));
+            /// ```
+            #[must_use]
+            #[inline]
+            pub fn gather_select(
+                slice: &[$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+                or: Self,
+            ) -> Self
+            {
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                let base_ptr = crate::vector::ptr::SimdConstPtr::splat(slice.as_ptr());
+                // Ferris forgive me, I have done pointer arithmetic here.
+                let ptrs = base_ptr.wrapping_add(idxs);
+                // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+                unsafe { crate::intrinsics::simd_gather(or, ptrs, mask) }
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds indices are not written.
+            /// `scatter` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            ///
+            /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+            /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter(self, slice: &mut [$type], idxs: crate::SimdUsize<LANES>) {
+                self.scatter_select(slice, crate::MaskSize::splat(true), idxs)
+            }
+
+            /// SIMD scatter: write a SIMD vector's values into a slice, using potentially discontiguous indices.
+            /// Out-of-bounds or masked indices are not written.
+            /// `scatter_select` writes "in order", so if an index receives two writes, only the last is guaranteed.
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core_simd::*;
+            /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+            /// let idxs = SimdUsize::<4>::from_array([9, 3, 0, 0]);
+            /// let vals = SimdI32::from_array([-27, 82, -41, 124]);
+            /// let mask = MaskSize::from_array([true, true, true, false]); // Note the mask of the last lane.
+            ///
+            /// vals.scatter_select(&mut vec, mask, idxs); // index 0's second write is masked, thus omitted.
+            /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+            /// ```
+            #[inline]
+            pub fn scatter_select(
+                self,
+                slice: &mut [$type],
+                mask: crate::MaskSize<LANES>,
+                idxs: crate::SimdUsize<LANES>,
+            )
+            {
+                // We must construct our scatter mask before we derive a pointer!
+                let mask = (mask & idxs.lanes_lt(crate::SimdUsize::splat(slice.len()))).to_int();
+                // SAFETY: This block works with *mut T derived from &mut 'a [T],
+                // which means it is delicate in Rust's borrowing model, circa 2021:
+                // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+                // Even though this block is largely safe methods, it must be almost exactly this way
+                // to prevent invalidating the raw ptrs while they're live.
+                // Thus, entering this block requires all values to use being already ready:
+                // 0. idxs we want to write to, which are used to construct the mask.
+                // 1. mask, which depends on an initial &'a [T] and the idxs.
+                // 2. actual values to scatter (self).
+                // 3. &mut [T] which will become our base ptr.
+                unsafe {
+                    // Now Entering ☢️ *mut T Zone
+                    let base_ptr = crate::vector::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+                    // Ferris forgive me, I have done pointer arithmetic here.
+                    let ptrs = base_ptr.wrapping_add(idxs);
+                    // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+                    crate::intrinsics::simd_scatter(self, ptrs, mask)
+                    // Cleared ☢️ *mut T Zone
+                }
+            }
+        }
+
+        impl<const LANES: usize> Copy for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {}
+
+        impl<const LANES: usize> Clone for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn clone(&self) -> Self {
+                *self
+            }
+        }
+
+        impl<const LANES: usize> Default for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn default() -> Self {
+                Self::splat(<$type>::default())
+            }
+        }
+
+        impl<const LANES: usize> PartialEq for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn eq(&self, other: &Self) -> bool {
+                // TODO use SIMD equality
+                self.to_array() == other.to_array()
+            }
+        }
+
+        impl<const LANES: usize> PartialOrd for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+                // TODO use SIMD equalitya
+                self.to_array().partial_cmp(other.as_ref())
+            }
+        }
+
+        // array references
+        impl<const LANES: usize> AsRef<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type; LANES] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type; LANES] {
+                &mut self.0
+            }
+        }
+
+        // slice references
+        impl<const LANES: usize> AsRef<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_ref(&self) -> &[$type] {
+                &self.0
+            }
+        }
+
+        impl<const LANES: usize> AsMut<[$type]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            #[inline]
+            fn as_mut(&mut self) -> &mut [$type] {
+                &mut self.0
+            }
+        }
+
+        // vector/array conversion
+        impl<const LANES: usize> From<[$type; LANES]> for $name<LANES> where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(array: [$type; LANES]) -> Self {
+                Self(array)
+            }
+        }
+
+        impl <const LANES: usize> From<$name<LANES>> for [$type; LANES] where crate::LaneCount<LANES>: crate::SupportedLaneCount {
+            fn from(vector: $name<LANES>) -> Self {
+                vector.to_array()
+            }
+        }
+
+        impl_shuffle_2pow_lanes!{ $name }
+    }
+}

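For reference, a short sketch (not part of the commit) of the array-oriented API introduced by `impl_vector!` above, covering `from_array`, `as_array`, `to_array`, and the `From` conversions; it assumes the nightly feature and crate used in the doc examples:

```
#![feature(portable_simd)]
use core_simd::*;

fn main() {
    let v = SimdU32::<4>::from_array([1, 2, 3, 4]);
    // The array view that replaces the old as_slice().
    assert_eq!(v.as_array(), &[1, 2, 3, 4]);
    // Round-trip through the From impls shown above.
    let arr: [u32; 4] = v.into();
    assert_eq!(SimdU32::from(arr), v);
}
```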
@@ -335,23 +335,7 @@ macro_rules! test_lanes {

             fn implementation<const $lanes: usize>()
             where
-                core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::Mask8<$lanes>: core_simd::Mask,
-                core_simd::Mask16<$lanes>: core_simd::Mask,
-                core_simd::Mask32<$lanes>: core_simd::Mask,
-                core_simd::Mask64<$lanes>: core_simd::Mask,
-                core_simd::MaskSize<$lanes>: core_simd::Mask,
+                core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
             $body

             #[cfg(target_arch = "wasm32")]
@@ -409,23 +393,7 @@ macro_rules! test_lanes_panic {

             fn implementation<const $lanes: usize>()
             where
-                core_simd::SimdU8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdU64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdUsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI8<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI16<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdI64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdIsize<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF32<$lanes>: core_simd::LanesAtMost32,
-                core_simd::SimdF64<$lanes>: core_simd::LanesAtMost32,
-                core_simd::Mask8<$lanes>: core_simd::Mask,
-                core_simd::Mask16<$lanes>: core_simd::Mask,
-                core_simd::Mask32<$lanes>: core_simd::Mask,
-                core_simd::Mask64<$lanes>: core_simd::Mask,
-                core_simd::MaskSize<$lanes>: core_simd::Mask,
+                core_simd::LaneCount<$lanes>: core_simd::SupportedLaneCount,
             $body

             #[test]