1
Fork 0

Document tagged pointers better

This commit is contained in:
Maybe Waffle 2023-04-12 16:21:28 +00:00
parent 6f9b15c40c
commit 838c5491a4
3 changed files with 130 additions and 31 deletions

View file

@ -3,15 +3,17 @@
//! In order to utilize the pointer packing, you must have two types: a pointer,
//! and a tag.
//!
//! The pointer must implement the `Pointer` trait, with the primary requirement
//! being conversion to and from a usize. Note that the pointer must be
//! dereferenceable, so raw pointers generally cannot implement the `Pointer`
//! trait. This implies that the pointer must also be nonzero.
//! The pointer must implement the [`Pointer`] trait, with the primary
//! requirement being convertible to and from a raw pointer. Note that the
//! pointer must be dereferenceable, so raw pointers generally cannot implement
//! the [`Pointer`] trait. This implies that the pointer must also be non-null.
//!
//! Many common pointer types already implement the `Pointer` trait.
//! Many common pointer types already implement the [`Pointer`] trait.
//!
//! The tag must implement the `Tag` trait. We assert that the tag and `Pointer`
//! are compatible at compile time.
//! The tag must implement the [`Tag`] trait.
//!
//! We assert that the tag and the [`Pointer`] types are compatible at compile
//! time.
use std::ops::Deref;
use std::ptr::NonNull;
@ -71,32 +73,66 @@ pub unsafe trait Pointer: Deref {
/// [`Self::Target`]: Deref::Target
const BITS: usize;
/// Turns this pointer into a raw, non-null pointer.
///
/// The inverse of this function is [`from_ptr`].
///
/// This function guarantees that the least-significant [`Self::BITS`] bits
/// are zero.
///
/// [`from_ptr`]: Pointer::from_ptr
/// [`Self::BITS`]: Pointer::BITS
fn into_ptr(self) -> NonNull<Self::Target>;
/// Re-creates the original pointer, from a raw pointer returned by [`into_ptr`].
///
/// # Safety
///
/// The passed `ptr` must be returned from `into_usize`.
/// The passed `ptr` must be returned from [`into_ptr`].
///
/// This acts as `ptr::read` semantically, it should not be called more than
/// once on non-`Copy` `Pointer`s.
/// This acts as [`ptr::read::<Self>()`] semantically, it should not be called more than
/// once on non-[`Copy`] `Pointer`s.
///
/// [`into_ptr`]: Pointer::into_ptr
/// [`ptr::read::<Self>()`]: std::ptr::read
unsafe fn from_ptr(ptr: NonNull<Self::Target>) -> Self;
}
/// This describes tags that the `TaggedPtr` struct can hold.
/// This describes tags that the [`TaggedPtr`] struct can hold.
///
/// # Safety
///
/// The BITS constant must be correct.
/// The [`BITS`] constant must be correct.
///
/// No more than `BITS` least significant bits may be set in the returned usize.
/// No more than [`BITS`] least significant bits may be set in the returned usize.
///
/// [`BITS`]: Tag::BITS
pub unsafe trait Tag: Copy {
/// Number of least-significant bits in the return value of [`into_usize`]
/// which may be non-zero. In other words this is the bit width of the
/// value.
///
/// [`into_usize`]: Tag::into_usize
const BITS: usize;
/// Turns this tag into an integer.
///
/// The inverse of this function is [`from_usize`].
///
/// This function guarantees that only the least-significant [`Self::BITS`]
/// bits can be non-zero.
///
/// [`from_usize`]: Tag::from_usize
/// [`Self::BITS`]: Tag::BITS
fn into_usize(self) -> usize;
/// Re-creates the tag from the integer returned by [`into_usize`].
///
/// # Safety
///
/// The passed `tag` must be returned from `into_usize`.
/// The passed `tag` must be returned from [`into_usize`].
///
/// [`into_usize`]: Tag::into_usize
unsafe fn from_usize(tag: usize) -> Self;
}
@ -111,6 +147,7 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Box<T> {
#[inline]
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
// Safety: `ptr` comes from `into_ptr` which calls `Box::into_raw`
Box::from_raw(ptr.as_ptr())
}
}
@ -120,11 +157,13 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Rc<T> {
#[inline]
fn into_ptr(self) -> NonNull<T> {
// Safety: pointers from `Rc::into_raw` are valid & non-null
unsafe { NonNull::new_unchecked(Rc::into_raw(self).cast_mut()) }
}
#[inline]
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
// Safety: `ptr` comes from `into_ptr` which calls `Rc::into_raw`
Rc::from_raw(ptr.as_ptr())
}
}
@ -134,11 +173,13 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Arc<T> {
#[inline]
fn into_ptr(self) -> NonNull<T> {
// Safety: pointers from `Arc::into_raw` are valid & non-null
unsafe { NonNull::new_unchecked(Arc::into_raw(self).cast_mut()) }
}
#[inline]
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
// Safety: `ptr` comes from `into_ptr` which calls `Arc::into_raw`
Arc::from_raw(ptr.as_ptr())
}
}
@ -153,6 +194,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a T {
#[inline]
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
// Safety:
// `ptr` comes from `into_ptr` which gets the pointer from a reference
ptr.as_ref()
}
}
@ -167,6 +210,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a mut T {
#[inline]
unsafe fn from_ptr(mut ptr: NonNull<T>) -> Self {
// Safety:
// `ptr` comes from `into_ptr` which gets the pointer from a reference
ptr.as_mut()
}
}

View file

@ -8,35 +8,75 @@ use std::num::NonZeroUsize;
use std::ops::{Deref, DerefMut};
use std::ptr::NonNull;
/// A `Copy` TaggedPtr.
/// A [`Copy`] tagged pointer.
///
/// You should use this instead of the `TaggedPtr` type in all cases where
/// `P: Copy`.
/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer.
///
/// You should use this instead of the [`TaggedPtr`] type in all cases where
/// `P` implements [`Copy`].
///
/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without
/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that,
/// wrap the TaggedPtr.
/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`];
/// if you want that, wrap the [`CopyTaggedPtr`].
///
/// [`TaggedPtr`]: crate::tagged_ptr::TaggedPtr
pub struct CopyTaggedPtr<P, T, const COMPARE_PACKED: bool>
where
P: Pointer,
T: Tag,
{
/// This is semantically a pair of `pointer: P` and `tag: T` fields,
/// however we pack them in a single pointer, to save space.
///
/// We pack the tag into the **most**-significant bits of the pointer to
/// ease retrieval of the value. A left shift is a multiplication and
/// those are embeddable in instruction encoding, for example:
///
/// ```asm
/// // (https://godbolt.org/z/jqcYPWEr3)
/// example::shift_read3:
/// mov eax, dword ptr [8*rdi]
/// ret
///
/// example::mask_read3:
/// and rdi, -8
/// mov eax, dword ptr [rdi]
/// ret
/// ```
///
/// This is ASM outputted by rustc for reads of values behind tagged
/// pointers for different approaches of tagging:
/// - `shift_read3` uses `<< 3` (the tag is in the most-significant bits)
/// - `mask_read3` uses `& !0b111` (the tag is in the least-significant bits)
///
/// The shift approach thus produces fewer instructions and is likely faster.
///
/// Encoding diagram:
/// ```text
/// [ packed.addr ]
/// [ tag ] [ pointer.addr >> T::BITS ] <-- usize::BITS - T::BITS bits
/// ^
/// |
/// T::BITS bits
/// ```
///
/// The tag can be retrieved by `packed.addr() >> Self::TAG_BIT_SHIFT`
/// (where `TAG_BIT_SHIFT` is `usize::BITS - T::BITS`) and the pointer
/// can be retrieved by `packed.map_addr(|addr| addr << T::BITS)`.
packed: NonNull<P::Target>,
tag_ghost: PhantomData<T>,
}
// We pack the tag into the *upper* bits of the pointer to ease retrieval of the
// value; a left shift is a multiplication and those are embeddable in
// instruction encoding.
impl<P, T, const CP: bool> CopyTaggedPtr<P, T, CP>
where
P: Pointer,
T: Tag,
{
/// Tags `pointer` with `tag`.
pub fn new(pointer: P, tag: T) -> Self {
Self { packed: Self::pack(P::into_ptr(pointer), tag), tag_ghost: PhantomData }
}
/// Retrieves the pointer.
pub fn pointer(self) -> P
where
P: Copy,
@ -48,11 +88,18 @@ where
unsafe { P::from_ptr(self.pointer_raw()) }
}
/// Retrieves the tag.
#[inline]
pub fn tag(&self) -> T {
unsafe { T::from_usize(self.packed.addr().get() >> Self::TAG_BIT_SHIFT) }
// Unpack the tag, according to the `self.packed` encoding scheme
let tag = self.packed.addr().get() >> Self::TAG_BIT_SHIFT;
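// Safety:
// `pack` stores `T::into_usize(tag)` in the most-significant `T::BITS` bits,
// so the shift above retrieves exactly the value that `into_usize` returned,
// which satisfies the precondition of `T::from_usize`.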
unsafe { T::from_usize(tag) }
}
/// Sets the tag to a new value.
#[inline]
pub fn set_tag(&mut self, tag: T) {
self.packed = Self::pack(self.pointer_raw(), tag);
@ -61,7 +108,8 @@ where
const TAG_BIT_SHIFT: usize = usize::BITS as usize - T::BITS;
const ASSERTION: () = { assert!(T::BITS <= P::BITS) };
/// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`.
/// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`,
/// according to `self.packed` encoding scheme.
///
/// [`P::into_ptr`]: Pointer::into_ptr
fn pack(ptr: NonNull<P::Target>, tag: T) -> NonNull<P::Target> {
@ -71,7 +119,7 @@ where
let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT;
ptr.map_addr(|addr| {
// SAFETY:
// Safety:
// - The pointer is `NonNull` => its address is `NonZeroUsize`
// - `P::BITS` least significant bits are always zero (`Pointer` contract)
// - `T::BITS <= P::BITS` (from `Self::ASSERTION`)
@ -85,6 +133,7 @@ where
})
}
/// Retrieves the original raw pointer from `self.packed`.
///
/// This undoes the encoding applied by `pack`: shifting the packed address
/// left by `T::BITS` discards the tag stored in the most-significant bits
/// and moves the pointer bits back to their original position.
pub(super) fn pointer_raw(&self) -> NonNull<P::Target> {
// Safety:
// Per the `self.packed` encoding, the low bits hold `pointer.addr >> T::BITS`.
// The original address is non-zero and its `T::BITS` least-significant bits
// are zero (`Pointer` contract together with `T::BITS <= P::BITS`), so the
// shifted-back address is the original one and therefore non-zero.
// NOTE(review): relies on `self.packed` only ever being produced by `pack`;
// confirm no other code path writes this field.
self.packed.map_addr(|addr| unsafe { NonZeroUsize::new_unchecked(addr.get() << T::BITS) })
}

View file

@ -6,11 +6,16 @@ use super::CopyTaggedPtr;
use super::{Pointer, Tag};
use crate::stable_hasher::{HashStable, StableHasher};
/// A TaggedPtr implementing `Drop`.
/// A tagged pointer that supports pointers that implement [`Drop`].
///
/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer.
///
/// You should use [`CopyTaggedPtr`] instead of this type in all cases
/// where `P` implements [`Copy`].
///
/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without
/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that,
/// wrap the TaggedPtr.
/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`];
/// if you want that, wrap the [`TaggedPtr`].
pub struct TaggedPtr<P, T, const COMPARE_PACKED: bool>
where
P: Pointer,
@ -19,22 +24,22 @@ where
raw: CopyTaggedPtr<P, T, COMPARE_PACKED>,
}
// We pack the tag into the *upper* bits of the pointer to ease retrieval of the
// value; a left shift is a multiplication and those are embeddable in
// instruction encoding.
impl<P, T, const CP: bool> TaggedPtr<P, T, CP>
where
P: Pointer,
T: Tag,
{
/// Tags `pointer` with `tag`.
pub fn new(pointer: P, tag: T) -> Self {
TaggedPtr { raw: CopyTaggedPtr::new(pointer, tag) }
}
/// Retrieves the tag.
pub fn tag(&self) -> T {
self.raw.tag()
}
/// Sets the tag to a new value.
pub fn set_tag(&mut self, tag: T) {
self.raw.set_tag(tag)
}