From 838c5491a4296bb92891403c1a6d0e9d84991b51 Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Wed, 12 Apr 2023 16:21:28 +0000 Subject: [PATCH] Document tagged pointers better --- .../rustc_data_structures/src/tagged_ptr.rs | 73 +++++++++++++++---- .../src/tagged_ptr/copy.rs | 71 +++++++++++++++--- .../src/tagged_ptr/drop.rs | 17 +++-- 3 files changed, 130 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_data_structures/src/tagged_ptr.rs b/compiler/rustc_data_structures/src/tagged_ptr.rs index 9a3d76fd4d4..f10c12ceeda 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr.rs @@ -3,15 +3,17 @@ //! In order to utilize the pointer packing, you must have two types: a pointer, //! and a tag. //! -//! The pointer must implement the `Pointer` trait, with the primary requirement -//! being conversion to and from a usize. Note that the pointer must be -//! dereferenceable, so raw pointers generally cannot implement the `Pointer` -//! trait. This implies that the pointer must also be nonzero. +//! The pointer must implement the [`Pointer`] trait, with the primary +//! requirement being convertible to and from a raw pointer. Note that the +//! pointer must be dereferenceable, so raw pointers generally cannot implement +//! the [`Pointer`] trait. This implies that the pointer must also be non-null. //! -//! Many common pointer types already implement the `Pointer` trait. +//! Many common pointer types already implement the [`Pointer`] trait. //! -//! The tag must implement the `Tag` trait. We assert that the tag and `Pointer` -//! are compatible at compile time. +//! The tag must implement the [`Tag`] trait. +//! +//! We assert that the tag and the [`Pointer`] types are compatible at compile +//! time. use std::ops::Deref; use std::ptr::NonNull; @@ -71,32 +73,66 @@ pub unsafe trait Pointer: Deref { /// [`Self::Target`]: Deref::Target const BITS: usize; + /// Turns this pointer into a raw, non-null pointer. 
+ /// /// The inverse of this function is [`from_ptr`]. + /// + /// This function guarantees that the least-significant [`Self::BITS`] bits + /// are zero. + /// + /// [`from_ptr`]: Pointer::from_ptr + /// [`Self::BITS`]: Pointer::BITS fn into_ptr(self) -> NonNull; + /// Re-creates the original pointer, from a raw pointer returned by [`into_ptr`]. + /// /// # Safety /// - /// The passed `ptr` must be returned from `into_usize`. + /// The passed `ptr` must be returned from [`into_ptr`]. /// - /// This acts as `ptr::read` semantically, it should not be called more than - /// once on non-`Copy` `Pointer`s. + /// This acts as [`ptr::read::<Self>()`] semantically, it should not be called more than + /// once on non-[`Copy`] `Pointer`s. + /// + /// [`into_ptr`]: Pointer::into_ptr + /// [`ptr::read::<Self>()`]: std::ptr::read unsafe fn from_ptr(ptr: NonNull) -> Self; } -/// This describes tags that the `TaggedPtr` struct can hold. +/// This describes tags that the [`TaggedPtr`] struct can hold. /// /// # Safety /// -/// The BITS constant must be correct. +/// The [`BITS`] constant must be correct. /// -/// No more than `BITS` least significant bits may be set in the returned usize. +/// No more than [`BITS`] least significant bits may be set in the returned usize. +/// +/// [`BITS`]: Tag::BITS pub unsafe trait Tag: Copy { + /// Number of least-significant bits in the return value of [`into_usize`] + /// which may be non-zero. In other words this is the bit width of the + /// value. + /// + /// [`into_usize`]: Tag::into_usize const BITS: usize; + /// Turns this tag into an integer. + /// + /// The inverse of this function is [`from_usize`]. + /// + /// This function guarantees that only the least-significant [`Self::BITS`] + /// bits can be non-zero. + /// + /// [`from_usize`]: Tag::from_usize + /// [`Self::BITS`]: Tag::BITS fn into_usize(self) -> usize; + /// Re-creates the tag from the integer returned by [`into_usize`].
+ /// /// # Safety /// - /// The passed `tag` must be returned from `into_usize`. + /// The passed `tag` must be returned from [`into_usize`]. + /// + /// [`into_usize`]: Tag::into_usize unsafe fn from_usize(tag: usize) -> Self; } @@ -111,6 +147,7 @@ unsafe impl Pointer for Box { #[inline] unsafe fn from_ptr(ptr: NonNull) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Box::into_raw` Box::from_raw(ptr.as_ptr()) } } @@ -120,11 +157,13 @@ unsafe impl Pointer for Rc { #[inline] fn into_ptr(self) -> NonNull { + // Safety: pointers from `Rc::into_raw` are valid & non-null unsafe { NonNull::new_unchecked(Rc::into_raw(self).cast_mut()) } } #[inline] unsafe fn from_ptr(ptr: NonNull) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Rc::into_raw` Rc::from_raw(ptr.as_ptr()) } } @@ -134,11 +173,13 @@ unsafe impl Pointer for Arc { #[inline] fn into_ptr(self) -> NonNull { + // Safety: pointers from `Arc::into_raw` are valid & non-null unsafe { NonNull::new_unchecked(Arc::into_raw(self).cast_mut()) } } #[inline] unsafe fn from_ptr(ptr: NonNull) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Arc::into_raw` Arc::from_raw(ptr.as_ptr()) } } @@ -153,6 +194,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a T { #[inline] unsafe fn from_ptr(ptr: NonNull) -> Self { + // Safety: + // `ptr` comes from `into_ptr` which gets the pointer from a reference ptr.as_ref() } } @@ -167,6 +210,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a mut T { #[inline] unsafe fn from_ptr(mut ptr: NonNull) -> Self { + // Safety: + // `ptr` comes from `into_ptr` which gets the pointer from a reference ptr.as_mut() } } diff --git a/compiler/rustc_data_structures/src/tagged_ptr/copy.rs b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs index aebf24ebbde..02dcbd389df 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr/copy.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs @@ -8,35 +8,75 @@ use std::num::NonZeroUsize; 
use std::ops::{Deref, DerefMut}; use std::ptr::NonNull; -/// A `Copy` TaggedPtr. +/// A [`Copy`] tagged pointer. /// -/// You should use this instead of the `TaggedPtr` type in all cases where -/// `P: Copy`. +/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer. +/// +/// You should use this instead of the [`TaggedPtr`] type in all cases where +/// `P` implements [`Copy`]. /// /// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without -/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, -/// wrap the TaggedPtr. +/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`]; +/// if you want that, wrap the [`CopyTaggedPtr`]. +/// +/// [`TaggedPtr`]: crate::tagged_ptr::TaggedPtr pub struct CopyTaggedPtr where P: Pointer, T: Tag, { + /// This is semantically a pair of `pointer: P` and `tag: T` fields, + /// however we pack them in a single pointer, to save space. + /// + /// We pack the tag into the **most**-significant bits of the pointer to + /// ease retrieval of the value. A left shift is a multiplication and + /// those are embeddable in instruction encoding, for example: + /// + /// ```asm + /// // (https://godbolt.org/z/jqcYPWEr3) + /// example::shift_read3: + /// mov eax, dword ptr [8*rdi] + /// ret + /// + /// example::mask_read3: + /// and rdi, -8 + /// mov eax, dword ptr [rdi] + /// ret + /// ``` + /// + /// This is ASM output by rustc for reads of values behind tagged + /// pointers for different approaches to tagging: + /// - `shift_read3` uses `<< 3` (the tag is in the most-significant bits) + /// - `mask_read3` uses `& !0b111` (the tag is in the least-significant bits) + /// + /// The shift approach thus produces fewer instructions and is likely faster.
+ /// /// Encoding diagram: + /// ```text + /// [ packed.addr ] + /// [ tag ] [ pointer.addr >> T::BITS ] <-- usize::BITS - T::BITS bits + /// ^ + /// | + /// T::BITS bits + /// ``` + /// + /// The tag can be retrieved by `packed.addr() >> (usize::BITS - T::BITS)` and the + /// pointer can be retrieved by `packed.map_addr(|addr| addr << T::BITS)`. packed: NonNull, tag_ghost: PhantomData, } -// We pack the tag into the *upper* bits of the pointer to ease retrieval of the -// value; a left shift is a multiplication and those are embeddable in -// instruction encoding. impl CopyTaggedPtr where P: Pointer, T: Tag, { + /// Tags `pointer` with `tag`. pub fn new(pointer: P, tag: T) -> Self { Self { packed: Self::pack(P::into_ptr(pointer), tag), tag_ghost: PhantomData } } + /// Retrieves the pointer. pub fn pointer(self) -> P where P: Copy, @@ -48,11 +88,18 @@ where unsafe { P::from_ptr(self.pointer_raw()) } } + /// Retrieves the tag. #[inline] pub fn tag(&self) -> T { - unsafe { T::from_usize(self.packed.addr().get() >> Self::TAG_BIT_SHIFT) } + // Unpack the tag, according to the `self.packed` encoding scheme + let tag = self.packed.addr().get() >> Self::TAG_BIT_SHIFT; + + // Safety: + // The shift recovers the value that `pack` got from `T::into_usize` + unsafe { T::from_usize(tag) } } + /// Sets the tag to a new value. #[inline] pub fn set_tag(&mut self, tag: T) { self.packed = Self::pack(self.pointer_raw(), tag); @@ -61,7 +108,8 @@ where const TAG_BIT_SHIFT: usize = usize::BITS as usize - T::BITS; const ASSERTION: () = { assert!(T::BITS <= P::BITS) }; - /// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`. + /// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`, + /// according to the `self.packed` encoding scheme.
/// /// [`P::into_ptr`]: Pointer::into_ptr fn pack(ptr: NonNull, tag: T) -> NonNull { @@ -71,7 +119,7 @@ where let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT; ptr.map_addr(|addr| { - // SAFETY: + // Safety: // - The pointer is `NonNull` => it's address is `NonZeroUsize` // - `P::BITS` least significant bits are always zero (`Pointer` contract) // - `T::BITS <= P::BITS` (from `Self::ASSERTION`) @@ -85,6 +133,7 @@ where }) } + /// Retrieves the original raw pointer from `self.packed`. pub(super) fn pointer_raw(&self) -> NonNull { self.packed.map_addr(|addr| unsafe { NonZeroUsize::new_unchecked(addr.get() << T::BITS) }) } diff --git a/compiler/rustc_data_structures/src/tagged_ptr/drop.rs b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs index 286951ce0e9..6ca6c7d1283 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr/drop.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs @@ -6,11 +6,16 @@ use super::CopyTaggedPtr; use super::{Pointer, Tag}; use crate::stable_hasher::{HashStable, StableHasher}; -/// A TaggedPtr implementing `Drop`. +/// A tagged pointer that supports pointers that implement [`Drop`]. +/// +/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer. +/// +/// You should use [`CopyTaggedPtr`] instead of this type in all cases +/// where `P` implements [`Copy`]. /// /// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without -/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, -/// wrap the TaggedPtr. +/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`]; +/// if you want that, wrap the [`TaggedPtr`]. pub struct TaggedPtr where P: Pointer, @@ -19,22 +24,22 @@ where raw: CopyTaggedPtr, } -// We pack the tag into the *upper* bits of the pointer to ease retrieval of the -// value; a right shift is a multiplication and those are embeddable in -// instruction encoding.
impl TaggedPtr where P: Pointer, T: Tag, { + /// Tags `pointer` with `tag`. pub fn new(pointer: P, tag: T) -> Self { TaggedPtr { raw: CopyTaggedPtr::new(pointer, tag) } } + /// Retrieves the tag. pub fn tag(&self) -> T { self.raw.tag() } + /// Sets the tag to a new value. pub fn set_tag(&mut self, tag: T) { self.raw.set_tag(tag) }