diff --git a/README.md b/README.md index de3f17d..7936485 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,84 @@ # ptr-stuff -stuffing things into pointers in a strict provenance friendly way + +A crate for stuffing things into a pointer. + +This crate consists of three parts: +* The type `StuffedPtr` +* The trait `StuffingStrategy` +* The trait `Backend` +`StuffedPtr` is the main type of this crate. It's a type whose size depends on the +choice of `Backend` (defaults to `usize`; `u64` and `u128` are also possible). It can store a +pointer or some extra data, so you can imagine it being +```rust +enum StuffedPtr { + Ptr(*mut T), + Extra(E), +} +``` +except that the extra data is bitstuffed into the pointer. You can choose any arbitrary bitstuffing +depending on the `StuffingStrategy`, an unsafe trait that governs how the extra data +(or the pointer itself) will be packed into the backend. + +# Example: NaN-Boxing +Pointers are hidden in the NaN values of floats. NaN boxing often involves also hiding booleans +or null in there, but we stay with floats and pointers (pointers to a `HashMap` that serves +as our "object" type). +See [crafting interpreters](https://craftinginterpreters.com/optimization.html#nan-boxing) +for more details. 
+```rust +use std::collections::HashMap; +use ptr_stuff::{StuffedPtr, StuffingStrategy}; + +// Create a unit struct for our strategy +struct NanBoxStrategy; + +const QNAN: u64 = 0x7ffc000000000000; // implementation detail of NaN boxing, a quiet NaN mask + +const SIGN_BIT: u64 = 0x8000000000000000; // implementation detail of NaN boxing, the sign bit of an f64 + +unsafe impl StuffingStrategy for NanBoxStrategy { + type Extra = f64; + + fn is_extra(data: u64) -> bool { + (data & QNAN) != QNAN + } + + fn stuff_extra(inner: Self::Extra) -> u64 { + unsafe { std::mem::transmute(inner) } // both are 64 bit POD's + } + + unsafe fn extract_extra(data: u64) -> Self::Extra { + std::mem::transmute(data) // both are 64 bit POD's + } + + fn stuff_ptr(addr: usize) -> u64 { + // add the QNAN and SIGN_BIT + SIGN_BIT | QNAN | u64::try_from(addr).unwrap() + } + + fn extract_ptr(inner: u64) -> usize { + // keep everything except for QNAN and SIGN_BIT + (inner & !(SIGN_BIT | QNAN)).try_into().unwrap() + } +} + +type Object = HashMap; // a very, very crude representation of an object + +type Value = StuffedPtr; // our value type + +fn main() { + let float: Value = StuffedPtr::new_extra(123.5); + assert_eq!(float.copy_extra(), Some(123.5)); + + let object: Object = HashMap::from([("a".to_owned(), 457)]); + + let boxed = Box::new(object); + let ptr: Value = StuffedPtr::new_ptr(Box::into_raw(boxed)); + + let object = unsafe { &*ptr.get_ptr().unwrap() }; + assert_eq!(object.get("a"), Some(&457)); + + drop(unsafe { Box::from_raw(ptr.get_ptr().unwrap()) }); + // `ptr` is a dangling pointer now! +} +``` diff --git a/src/backend.rs b/src/backend.rs index bb2be51..64f3a7c 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -1,9 +1,22 @@ -use std::mem; +use core::mem; use sptr::Strict; /// A backend where the stuffed pointer is stored. Must be bigger or equal to the pointer size. -pub trait Backend { +/// +/// The [`Backend`] is a trait to define types that store the stuffed pointer. 
It's supposed to +/// be implemented on `Copy` types like `usize`, `u64`, `u128`. Note that these integers are basically +/// just the strategy and exchange types for addresses, but *not* the actual underlying storage, which +/// always contains a pointer to keep provenance (for example `(*mut T, u32)` on 32 bit for `u64`). +/// +/// This trait is just exposed for convenience and flexibility; you are usually not expected to implement +/// it yourself, although such occasions could occur (for example to have a bigger storage than `u128` +/// or smaller storage that only works on 32-bit or 16-bit platforms). +/// +/// # Safety +/// Implementers of this trait *must* keep provenance of pointers, so if a valid pointer address+provenance +/// combination is set in `set_ptr`, `get_ptr` *must* return the exact same values and provenance. +pub unsafe trait Backend { /// The underlying type where the data is stored. Often a tuple of a pointer (for the provenance) /// and some integers to fill up the bytes. type Stored: Copy; @@ -34,7 +47,7 @@ const _: () = assert_same_size::>::Stored>(); const _: () = assert_same_size::>::Stored>(); const _: () = assert_same_size::>::Stored>(); -impl Backend for usize { +unsafe impl Backend for usize { type Stored = *mut T; fn get_ptr(s: Self::Stored) -> (*mut T, Self) { @@ -52,7 +65,7 @@ impl Backend for usize { #[cfg(target_pointer_width = "64")] /// on 64 bit, we can just treat u64/usize interchangeably, because uintptr_t == size_t in Rust -impl Backend for u64 { +unsafe impl Backend for u64 { type Stored = *mut T; fn get_ptr(s: Self::Stored) -> (*mut T, Self) { @@ -70,7 +83,7 @@ impl Backend for u64 { macro_rules! impl_backend_2_tuple { (impl for $ty:ty { (*mut T, $int:ident), $num:literal }) => { - impl Backend for $ty { + unsafe impl Backend for $ty { // this one keeps the MSB in the pointer address, and the LSB in the integer type Stored = (*mut T, $int); @@ -97,7 +110,7 @@ macro_rules! 
impl_backend_2_tuple { #[cfg_attr(target_pointer_width = "64", allow(unused))] // not required on 64 bit macro_rules! impl_backend_3_tuple { (impl for $ty:ty { (*mut T, $int1:ident, $int2:ident), $num1:literal, $num2:literal }) => { - impl Backend for $ty { + unsafe impl Backend for $ty { // this one keeps the MSB in the pointer address, ISB in int1 and the LSB in the int2 type Stored = (*mut T, $int1, $int2); diff --git a/src/lib.rs b/src/lib.rs index 687364c..32fa62e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![no_std] #![warn(rust_2018_idioms)] #![deny(unsafe_op_in_unsafe_fn)] #![warn(missing_docs)] @@ -21,11 +22,77 @@ //! except that the extra data is bitstuffed into the pointer. You can chose any arbitrary bitstuffing //! depending on the [`StuffingStrategy`], an unsafe trait that governs how the extra data //! (or the pointer itself) will be packed into the backend. +//! +//! # Example: NaN-Boxing +//! Pointers are hidden in the NaN values of floats. NaN boxing often involves also hiding booleans +//! or null in there, but we stay with floats and pointers (pointers to a `HashMap` that servers +//! as our "object" type). +//! +//! See [crafting interpreters](https://craftinginterpreters.com/optimization.html#nan-boxing) +//! for more details. +//! ``` +//! use std::collections::HashMap; +//! use ptr_stuff::{StuffedPtr, StuffingStrategy}; +//! +//! // Create a unit struct for our strategy +//! struct NanBoxStrategy; +//! +//! const QNAN: u64 = 0x7ffc000000000000; // implementation detail of NaN boxing, a quiet NaN mask +//! const SIGN_BIT: u64 = 0x8000000000000000; // implementation detail of NaN boxing, the sign bit of an f64 +//! +//! unsafe impl StuffingStrategy for NanBoxStrategy { +//! type Extra = f64; +//! +//! fn is_extra(data: u64) -> bool { +//! (data & QNAN) != QNAN +//! } +//! +//! fn stuff_extra(inner: Self::Extra) -> u64 { +//! unsafe { std::mem::transmute(inner) } // both are 64 bit POD's +//! } +//! +//! 
unsafe fn extract_extra(data: u64) -> Self::Extra { +//! std::mem::transmute(data) // both are 64 bit POD's +//! } +//! +//! fn stuff_ptr(addr: usize) -> u64 { +//! // add the QNAN and SIGN_BIT +//! SIGN_BIT | QNAN | u64::try_from(addr).unwrap() +//! } +//! +//! fn extract_ptr(inner: u64) -> usize { +//! // keep everything except for QNAN and SIGN_BIT +//! (inner & !(SIGN_BIT | QNAN)).try_into().unwrap() +//! } +//! } +//! +//! type Object = HashMap; // a very, very crude representation of an object +//! +//! type Value = StuffedPtr; // our value type +//! +//! let float: Value = StuffedPtr::new_extra(123.5); +//! assert_eq!(float.copy_extra(), Some(123.5)); +//! +//! let object: Object = HashMap::from([("a".to_owned(), 457)]); +//! let boxed = Box::new(object); +//! let ptr: Value = StuffedPtr::new_ptr(Box::into_raw(boxed)); +//! +//! let object = unsafe { &*ptr.get_ptr().unwrap() }; +//! assert_eq!(object.get("a"), Some(&457)); +//! +//! drop(unsafe { Box::from_raw(ptr.get_ptr().unwrap()) }); +//! +//! // `ptr` is a dangling pointer now! +//! ``` + +#[cfg(test)] +extern crate alloc; // we want that for tests so we can use `Box` mod backend; mod strategy; -use std::{ +use core::hash::{Hash, Hasher}; +use core::{ fmt::{Debug, Formatter}, marker::PhantomData, mem, @@ -36,7 +103,14 @@ use sptr::Strict; pub use crate::{backend::Backend, strategy::StuffingStrategy}; -/// A union of a pointer and some extra data. +/// A union of a pointer and some extra data, bitpacked into a pointer (or custom, using the third +/// generic param `I`) size. +/// +/// For a usage example, view the crate level documentation. +/// +/// This pointer does *not* drop extra data, [`StuffedPtr::into_extra`] can be used if that is required. +/// +/// `StuffedPtr` implements most traits like `Clone`, `PartialEq` or `Copy` if the extra type does. 
pub struct StuffedPtr(I::Stored, PhantomData) where S: StuffingStrategy, @@ -54,16 +128,16 @@ where Self(I::set_ptr(ptr, stuffed), PhantomData) } - /// Create a new `StuffPtr` from extra + /// Create a new `StuffPtr` from extra data pub fn new_extra(extra: S::Extra) -> Self { // this doesn't have any provenance, which is ok, since it's never a pointer anyways. // if the user calls `set_ptr` it will use the new provenance from that ptr - let ptr = std::ptr::null_mut(); + let ptr = core::ptr::null_mut(); let extra = S::stuff_extra(extra); Self(I::set_ptr(ptr, extra), PhantomData) } - /// Get the pointer data, or `None` if it contains extra + /// Get the pointer data, or `None` if it contains extra data pub fn get_ptr(&self) -> Option<*mut T> { self.is_extra().not().then(|| { // SAFETY: We have done a check that it's not extra @@ -71,9 +145,11 @@ where }) } - /// Get the pointer data + /// Get the unstuffed pointer data from the stuffed pointer, assuming that the `StuffedPtr` + /// contains pointer data. + /// /// # Safety - /// Must contain pointer data and not extra + /// `StuffedPtr` must contain pointer data and not extra data pub unsafe fn get_ptr_unchecked(&self) -> *mut T { let (provenance, addr) = I::get_ptr(self.0); let addr = S::extract_ptr(addr); @@ -88,9 +164,9 @@ where }) } - /// Get owned extra data from this + /// Turn this pointer into extra data. 
/// # Safety - /// Must contain extra data and not pointer + /// `StuffedPtr` must contain extra data and not pointer pub unsafe fn into_extra_unchecked(self) -> S::Extra { // SAFETY: `self` is consumed and forgotten after this call let extra = unsafe { self.get_extra_unchecked() }; @@ -153,7 +229,7 @@ where S::Extra: Debug, I: Backend, { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { // SAFETY: // If S::Extra: !Copy, we can't just copy it out and call it a day // For example, if it's a Box, not forgetting it here would lead to a double free @@ -174,22 +250,6 @@ where } } -impl Drop for StuffedPtr -where - S: StuffingStrategy, - I: Backend, -{ - fn drop(&mut self) { - if self.is_extra() { - // SAFETY: We move it out here and it's never accessed again. - let extra = unsafe { self.get_extra_unchecked() }; - drop(extra); - } else { - // dropping a ptr is a no-op - } - } -} - impl Clone for StuffedPtr where S: StuffingStrategy, @@ -209,6 +269,14 @@ where } } +impl Copy for StuffedPtr +where + S: StuffingStrategy, + S::Extra: Copy, + I: Backend, +{ +} + impl PartialEq for StuffedPtr where S: StuffingStrategy, @@ -226,7 +294,7 @@ where unsafe { let ptr1 = self.get_ptr_unchecked(); let ptr2 = self.get_ptr_unchecked(); - std::ptr::eq(ptr1, ptr2) + core::ptr::eq(ptr1, ptr2) } } _ => false, @@ -246,31 +314,50 @@ where { } -impl From> for StuffedPtr +impl Hash for StuffedPtr where S: StuffingStrategy, + S::Extra: Hash, I: Backend, { - fn from(boxed: Box) -> Self { - Self::new_ptr(Box::into_raw(boxed)) + fn hash(&self, state: &mut H) { + // SAFETY: We forget that `extra` ever existed after taking the reference and cloning it + if let Some(extra) = unsafe { self.get_extra() } { + extra.hash(state); + mem::forget(extra); + } else { + // SAFETY: Checked above + let ptr = unsafe { self.get_ptr_unchecked() }; + core::ptr::hash(ptr, state); + } } } #[cfg(test)] mod tests { #![allow(non_snake_case)] - use 
std::mem; + + use alloc::{boxed::Box, format}; + use core::mem; use paste::paste; use crate::{ strategy::test_strategies::{EmptyInMax, HasDebug, PanicsInDrop}, - StuffedPtr, + Backend, StuffedPtr, StuffingStrategy, }; + // note: the tests mostly use the `PanicsInDrop` type and strategy, to make sure that no // extra is ever dropped accidentally. - // note: the tests mostly use the `PanicsInDrop` type and strategy, to make sure that no + fn from_box(boxed: Box) -> StuffedPtr + where + S: StuffingStrategy, + I: Backend, + { + StuffedPtr::new_ptr(Box::into_raw(boxed)) + } + macro_rules! make_tests { ($backend:ident) => { paste! { @@ -278,7 +365,7 @@ mod tests { fn []() { unsafe { let boxed = Box::new(1); - let stuffed_ptr: StuffedPtr = boxed.into(); + let stuffed_ptr: StuffedPtr = from_box(boxed); let ptr = stuffed_ptr.get_ptr_unchecked(); let boxed = Box::from_raw(ptr); assert_eq!(*boxed, 1); @@ -296,7 +383,7 @@ mod tests { #[test] fn []() { let boxed = Box::new(1); - let stuffed_ptr: StuffedPtr = boxed.into(); + let stuffed_ptr: StuffedPtr = from_box(boxed); assert!(format!("{stuffed_ptr:?}").starts_with("StuffedPtr::Ptr {")); drop(unsafe { Box::from_raw(stuffed_ptr.get_ptr().unwrap()) }); @@ -309,16 +396,6 @@ mod tests { ); } - - #[test] - #[should_panic] - fn []() { - let stuffed_ptr: StuffedPtr<(), PanicsInDrop, $backend> = StuffedPtr::new_extra(PanicsInDrop); - // the panicking drop needs to be called here! - drop(stuffed_ptr); - } - - #[test] #[allow(clippy::redundant_clone)] fn []() { diff --git a/src/strategy.rs b/src/strategy.rs index 87cc5dd..421a778 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -1,4 +1,10 @@ /// A trait that describes how to stuff extras and pointers into the pointer sized object. +/// +/// This trait is what a user of this crate is expected to implement to use the crate for their own +/// pointer stuffing. 
It's usually implemented on ZSTs that only serve as stuffing strategies, but +/// it's also completely possible to implement it on the type in [`StuffingStrategy::Extra`] directly +/// if possible. +/// /// # Safety /// /// If [`StuffingStrategy::is_extra`] returns true for a value, then @@ -108,7 +114,7 @@ unsafe impl StuffingStrategy for () { #[cfg(test)] pub(crate) mod test_strategies { - use std::fmt::{Debug, Formatter}; + use core::fmt::{Debug, Formatter}; use super::StuffingStrategy; @@ -124,7 +130,7 @@ pub(crate) mod test_strategies { #[allow(clippy::forget_copy)] fn stuff_extra(inner: Self::Extra) -> usize { - std::mem::forget(inner); + core::mem::forget(inner); usize::MAX } @@ -149,7 +155,7 @@ pub(crate) mod test_strategies { #[allow(clippy::forget_copy)] fn stuff_extra(inner: Self::Extra) -> u64 { - std::mem::forget(inner); + core::mem::forget(inner); u64::MAX } @@ -175,7 +181,7 @@ pub(crate) mod test_strategies { #[allow(clippy::forget_copy)] fn stuff_extra(inner: Self::Extra) -> u128 { - std::mem::forget(inner); + core::mem::forget(inner); u128::MAX } @@ -202,7 +208,7 @@ pub(crate) mod test_strategies { pub struct HasDebug; impl Debug for HasDebug { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { f.write_str("hello!") } }