1
Fork 0

Rollup merge of #108022 - CraftSpider:align-bytes, r=oli-obk

Support allocations with non-Box<[u8]> bytes

This is prep work for allowing miri to support passing pointers to C code, which will require `Allocation`s to be correctly aligned. Currently, it just makes `Allocation` generic and plumbs the necessary changes through the right places.

The follow-up to this will be adding a type in the miri interpreter which correctly aligns the bytes, using that for the Miri engine, then allowing Miri to pass pointers into these allocations to C calls.

Based off of #100467, credit to ```@emarteca``` for the code
This commit is contained in:
Matthias Krüger 2023-03-02 23:05:27 +01:00 committed by GitHub
commit f75f440bbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 50 deletions

View file

@ -8,7 +8,8 @@ mod tests;
use std::borrow::Cow;
use std::fmt;
use std::hash;
use std::ops::Range;
use std::hash::Hash;
use std::ops::{Deref, DerefMut, Range};
use std::ptr;
use either::{Left, Right};
@ -29,6 +30,39 @@ use provenance_map::*;
pub use init_mask::{InitChunk, InitChunkIter};
/// Functionality required for the bytes of an `Allocation`.
pub trait AllocBytes:
Clone + fmt::Debug + Eq + PartialEq + Hash + Deref<Target = [u8]> + DerefMut<Target = [u8]>
{
/// Adjust the bytes to the specified alignment -- by default, this is a no-op.
fn adjust_to_align(self, _align: Align) -> Self;
/// Create an `AllocBytes` from a slice of `u8`.
fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self;
/// Create a zeroed `AllocBytes` of the specified size and alignment;
/// call the callback error handler if there is an error in allocating the memory.
fn zeroed(size: Size, _align: Align) -> Option<Self>;
}
// Default `bytes` for `Allocation` is a `Box<[u8]>`.
impl AllocBytes for Box<[u8]> {
fn adjust_to_align(self, _align: Align) -> Self {
self
}
fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self {
Box::<[u8]>::from(slice.into())
}
fn zeroed(size: Size, _align: Align) -> Option<Self> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).ok()?;
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
let bytes = unsafe { bytes.assume_init() };
Some(bytes)
}
}
/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
@ -38,10 +72,10 @@ pub use init_mask::{InitChunk, InitChunkIter};
// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Eq, PartialEq, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct Allocation<Prov: Provenance = AllocId, Extra = ()> {
pub struct Allocation<Prov: Provenance = AllocId, Extra = (), Bytes = Box<[u8]>> {
/// The actual bytes of the allocation.
/// Note that the bytes of a pointer represent the offset of the pointer.
bytes: Box<[u8]>,
bytes: Bytes,
/// Maps from byte addresses to extra provenance data for each pointer.
/// Only the first byte of a pointer is inserted into the map; i.e.,
/// every entry in this map applies to `pointer_size` consecutive bytes starting
@ -220,14 +254,27 @@ impl AllocRange {
}
// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Prov: Provenance> Allocation<Prov> {
impl<Prov: Provenance, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
/// Creates an allocation from an existing `Bytes` value - this is needed for miri FFI support
pub fn from_raw_bytes(bytes: Bytes, align: Align, mutability: Mutability) -> Self {
let size = Size::from_bytes(bytes.len());
Self {
bytes,
provenance: ProvenanceMap::new(),
init_mask: InitMask::new(size, true),
align,
mutability,
extra: (),
}
}
/// Creates an allocation initialized by the given bytes
pub fn from_bytes<'a>(
slice: impl Into<Cow<'a, [u8]>>,
align: Align,
mutability: Mutability,
) -> Self {
let bytes = Box::<[u8]>::from(slice.into());
let bytes = Bytes::from_bytes(slice, align);
let size = Size::from_bytes(bytes.len());
Self {
bytes,
@ -248,7 +295,7 @@ impl<Prov: Provenance> Allocation<Prov> {
///
/// If `panic_on_fail` is true, this will never return `Err`.
pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
let bytes = Bytes::zeroed(size, align).ok_or_else(|| {
// This results in an error that can happen non-deterministically, since the memory
// available to the compiler can change between runs. Normally queries are always
// deterministic. However, we can be non-deterministic here because all uses of const
@ -262,8 +309,7 @@ impl<Prov: Provenance> Allocation<Prov> {
});
InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
})?;
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
let bytes = unsafe { bytes.assume_init() };
Ok(Allocation {
bytes,
provenance: ProvenanceMap::new(),
@ -275,7 +321,7 @@ impl<Prov: Provenance> Allocation<Prov> {
}
}
impl Allocation {
impl<Bytes: AllocBytes> Allocation<AllocId, (), Bytes> {
/// Adjust allocation from the ones in tcx to a custom Machine instance
/// with a different Provenance and Extra type.
pub fn adjust_from_tcx<Prov: Provenance, Extra, Err>(
@ -283,9 +329,11 @@ impl Allocation {
cx: &impl HasDataLayout,
extra: Extra,
mut adjust_ptr: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Prov>, Err>,
) -> Result<Allocation<Prov, Extra>, Err> {
// Compute new pointer provenance, which also adjusts the bytes.
let mut bytes = self.bytes;
) -> Result<Allocation<Prov, Extra, Bytes>, Err> {
// Compute new pointer provenance, which also adjusts the bytes, and realign the pointer if
// necessary.
let mut bytes = self.bytes.adjust_to_align(self.align);
let mut new_provenance = Vec::with_capacity(self.provenance.ptrs().len());
let ptr_size = cx.data_layout().pointer_size.bytes_usize();
let endian = cx.data_layout().endian;
@ -311,7 +359,7 @@ impl Allocation {
}
/// Raw accessors. Provide access to otherwise private bytes.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
pub fn len(&self) -> usize {
self.bytes.len()
}
@ -340,7 +388,11 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
}
/// Byte accessors.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
pub fn base_addr(&self) -> *const u8 {
self.bytes.as_ptr()
}
/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about provenance or initialization.
///
@ -412,7 +464,7 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
}
/// Reading and writing.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
/// Sets the init bit for the given range.
fn mark_init(&mut self, range: AllocRange, is_init: bool) {
if range.size.bytes() == 0 {

View file

@ -127,8 +127,8 @@ pub use self::error::{
pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar};
pub use self::allocation::{
alloc_range, AllocError, AllocRange, AllocResult, Allocation, ConstAllocation, InitChunk,
InitChunkIter,
alloc_range, AllocBytes, AllocError, AllocRange, AllocResult, Allocation, ConstAllocation,
InitChunk, InitChunkIter,
};
pub use self::pointer::{Pointer, PointerArithmetic, Provenance};

View file

@ -12,8 +12,8 @@ use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def_id::DefId;
use rustc_index::vec::Idx;
use rustc_middle::mir::interpret::{
alloc_range, read_target_uint, AllocId, Allocation, ConstAllocation, ConstValue, GlobalAlloc,
Pointer, Provenance,
alloc_range, read_target_uint, AllocBytes, AllocId, Allocation, ConstAllocation, ConstValue,
GlobalAlloc, Pointer, Provenance,
};
use rustc_middle::mir::visit::Visitor;
use rustc_middle::mir::*;
@ -787,21 +787,21 @@ pub fn write_allocations<'tcx>(
/// After the hex dump, an ascii dump follows, replacing all unprintable characters (control
/// characters or characters whose value is larger than 127) with a `.`
/// This also prints provenance adequately.
pub fn display_allocation<'a, 'tcx, Prov: Provenance, Extra>(
pub fn display_allocation<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
tcx: TyCtxt<'tcx>,
alloc: &'a Allocation<Prov, Extra>,
) -> RenderAllocation<'a, 'tcx, Prov, Extra> {
alloc: &'a Allocation<Prov, Extra, Bytes>,
) -> RenderAllocation<'a, 'tcx, Prov, Extra, Bytes> {
RenderAllocation { tcx, alloc }
}
#[doc(hidden)]
pub struct RenderAllocation<'a, 'tcx, Prov: Provenance, Extra> {
pub struct RenderAllocation<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> {
tcx: TyCtxt<'tcx>,
alloc: &'a Allocation<Prov, Extra>,
alloc: &'a Allocation<Prov, Extra, Bytes>,
}
impl<'a, 'tcx, Prov: Provenance, Extra> std::fmt::Display
for RenderAllocation<'a, 'tcx, Prov, Extra>
impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> std::fmt::Display
for RenderAllocation<'a, 'tcx, Prov, Extra, Bytes>
{
fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let RenderAllocation { tcx, alloc } = *self;
@ -845,9 +845,9 @@ fn write_allocation_newline(
/// The `prefix` argument allows callers to add an arbitrary prefix before each line (even if there
/// is only one line). Note that your prefix should contain a trailing space as the lines are
/// printed directly after it.
fn write_allocation_bytes<'tcx, Prov: Provenance, Extra>(
fn write_allocation_bytes<'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
tcx: TyCtxt<'tcx>,
alloc: &Allocation<Prov, Extra>,
alloc: &Allocation<Prov, Extra, Bytes>,
w: &mut dyn std::fmt::Write,
prefix: &str,
) -> std::fmt::Result {