
interpret: better control over whether we read data with provenance, and implicit provenance stripping where possible

Ralf Jung 2022-06-02 20:30:29 -04:00
parent 5e6bb83268
commit 47d11a8483
22 changed files with 502 additions and 411 deletions
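In short: `AllocRef::read_ptr_sized` is replaced by two helpers, and the `read_scalar` methods gain an explicit flag, so every caller now states whether a read may carry provenance. A rough summary of the changed signatures, assembled from the hunks below (not exhaustive; the comments are paraphrases, not the rustc doc comments):

// Before: one pointer-sized read; provenance handling is implicit.
pub fn read_ptr_sized(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>>;

// After: callers state their intent explicitly.
pub fn read_scalar(
    &self,
    range: AllocRange,
    read_provenance: bool,
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>>;

// Integer read of an arbitrary size; never reads provenance.
pub fn read_integer(&self, offset: Size, size: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>>;

// Pointer-sized read; does read provenance.
pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>>;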

View file

@ -908,11 +908,15 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRefMut<'a, 'tcx, Tag, Extra> {
}
impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
pub fn read_scalar(&self, range: AllocRange) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
pub fn read_scalar(
&self,
range: AllocRange,
read_provenance: bool,
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
let range = self.range.subrange(range);
let res = self
.alloc
.read_scalar(&self.tcx, range)
.read_scalar(&self.tcx, range, read_provenance)
.map_err(|e| e.to_interp_error(self.alloc_id))?;
debug!(
"read_scalar in {} at {:#x}, size {}: {:?}",
@ -924,8 +928,19 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
Ok(res)
}
pub fn read_ptr_sized(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size))
pub fn read_integer(
&self,
offset: Size,
size: Size,
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(alloc_range(offset, size), /*read_provenance*/ false)
}
pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(
alloc_range(offset, self.tcx.data_layout().pointer_size),
/*read_provenance*/ true,
)
}
pub fn check_bytes(

View file

@ -15,8 +15,8 @@ use rustc_target::abi::{VariantIdx, Variants};
use super::{
alloc_range, from_known_layout, mir_assign_valid_types, AllocId, ConstValue, GlobalId,
InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer, Provenance,
Scalar, ScalarMaybeUninit,
InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer,
PointerArithmetic, Provenance, Scalar, ScalarMaybeUninit,
};
/// An `Immediate` represents a single immediate self-contained Rust value.
@ -284,11 +284,15 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
Abi::Scalar(s) if force => Some(s.primitive()),
_ => None,
};
if let Some(_s) = scalar_layout {
let number_may_have_provenance = !M::enforce_number_no_provenance(self);
if let Some(s) = scalar_layout {
//FIXME(#96185): let size = s.size(self);
//FIXME(#96185): assert_eq!(size, mplace.layout.size, "abi::Scalar size does not match layout size");
let size = mplace.layout.size; //FIXME(#96185): remove this line
let scalar = alloc.read_scalar(alloc_range(Size::ZERO, size))?;
let scalar = alloc.read_scalar(
alloc_range(Size::ZERO, size),
s.is_ptr() || (number_may_have_provenance && size == self.pointer_size()),
)?;
return Ok(Some(ImmTy { imm: scalar.into(), layout: mplace.layout }));
}
let scalar_pair_layout = match mplace.layout.abi {
@ -306,8 +310,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let (a_size, b_size) = (a.size(self), b.size(self));
let b_offset = a_size.align_to(b.align(self).abi);
assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
let a_val = alloc.read_scalar(alloc_range(Size::ZERO, a_size))?;
let b_val = alloc.read_scalar(alloc_range(b_offset, b_size))?;
let a_val = alloc.read_scalar(
alloc_range(Size::ZERO, a_size),
a.is_ptr() || (number_may_have_provenance && a_size == self.pointer_size()),
)?;
let b_val = alloc.read_scalar(
alloc_range(b_offset, b_size),
b.is_ptr() || (number_may_have_provenance && b_size == self.pointer_size()),
)?;
return Ok(Some(ImmTy {
imm: Immediate::ScalarPair(a_val, b_val),
layout: mplace.layout,

View file

@ -50,7 +50,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let vtable_slot = self
.get_ptr_alloc(vtable_slot, ptr_size, self.tcx.data_layout.pointer_align.abi)?
.expect("cannot be a ZST");
let fn_ptr = self.scalar_to_ptr(vtable_slot.read_ptr_sized(Size::ZERO)?.check_init()?)?;
let fn_ptr = self.scalar_to_ptr(vtable_slot.read_pointer(Size::ZERO)?.check_init()?)?;
self.get_ptr_fn(fn_ptr)
}
@ -69,9 +69,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)?
.expect("cannot be a ZST");
let drop_fn = vtable
.read_ptr_sized(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap(),
)?
.read_pointer(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap())?
.check_init()?;
// We *need* an instance here, no other kind of function value, to be able
// to determine the type.
@ -104,12 +102,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)?
.expect("cannot be a ZST");
let size = vtable
.read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap())?
.read_integer(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap(),
pointer_size,
)?
.check_init()?;
let size = size.to_machine_usize(self)?;
let size = Size::from_bytes(size);
let align = vtable
.read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap())?
.read_integer(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap(),
pointer_size,
)?
.check_init()?;
let align = align.to_machine_usize(self)?;
let align = Align::from_bytes(align).map_err(|e| err_ub!(InvalidVtableAlignment(e)))?;
@ -132,8 +136,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
.get_ptr_alloc(vtable_slot, pointer_size, self.tcx.data_layout.pointer_align.abi)?
.expect("cannot be a ZST");
let new_vtable =
self.scalar_to_ptr(new_vtable.read_ptr_sized(Size::ZERO)?.check_init()?)?;
let new_vtable = self.scalar_to_ptr(new_vtable.read_pointer(Size::ZERO)?.check_init()?)?;
Ok(new_vtable)
}

View file

@ -264,9 +264,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
/// Byte accessors.
impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// The last argument controls whether we error out when there are uninitialized
/// or pointer bytes. You should never call this, call `get_bytes` or
/// `get_bytes_with_uninit_and_ptr` instead,
/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about relocations. It just deduplicates some code between `read_scalar`
/// and `get_bytes_internal`.
fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
}
/// The last argument controls whether we error out when there are uninitialized or pointer
/// bytes. However, we *always* error when there are relocations overlapping the edges of the
/// range.
///
/// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
///
/// This function also guarantees that the resulting pointer will remain stable
/// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
@ -287,7 +296,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
self.check_relocation_edges(cx, range)?;
}
Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
Ok(self.get_bytes_even_more_internal(range))
}
/// Checks that these bytes are initialized and not pointer bytes, and then return them
@ -373,6 +382,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// Reads a *non-ZST* scalar.
///
/// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
/// supports that) provenance is entirely ignored.
///
/// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
/// for ZSTness anyway due to integer pointers being valid for ZSTs.
///
@ -382,35 +394,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
&self,
cx: &impl HasDataLayout,
range: AllocRange,
read_provenance: bool,
) -> AllocResult<ScalarMaybeUninit<Tag>> {
// `get_bytes_with_uninit_and_ptr` tests relocation edges.
// We deliberately error when loading data that partially has provenance, or partially
// initialized data (that's the check below), into a scalar. The LLVM semantics of this are
// unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
// further discussion.
let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
// Uninit check happens *after* we established that the alignment is correct.
// We must not return `Ok()` for unaligned pointers!
if read_provenance {
assert_eq!(range.size, cx.data_layout().pointer_size);
}
// First and foremost, if anything is uninit, bail.
if self.is_init(range).is_err() {
// This inflates uninitialized bytes to the entire scalar, even if only a few
// bytes are uninitialized.
return Ok(ScalarMaybeUninit::Uninit);
}
// Now we do the actual reading.
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
// See if we got a pointer.
if range.size != cx.data_layout().pointer_size {
// Not a pointer.
// *Now*, we better make sure that the inside is free of relocations too.
self.check_relocations(cx, range)?;
} else {
// Maybe a pointer.
if let Some(&prov) = self.relocations.get(&range.start) {
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
// If we are doing a pointer read, and there is a relocation exactly where we
// are reading, then we can put data and relocation back together and return that.
if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
// We already checked init and relocations, so we can use this function.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
// We don't. Just return the bits.
// If we are *not* reading a pointer, and we can just ignore relocations,
// then do exactly that.
if !read_provenance && Tag::OFFSET_IS_ADDR {
// We just strip provenance.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
}
// It's complicated. Better make sure there is no provenance anywhere.
// FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
// `read_provenance` is true and we ideally would distinguish the following two cases:
// - The entire `range` is covered by 2 relocations for the same provenance.
// Then we should return a pointer with that provenance.
// - The range has inhomogeneous provenance. Then we should return just the
// underlying bits.
let bytes = self.get_bytes(cx, range)?;
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
}
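
For orientation, the control flow that the new `Allocation::read_scalar` follows can be restated as a standalone sketch. This is illustrative code mirroring the branches above, not rustc code: `all_init`, `relocation_at_start`, and `any_relocation_in_range` stand in for `is_init`, `self.relocations.get(&range.start)`, and the relocation check performed by `get_bytes`. The `!read_provenance && OFFSET_IS_ADDR` branch is where the "implicit provenance stripping" from the commit message happens.

// Standalone sketch of the decision tree above (illustrative only, not rustc code).
const OFFSET_IS_ADDR: bool = true; // a Miri-like machine; CTFE/const-prop would use `false`

#[derive(Debug)]
enum SketchScalar {
    Uninit,
    Int(u128),
    Ptr { offset: u128, provenance: u64 },
}

fn read_scalar_sketch(
    bytes: &[u8],                     // the raw bytes of `range` (little-endian here)
    all_init: bool,                   // stands in for `self.is_init(range).is_ok()`
    relocation_at_start: Option<u64>, // provenance starting exactly at `range.start`
    any_relocation_in_range: bool,    // does any relocation overlap `range`?
    read_provenance: bool,
) -> Result<SketchScalar, &'static str> {
    // First and foremost: any uninitialized byte makes the whole scalar uninit.
    if !all_init {
        return Ok(SketchScalar::Uninit);
    }
    let bits = bytes.iter().rev().fold(0u128, |acc, &b| (acc << 8) | u128::from(b));
    // Pointer read sitting exactly on a relocation: recombine bits and provenance.
    // (The real code also asserts that `range.size` equals the pointer size here.)
    if read_provenance {
        if let Some(prov) = relocation_at_start {
            return Ok(SketchScalar::Ptr { offset: bits, provenance: prov });
        }
    }
    // Integer read on a machine whose pointer offsets are absolute addresses:
    // implicitly strip any provenance and hand back the raw bits.
    if !read_provenance && OFFSET_IS_ADDR {
        return Ok(SketchScalar::Int(bits));
    }
    // Everything else only succeeds if the range carries no provenance at all.
    if any_relocation_in_range {
        return Err("unsupported read of pointer bytes");
    }
    Ok(SketchScalar::Int(bits))
}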
@ -513,8 +537,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
let start = range.start;
let end = range.end();
// We need to handle clearing the relocations from parts of a pointer. See
// <https://github.com/rust-lang/rust/issues/87184> for details.
// We need to handle clearing the relocations from parts of a pointer.
// FIXME: Miri should preserve partial relocations; see
// https://github.com/rust-lang/miri/issues/2181.
if first < start {
if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
return Err(AllocError::PartialPointerOverwrite(first));

View file

@ -428,7 +428,7 @@ pub enum UnsupportedOpInfo {
/// Encountered a pointer where we needed raw bytes.
ReadPointerAsBytes,
/// Overwriting parts of a pointer; the resulting state cannot be represented in our
/// `Allocation` data structure.
/// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
PartialPointerOverwrite(Pointer<AllocId>),
//
// The variants below are only reachable from CTFE/const prop, miri will never emit them.

View file

@ -746,6 +746,11 @@ impl Primitive {
pub fn is_int(self) -> bool {
matches!(self, Int(..))
}
#[inline]
pub fn is_ptr(self) -> bool {
matches!(self, Pointer)
}
}
/// Inclusive wrap-around range of valid values, that is, if