diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs
index 0bdea157633..ab6a256f714 100644
--- a/src/tools/miri/src/intptrcast.rs
+++ b/src/tools/miri/src/intptrcast.rs
@@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;
 #[derive(Clone, Debug)]
 pub struct GlobalStateInner {
-    /// This is used as a map between the address of each allocation and its `AllocId`.
-    /// It is always sorted
+    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
+    /// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
+    /// base address, and we need to find the `AllocId` it belongs to.
+    /// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
     int_to_ptr_map: Vec<(u64, AllocId)>,
     /// The base address for each allocation. We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
@@ -62,10 +64,21 @@ impl GlobalStateInner {
     }
 }

-impl<'mir, 'tcx> GlobalStateInner {
+/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
+/// of `align` that is larger or equal to `addr`
+fn align_addr(addr: u64, align: u64) -> u64 {
+    match addr % align {
+        0 => addr,
+        rem => addr.checked_add(align).unwrap() - rem,
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     // Returns the exposed `AllocId` that corresponds to the specified addr,
     // or `None` if the addr is out of bounds
-    fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
+    fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
+        let ecx = self.eval_context_ref();
         let global_state = ecx.machine.intptrcast.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);

@@ -82,31 +95,90 @@ impl<'mir, 'tcx> GlobalStateInner {
             let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
             // This never overflows because `addr >= glb`
             let offset = addr - glb;
-            // If the offset exceeds the size of the allocation, don't use this `alloc_id`.
+            // We require this to be strict in-bounds of the allocation. This arm is only
+            // entered for addresses that are not the base address, so even zero-sized
+            // allocations will get recognized at their base address -- but all other
+            // allocations will *not* be recognized at their "end" address.
             let size = ecx.get_alloc_info(alloc_id).0;
-            if offset <= size.bytes() { Some(alloc_id) } else { None }
+            if offset < size.bytes() { Some(alloc_id) } else { None }
             }
         }?;

-        // We only use this provenance if it has been exposed, *and* is still live.
+        // We only use this provenance if it has been exposed.
         if global_state.exposed.contains(&alloc_id) {
-            let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
-            match kind {
-                AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
-                    return Some(alloc_id);
-                }
-                AllocKind::Dead => {}
-            }
+            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
+            debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
+            Some(alloc_id)
+        } else {
+            None
         }
-
-        None
     }

-    pub fn expose_ptr(
-        ecx: &mut MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-        tag: BorTag,
-    ) -> InterpResult<'tcx> {
+    fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
+        let ecx = self.eval_context_ref();
+        let mut global_state = ecx.machine.intptrcast.borrow_mut();
+        let global_state = &mut *global_state;
+
+        Ok(match global_state.base_addr.entry(alloc_id) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let (size, align, kind) = ecx.get_alloc_info(alloc_id);
+                // This is either called immediately after allocation (and then cached), or when
+                // adjusting `tcx` pointers (which never get freed). So assert that we are looking
+                // at a live allocation. This also ensures that we never re-assign an address to an
+                // allocation that previously had an address, but then was freed and the address
+                // information was removed.
+                assert!(!matches!(kind, AllocKind::Dead));
+
+                // This allocation does not have a base address yet, pick one.
+                // Leave some space to the previous allocation, to give it some chance to be less aligned.
+                let slack = {
+                    let mut rng = ecx.machine.rng.borrow_mut();
+                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+                    rng.gen_range(0..16)
+                };
+                // From next_base_addr + slack, round up to adjust for alignment.
+                let base_addr = global_state
+                    .next_base_addr
+                    .checked_add(slack)
+                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                let base_addr = align_addr(base_addr, align.bytes());
+                entry.insert(base_addr);
+                trace!(
+                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
+                    base_addr,
+                    alloc_id,
+                    size.bytes(),
+                    align.bytes(),
+                    slack,
+                );
+
+                // Remember next base address. If this allocation is zero-sized, leave a gap
+                // of at least 1 to avoid two allocations having the same base address.
+                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+                // function/vtable pointers need to be distinguishable!)
+                global_state.next_base_addr = base_addr
+                    .checked_add(max(size.bytes(), 1))
+                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                // Even if `Size` didn't overflow, we might still have filled up the address space.
+                if global_state.next_base_addr > ecx.target_usize_max() {
+                    throw_exhaust!(AddressSpaceFull);
+                }
+                // Also maintain the opposite mapping in `int_to_ptr_map`.
+                // Given that `next_base_addr` increases in each allocation, pushing the
+                // corresponding tuple keeps `int_to_ptr_map` sorted
+                global_state.int_to_ptr_map.push((base_addr, alloc_id));
+
+                base_addr
+            }
+        })
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+    fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
+        let ecx = self.eval_context_mut();
         let global_state = ecx.machine.intptrcast.get_mut();
         // In strict mode, we don't need this, so we can save some cycles by not tracking it.
         if global_state.provenance_mode != ProvenanceMode::Strict {
@@ -119,14 +191,13 @@ impl<'mir, 'tcx> GlobalStateInner {
         Ok(())
     }

-    pub fn ptr_from_addr_cast(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        addr: u64,
-    ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
+    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
         trace!("Casting {:#x} to a pointer", addr);

-        // Potentially emit a warning.
+        let ecx = self.eval_context_ref();
         let global_state = ecx.machine.intptrcast.borrow();
+
+        // Potentially emit a warning.
         match global_state.provenance_mode {
             ProvenanceMode::Default => {
                 // The first time this happens at a particular location, print a warning.
@@ -157,71 +228,16 @@ impl<'mir, 'tcx> GlobalStateInner {
         Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
     }

-    fn alloc_base_addr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-    ) -> InterpResult<'tcx, u64> {
-        let mut global_state = ecx.machine.intptrcast.borrow_mut();
-        let global_state = &mut *global_state;
-
-        Ok(match global_state.base_addr.entry(alloc_id) {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
-                // There is nothing wrong with a raw pointer being cast to an integer only after
-                // it became dangling. Hence we allow dead allocations.
-                let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
-
-                // This allocation does not have a base address yet, pick one.
-                // Leave some space to the previous allocation, to give it some chance to be less aligned.
-                let slack = {
-                    let mut rng = ecx.machine.rng.borrow_mut();
-                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-                    rng.gen_range(0..16)
-                };
-                // From next_base_addr + slack, round up to adjust for alignment.
-                let base_addr = global_state
-                    .next_base_addr
-                    .checked_add(slack)
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                let base_addr = Self::align_addr(base_addr, align.bytes());
-                entry.insert(base_addr);
-                trace!(
-                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
-                    base_addr,
-                    alloc_id,
-                    size.bytes(),
-                    align.bytes(),
-                    slack,
-                );
-
-                // Remember next base address. If this allocation is zero-sized, leave a gap
-                // of at least 1 to avoid two allocations having the same base address.
-                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
-                // function/vtable pointers need to be distinguishable!)
-                global_state.next_base_addr = base_addr
-                    .checked_add(max(size.bytes(), 1))
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                // Even if `Size` didn't overflow, we might still have filled up the address space.
-                if global_state.next_base_addr > ecx.target_usize_max() {
-                    throw_exhaust!(AddressSpaceFull);
-                }
-                // Given that `next_base_addr` increases in each allocation, pushing the
-                // corresponding tuple keeps `int_to_ptr_map` sorted
-                global_state.int_to_ptr_map.push((base_addr, alloc_id));
-
-                base_addr
-            }
-        })
-    }
-
     /// Convert a relative (tcx) pointer to a Miri pointer.
-    pub fn ptr_from_rel_ptr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
+    fn ptr_from_rel_ptr(
+        &self,
         ptr: Pointer<AllocId>,
         tag: BorTag,
     ) -> InterpResult<'tcx, Pointer<Provenance>> {
+        let ecx = self.eval_context_ref();
+
         let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
+        let base_addr = ecx.addr_from_alloc_id(alloc_id)?;

         // Add offset with the right kind of pointer-overflowing arithmetic.
         let dl = ecx.data_layout();
@@ -231,22 +247,21 @@ impl<'mir, 'tcx> GlobalStateInner {

     /// When a pointer is used for a memory access, this computes where in which allocation the
     /// access is going.
-    pub fn ptr_get_alloc(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        ptr: Pointer<Provenance>,
-    ) -> Option<(AllocId, Size)> {
+    fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
+        let ecx = self.eval_context_ref();
+
         let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)

         let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
             alloc_id
         } else {
             // A wildcard pointer.
-            GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
+            ecx.alloc_id_from_addr(addr.bytes())?
         };

         // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
-        // must have been called in the past.
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
+        // must have been called in the past, so we can just look up the address in the map.
+        let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();

         // Wrapping "addr - base_addr"
         #[allow(clippy::cast_possible_wrap)] // we want to wrap here
@@ -256,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
             Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
         ))
     }
+}

-    /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
-    /// of `align` that is larger or equal to `addr`
-    fn align_addr(addr: u64, align: u64) -> u64 {
-        match addr % align {
-            0 => addr,
-            rem => addr.checked_add(align).unwrap() - rem,
-        }
+impl GlobalStateInner {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
+        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
+        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
+        // access to determine the allocation ID and offset -- and there can still be pointers with
+        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
+        // `None` only if the pointer truly has no provenance (this ensures consistent error
+        // messages).
+        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
+        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
+        // returns a dead allocation.
+        self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
+        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
+        // `alloc_id_from_addr` any more.
+        self.exposed.remove(&dead_id);
     }
 }

@@ -273,7 +298,7 @@ mod tests {

     #[test]
     fn test_align_addr() {
-        assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
-        assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
+        assert_eq!(align_addr(37, 4), 40);
+        assert_eq!(align_addr(44, 4), 44);
     }
 }
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index f1d8ce01bc2..68b9164dec0 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -117,7 +117,7 @@ pub use crate::eval::{
     create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
 };
 pub use crate::helpers::EvalContextExt as _;
-pub use crate::intptrcast::ProvenanceMode;
+pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
 pub use crate::machine::{
     AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
     PrimitiveLayouts, Provenance, ProvenanceExtra,
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 0ade43d4a8d..d5775912eab 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
             // Value does not matter, SB is disabled
             BorTag::default()
         };
-        intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
+        ecx.ptr_from_rel_ptr(ptr, tag)
     }

     /// Called on `usize as ptr` casts.
@@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         addr: u64,
     ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
-        intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
+        ecx.ptr_from_addr_cast(addr)
     }

     /// Called on `ptr as usize` casts.
@@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ptr: Pointer<Self::Provenance>,
     ) -> InterpResult<'tcx> {
         match ptr.provenance {
-            Provenance::Concrete { alloc_id, tag } =>
-                intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
+            Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
             Provenance::Wildcard => {
                 // No need to do anything for wildcard pointers as
                 // their provenances have already been previously exposed.
@@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         ptr: Pointer<Self::Provenance>,
     ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
-        let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
+        let rel = ecx.ptr_get_alloc(ptr);

         rel.map(|(alloc_id, size)| {
             let tag = match ptr.provenance {
@@ -1263,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
+        machine.intptrcast.get_mut().free_alloc_id(alloc_id);
         Ok(())
     }
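
The subtle behavioral change in this patch is the strict in-bounds rule in `alloc_id_from_addr`. Below is a minimal standalone sketch of that lookup: a greatest-lower-bound search over the sorted `int_to_ptr_map` with the new `offset < size` bound. Plain `u64` ids and a size closure stand in for Miri's `AllocId` and `get_alloc_info`; every name here is illustrative, not Miri's API. Note how a zero-sized allocation is still recognized at its base address (the `Ok` arm), while no allocation is recognized at its one-past-the-end address.

use std::collections::HashSet;

// (base_addr, alloc_id) pairs, sorted by base_addr, modeling `int_to_ptr_map`.
fn alloc_id_from_addr(
    int_to_ptr_map: &[(u64, u64)],
    size_of: impl Fn(u64) -> u64, // stand-in for `get_alloc_info(id).0`
    exposed: &HashSet<u64>,
    addr: u64,
) -> Option<u64> {
    let pos = int_to_ptr_map.binary_search_by_key(&addr, |(base, _)| *base);
    let alloc_id = match pos {
        // `addr` is exactly a base address; even zero-sized allocations match here.
        Ok(pos) => Some(int_to_ptr_map[pos].1),
        // `addr` is below every base address.
        Err(0) => None,
        // Greatest lower bound: the allocation whose base is the largest one below `addr`.
        // Strict in-bounds: the "end" address of an allocation must not resolve to it.
        Err(pos) => {
            let (glb, alloc_id) = int_to_ptr_map[pos - 1];
            let offset = addr - glb; // never overflows, since addr >= glb
            if offset < size_of(alloc_id) { Some(alloc_id) } else { None }
        }
    }?;
    // Only exposed allocations may be picked up by a wildcard pointer.
    exposed.contains(&alloc_id).then_some(alloc_id)
}

fn main() {
    // id 1: 4 bytes at 0x100; id 2: zero-sized at 0x110; both exposed.
    let map = [(0x100, 1), (0x110, 2)];
    let size = |id| if id == 1 { 4 } else { 0 };
    let exposed: HashSet<u64> = [1, 2].into_iter().collect();
    assert_eq!(alloc_id_from_addr(&map, size, &exposed, 0x103), Some(1));
    assert_eq!(alloc_id_from_addr(&map, size, &exposed, 0x104), None); // end address
    assert_eq!(alloc_id_from_addr(&map, size, &exposed, 0x110), Some(2)); // ZST base
}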
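The renamed `addr_from_alloc_id` keeps the old address-assignment policy, which the comments describe but the diff never shows end to end. The sketch below models that arithmetic under simplified assumptions: a fixed `slack` argument instead of the machine RNG, and no overflow or address-space-exhaustion checks. `assign_base_addr` is an illustrative name, not Miri's; `align_addr` is copied verbatim from the diff, and the first two assertions match `test_align_addr`.

fn align_addr(addr: u64, align: u64) -> u64 {
    match addr % align {
        0 => addr,
        rem => addr.checked_add(align).unwrap() - rem,
    }
}

fn assign_base_addr(next_base_addr: &mut u64, size: u64, align: u64, slack: u64) -> u64 {
    // Miri draws `slack` from its RNG so `(next_base_addr + slack) % 16` is uniform.
    assert!(slack < 16);
    // From next_base_addr + slack, round up to adjust for alignment.
    let base_addr = align_addr(*next_base_addr + slack, align);
    // Advance by at least 1 byte even for zero-sized allocations, so base
    // addresses stay unique -- the sorted-map lookup depends on that, and the
    // monotonically growing cursor is what keeps `int_to_ptr_map` sorted.
    *next_base_addr = base_addr + size.max(1);
    base_addr
}

fn main() {
    assert_eq!(align_addr(37, 4), 40);
    assert_eq!(align_addr(44, 4), 44);

    let mut next = 0x29;
    let a = assign_base_addr(&mut next, 10, 8, 3); // 0x29 + 3 = 0x2c, aligned up to 8
    assert_eq!(a, 0x30);
    let b = assign_base_addr(&mut next, 0, 1, 0); // zero-sized, lands at 0x30 + 10
    assert_eq!(b, 0x3a);
    let c = assign_base_addr(&mut next, 1, 1, 0); // still 1 byte after the ZST
    assert_eq!(c, 0x3b);
}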
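Finally, what `free_alloc_id` drops versus keeps, as a runnable toy (plain `u64` ids; the struct is a model of `GlobalStateInner`, not the real type): the reverse map and the exposed set forget the dead allocation, while `base_addr` deliberately still answers for it, so `ptr_get_alloc` can compute an (id, offset) pair for a dangling pointer before the access is rejected with a proper error.

use std::collections::{HashMap, HashSet};

struct State {
    int_to_ptr_map: Vec<(u64, u64)>, // sorted (base_addr, alloc_id)
    base_addr: HashMap<u64, u64>,    // alloc_id -> base_addr
    exposed: HashSet<u64>,
}

impl State {
    fn free_alloc_id(&mut self, dead_id: u64) {
        // Dropped: wildcard pointers must no longer resolve to this allocation.
        self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
        // Dropped: unreachable through the addr -> id lookup anyway.
        self.exposed.remove(&dead_id);
        // Kept: `base_addr`, so pointers carrying this provenance still map to
        // an offset when a (doomed) access is attempted.
    }
}

fn main() {
    let mut st = State {
        int_to_ptr_map: vec![(0x100, 1), (0x200, 2)],
        base_addr: HashMap::from([(1, 0x100), (2, 0x200)]),
        exposed: HashSet::from([1, 2]),
    };
    st.free_alloc_id(1);
    assert_eq!(st.int_to_ptr_map, vec![(0x200, 2)]);
    assert!(!st.exposed.contains(&1));
    assert_eq!(st.base_addr.get(&1), Some(&0x100)); // address still known
}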