Rollup merge of #137802 - RalfJung:miri-native-call-exposed, r=oli-obk

miri native-call support: all previously exposed provenance is accessible to the callee

When Miri invokes a native C function, the memory C can access needs to be "prepared": to avoid false positives, we need to consider all that memory initialized, and we need to consider it to have arbitrary provenance. So far we did this for all pointers passed to C, but not for pointers that were exposed already before the native call. This PR adjusts the logic so that we now "prepare" all memory that has ever been exposed.

This fixes cases such as:
- cast a pointer to integer, send that integer to C, and access the memory there (`test_pass_ptr_as_int`)
- send a pointer to some memory to C, which stores it somewhere; then in Rust store another pointer in that memory, and access that via C (`test_pass_ptr_via_previously_shared_mem`)

r? `@oli-obk`
This commit is contained in:
Michael Goulet 2025-03-06 12:22:18 -05:00 committed by GitHub
commit b7b2179b5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 125 additions and 64 deletions

View file

@ -955,18 +955,13 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// Handle the effect an FFI call might have on the state of allocations.
/// This overapproximates the modifications which external code might make to memory:
/// We set all reachable allocations as initialized, mark all provenances as exposed
/// We set all reachable allocations as initialized, mark all reachable provenances as exposed
/// and overwrite them with `Provenance::WILDCARD`.
pub fn prepare_for_native_call(
&mut self,
id: AllocId,
initial_prov: M::Provenance,
) -> InterpResult<'tcx> {
// Expose provenance of the root allocation.
M::expose_provenance(self, initial_prov)?;
///
/// The allocations in `ids` are assumed to be already exposed.
pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
let mut done = FxHashSet::default();
let mut todo = vec![id];
let mut todo = ids;
while let Some(id) = todo.pop() {
if !done.insert(id) {
// We already saw this allocation before, don't process it again.

View file

@ -285,9 +285,19 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn expose_ptr(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
fn expose_provenance(&self, provenance: Provenance) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
let mut global_state = this.machine.alloc_addresses.borrow_mut();
let (alloc_id, tag) = match provenance {
Provenance::Concrete { alloc_id, tag } => (alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
// their provenances have already been previously exposed.
return interp_ok(());
}
};
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return interp_ok(());
@ -422,6 +432,19 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let rel_offset = this.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
Some((alloc_id, Size::from_bytes(rel_offset)))
}
/// Prepare all exposed memory for a native call.
/// This overapproximates the modifications which external code might make to memory:
/// We set all reachable allocations as initialized, mark all reachable provenances as exposed
/// and overwrite them with `Provenance::WILDCARD`.
fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
// We need to make a deep copy of this list, but it's fine; it also serves as scratch space
// for the search within `prepare_for_native_call`.
// `get_mut` is fine here (no runtime RefCell borrow) since we hold `&mut self`.
let exposed: Vec<AllocId> =
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
this.prepare_for_native_call(exposed)
}
}
impl<'tcx> MiriMachine<'tcx> {

View file

@ -1291,18 +1291,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
/// Called on `ptr as usize` casts.
/// (Actually computing the resulting `usize` doesn't need machine help,
/// that's just `Scalar::try_to_int`.)
#[inline(always)]
fn expose_provenance(
ecx: &InterpCx<'tcx, Self>,
provenance: Self::Provenance,
) -> InterpResult<'tcx> {
match provenance {
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
// their provenances have already been previously exposed.
interp_ok(())
}
}
ecx.expose_provenance(provenance)
}
/// Convert a pointer with provenance into an allocation-offset pair and extra provenance info.

View file

@ -160,16 +160,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
let imm = this.read_immediate(arg)?;
libffi_args.push(imm_to_carg(&imm, this)?);
// If we are passing a pointer, prepare the memory it points to.
// If we are passing a pointer, expose its provenance. Below, all exposed memory
// (previously exposed and new exposed) will then be properly prepared.
if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
let ptr = imm.to_scalar().to_pointer(this)?;
let Some(prov) = ptr.provenance else {
// Pointer without provenance may not access any memory.
continue;
};
// We use `get_alloc_id` for its best-effort behaviour with Wildcard provenance.
let Some(alloc_id) = prov.get_alloc_id() else {
// Wildcard pointer, whatever it points to must be already exposed.
// Pointer without provenance may not access any memory anyway, skip.
continue;
};
// The first time this happens, print a warning.
@ -178,12 +174,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem);
}
this.prepare_for_native_call(alloc_id, prov)?;
this.expose_provenance(prov)?;
}
}
// FIXME: In the future, we should also call `prepare_for_native_call` on all previously
// exposed allocations, since C may access any of them.
// Prepare all exposed memory.
this.prepare_exposed_for_native_call()?;
// Convert them to `libffi::high::Arg` type.
let libffi_args = libffi_args

View file

@ -6,7 +6,7 @@
#![feature(box_as_ptr)]
use std::mem::MaybeUninit;
use std::ptr::null;
use std::ptr;
fn main() {
test_increment_int();
@ -20,6 +20,8 @@ fn main() {
test_pass_dangling();
test_swap_ptr_triple_dangling();
test_return_ptr();
test_pass_ptr_as_int();
test_pass_ptr_via_previously_shared_mem();
}
/// Test function that modifies an int.
@ -112,7 +114,7 @@ fn test_swap_ptr() {
}
let x = 61;
let (mut ptr0, mut ptr1) = (&raw const x, null());
let (mut ptr0, mut ptr1) = (&raw const x, ptr::null());
unsafe { swap_ptr(&mut ptr0, &mut ptr1) };
assert_eq!(unsafe { *ptr1 }, x);
@ -131,7 +133,7 @@ fn test_swap_ptr_tuple() {
}
let x = 71;
let mut tuple = Tuple { ptr0: &raw const x, ptr1: null() };
let mut tuple = Tuple { ptr0: &raw const x, ptr1: ptr::null() };
unsafe { swap_ptr_tuple(&mut tuple) }
assert_eq!(unsafe { *tuple.ptr1 }, x);
@ -148,7 +150,7 @@ fn test_overwrite_dangling() {
drop(b);
unsafe { overwrite_ptr(&mut ptr) };
assert_eq!(ptr, null());
assert_eq!(ptr, ptr::null());
}
/// Test function that passes a dangling pointer.
@ -200,3 +202,33 @@ fn test_return_ptr() {
let ptr = unsafe { return_ptr(ptr) };
assert_eq!(unsafe { *ptr }, x);
}
/// Test casting a pointer to an integer and passing that to C.
/// The `as usize` cast exposes the pointer's provenance, so C may legally
/// write through the address even though no pointer crossed the FFI boundary.
fn test_pass_ptr_as_int() {
extern "C" {
fn pass_ptr_as_int(ptr: usize, set_to_val: i32);
}
let mut m: MaybeUninit<i32> = MaybeUninit::uninit();
// C receives only the integer address and writes 42 through it.
unsafe { pass_ptr_as_int(m.as_mut_ptr() as usize, 42) };
// If the write was accepted, this memory must now count as initialized.
assert_eq!(unsafe { m.assume_init() }, 42);
}
/// Test sharing memory with C once, then later storing a *new* pointer into
/// that shared memory from Rust and having C access it: even provenance
/// exposed before the native call must be usable by the callee.
fn test_pass_ptr_via_previously_shared_mem() {
extern "C" {
fn set_shared_mem(ptr: *mut *mut i32);
fn init_ptr_stored_in_shared_mem(val: i32);
}
let mut m: *mut i32 = ptr::null_mut();
let ptr_to_m = &raw mut m;
// C stashes this pointer in a global; `m`'s memory is now exposed to C.
unsafe { set_shared_mem(&raw mut m) };
let mut m2: MaybeUninit<i32> = MaybeUninit::uninit();
// Store a pointer to m2 somewhere that C code can access it.
unsafe { ptr_to_m.write(m2.as_mut_ptr()) };
// Have C code write there.
unsafe { init_ptr_stored_in_shared_mem(42) };
// Ensure this memory is now considered initialized.
assert_eq!(unsafe { m2.assume_init() }, 42);
}

View file

@ -1,33 +1,34 @@
#include <stdio.h>
#include <stdint.h>
// See comments in build_native_lib()
#define EXPORT __attribute__((visibility("default")))
/* Test: test_access_pointer */
EXPORT void print_pointer(const int *ptr) {
EXPORT void print_pointer(const int32_t *ptr) {
printf("printing pointer dereference from C: %d\n", *ptr);
}
/* Test: test_access_simple */
typedef struct Simple {
int field;
int32_t field;
} Simple;
EXPORT int access_simple(const Simple *s_ptr) {
EXPORT int32_t access_simple(const Simple *s_ptr) {
return s_ptr->field;
}
/* Test: test_access_nested */
typedef struct Nested {
int value;
int32_t value;
struct Nested *next;
} Nested;
// Returns the innermost/last value of a Nested pointer chain.
EXPORT int access_nested(const Nested *n_ptr) {
EXPORT int32_t access_nested(const Nested *n_ptr) {
// Edge case: `n_ptr == NULL` (i.e. first Nested is None).
if (!n_ptr) { return 0; }
@ -41,10 +42,10 @@ EXPORT int access_nested(const Nested *n_ptr) {
/* Test: test_access_static */
typedef struct Static {
int value;
int32_t value;
struct Static *recurse;
} Static;
EXPORT int access_static(const Static *s_ptr) {
EXPORT int32_t access_static(const Static *s_ptr) {
return s_ptr->recurse->recurse->value;
}

View file

@ -1,23 +1,24 @@
#include <stddef.h>
#include <stdint.h>
// See comments in build_native_lib()
#define EXPORT __attribute__((visibility("default")))
/* Test: test_increment_int */
EXPORT void increment_int(int *ptr) {
EXPORT void increment_int(int32_t *ptr) {
*ptr += 1;
}
/* Test: test_init_int */
EXPORT void init_int(int *ptr, int val) {
EXPORT void init_int(int32_t *ptr, int32_t val) {
*ptr = val;
}
/* Test: test_init_array */
EXPORT void init_array(int *array, size_t len, int val) {
EXPORT void init_array(int32_t *array, size_t len, int32_t val) {
for (size_t i = 0; i < len; i++) {
array[i] = val;
}
@ -26,28 +27,28 @@ EXPORT void init_array(int *array, size_t len, int val) {
/* Test: test_init_static_inner */
typedef struct SyncPtr {
int *ptr;
int32_t *ptr;
} SyncPtr;
EXPORT void init_static_inner(const SyncPtr *s_ptr, int val) {
EXPORT void init_static_inner(const SyncPtr *s_ptr, int32_t val) {
*(s_ptr->ptr) = val;
}
/* Tests: test_exposed, test_pass_dangling */
EXPORT void ignore_ptr(__attribute__((unused)) const int *ptr) {
EXPORT void ignore_ptr(__attribute__((unused)) const int32_t *ptr) {
return;
}
/* Test: test_expose_int */
EXPORT void expose_int(const int *int_ptr, const int **pptr) {
EXPORT void expose_int(const int32_t *int_ptr, const int32_t **pptr) {
*pptr = int_ptr;
}
/* Test: test_swap_ptr */
EXPORT void swap_ptr(const int **pptr0, const int **pptr1) {
const int *tmp = *pptr0;
EXPORT void swap_ptr(const int32_t **pptr0, const int32_t **pptr1) {
const int32_t *tmp = *pptr0;
*pptr0 = *pptr1;
*pptr1 = tmp;
}
@ -55,36 +56,54 @@ EXPORT void swap_ptr(const int **pptr0, const int **pptr1) {
/* Test: test_swap_ptr_tuple */
typedef struct Tuple {
int *ptr0;
int *ptr1;
int32_t *ptr0;
int32_t *ptr1;
} Tuple;
EXPORT void swap_ptr_tuple(Tuple *t_ptr) {
int *tmp = t_ptr->ptr0;
int32_t *tmp = t_ptr->ptr0;
t_ptr->ptr0 = t_ptr->ptr1;
t_ptr->ptr1 = tmp;
}
/* Test: test_overwrite_dangling */
EXPORT void overwrite_ptr(const int **pptr) {
EXPORT void overwrite_ptr(const int32_t **pptr) {
*pptr = NULL;
}
/* Test: test_swap_ptr_triple_dangling */
typedef struct Triple {
int *ptr0;
int *ptr1;
int *ptr2;
int32_t *ptr0;
int32_t *ptr1;
int32_t *ptr2;
} Triple;
EXPORT void swap_ptr_triple_dangling(Triple *t_ptr) {
int *tmp = t_ptr->ptr0;
int32_t *tmp = t_ptr->ptr0;
t_ptr->ptr0 = t_ptr->ptr2;
t_ptr->ptr2 = tmp;
}
EXPORT const int *return_ptr(const int *ptr) {
EXPORT const int32_t *return_ptr(const int32_t *ptr) {
return ptr;
}
/* Test: test_pass_ptr_as_int */
/* Receives a raw address as an integer, reconstructs a pointer from it,
 * and writes `set_to_val` through it. */
EXPORT void pass_ptr_as_int(uintptr_t ptr, int32_t set_to_val) {
*(int32_t*)ptr = set_to_val;
}
/* Test: test_pass_ptr_via_previously_shared_mem */
/* Global slot holding a pointer-to-pointer handed over by Rust;
 * it persists between the two calls below. */
int32_t** shared_place;
/* Remember the location Rust shared with us. */
EXPORT void set_shared_mem(int32_t** ptr) {
shared_place = ptr;
}
/* Write `val` through whatever pointer Rust has since stored in *shared_place. */
EXPORT void init_ptr_stored_in_shared_mem(int32_t val) {
**shared_place = val;
}

View file

@ -1,9 +1,10 @@
#include <stdio.h>
#include <stdint.h>
// See comments in build_native_lib()
#define EXPORT __attribute__((visibility("default")))
EXPORT int add_one_int(int x) {
EXPORT int32_t add_one_int(int32_t x) {
return 2 + x;
}
@ -13,23 +14,23 @@ EXPORT void printer(void) {
// function with many arguments, to test functionality when some args are stored
// on the stack
EXPORT int test_stack_spill(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l) {
EXPORT int32_t test_stack_spill(int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h, int32_t i, int32_t j, int32_t k, int32_t l) {
return a+b+c+d+e+f+g+h+i+j+k+l;
}
EXPORT unsigned int get_unsigned_int(void) {
EXPORT uint32_t get_unsigned_int(void) {
return -10;
}
EXPORT short add_int16(short x) {
EXPORT short add_int16(int16_t x) {
return x + 3;
}
EXPORT long add_short_to_long(short x, long y) {
EXPORT long add_short_to_long(int16_t x, int64_t y) {
return x + y;
}
// To test that functions not marked with EXPORT cannot be called by Miri.
int not_exported(void) {
int32_t not_exported(void) {
return 0;
}