Skip to content

rustc_const_eval: Expose APIs for signalling foreign accesses to memory #141391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 112 additions & 14 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,12 +979,18 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
}

/// Handle the effect an FFI call might have on the state of allocations.
/// This overapproximates the modifications which external code might make to memory:
/// We set all reachable allocations as initialized, mark all reachable provenances as exposed
/// and overwrite them with `Provenance::WILDCARD`.
/// If `paranoid` is true, overapproximates the modifications which external code might make
/// to memory: We set all reachable allocations as initialized, mark all reachable provenances
/// as exposed and overwrite them with `Provenance::WILDCARD`. Otherwise, it just makes sure
/// that all allocations are properly set up so that we don't leak whatever was in the uninit
/// bytes on FFI call.
///
/// The allocations in `ids` are assumed to be already exposed.
pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
pub fn prepare_for_native_call(
&mut self,
ids: Vec<AllocId>,
paranoid: bool,
) -> InterpResult<'tcx> {
let mut done = FxHashSet::default();
let mut todo = ids;
while let Some(id) = todo.pop() {
Expand All @@ -999,25 +1005,117 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
continue;
}

// Expose all provenances in this allocation, and add them to `todo`.
// Make sure we iterate over everything recursively, preparing the extra alloc info.
let alloc = self.get_alloc_raw(id)?;
for prov in alloc.provenance().provenances() {
M::expose_provenance(self, prov)?;
if paranoid {
// Expose all provenances in this allocation, and add them to `todo`.
M::expose_provenance(self, prov)?;
}
if let Some(id) = prov.get_alloc_id() {
todo.push(id);
}
}

// Also expose the provenance of the interpreter-level allocation, so it can
// be read by FFI. The `black_box` is defensive programming as LLVM likes
// to (incorrectly) optimize away ptr2int casts whose result is unused.
std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());

// Prepare for possible write from native code if mutable.
if info.mutbl.is_mut() {
self.get_alloc_raw_mut(id)?
.0
.prepare_for_native_write()
.map_err(|e| e.to_interp_error(id))?;
if paranoid {
std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
// Prepare for possible write from native code if mutable.
if info.mutbl.is_mut() {
self.get_alloc_raw_mut(id)?.0.prepare_for_native_write();
}
}
}
interp_ok(())
}

/// Updates the machine state "as if" the accesses given had been performed.
/// Used only by Miri for FFI, for taking note of events that were intercepted from foreign
/// code and properly (but still conservatively) marking their effects. Remember to call
/// `prepare_for_native_call` with `paranoid` set to false first on the same `AllocId`s, or
/// some writes may be discarded!
///
/// The allocations in `ids` are assumed to be already exposed.
pub fn apply_accesses(
&mut self,
mut ids: Vec<AllocId>,
reads: Vec<std::ops::Range<u64>>,
writes: Vec<std::ops::Range<u64>>,
) -> InterpResult<'tcx> {
/// Helper function to avoid some code duplication over range overlaps.
///
/// Computes the overlap of the absolute-address access range `rg` with the
/// allocation spanning `alloc_base..alloc_base + alloc_size`.
///
/// Returns `Some((start, size))`, where `start` is the offset of the overlap
/// *relative to the allocation base* and `size` its length in bytes, or
/// `None` if the two ranges do not overlap.
fn get_start_size(
    rg: std::ops::Range<u64>,
    alloc_base: u64,
    alloc_size: u64,
) -> Option<(u64, u64)> {
    // Intersect [rg.start, rg.end) with [alloc_base, alloc_end).
    // `checked_add` is defensive: an allocation wrapping the address space
    // cannot meaningfully overlap anything.
    let alloc_end = alloc_base.checked_add(alloc_size)?;
    let overlap_start = std::cmp::max(rg.start, alloc_base);
    let overlap_end = std::cmp::min(rg.end, alloc_end);
    if overlap_start >= overlap_end {
        // Empty intersection: the access does not touch this allocation.
        // (This also covers accesses entirely past the allocation end, which
        // previously underflowed `alloc_size - start`.)
        return None;
    }
    // Report the overlap relative to the allocation base.
    Some((overlap_start - alloc_base, overlap_end - overlap_start))
}

let mut done = FxHashSet::default();
while let Some(id) = ids.pop() {
if !done.insert(id) {
continue;
}
let info = self.get_alloc_info(id);

// If there is no data behind this pointer, skip this.
if !matches!(info.kind, AllocKind::LiveData) {
continue;
}

let alloc_base: u64 = {
// Keep the alloc here so the borrow checker is happy
let alloc = self.get_alloc_raw(id)?;
// No need for black_box trickery since we actually use the address
alloc.get_bytes_unchecked_raw().expose_provenance().try_into().unwrap()
};
let alloc_size = info.size.bytes();

// Find reads which overlap with the current allocation
for rg in &reads {
if let Some((start, size)) = get_start_size(rg.clone(), alloc_base, alloc_size) {
let alloc = self.get_alloc_raw(id)?;
let prov_map = alloc.provenance();
// Only iterate on the bytes that overlap with the access
for i in start..start + size {
// We can be conservative and only expose provenances actually read
if let Some(prov) = prov_map.get(Size::from_bytes(1), self)
&& rg.contains(&(alloc_base + i))
{
M::expose_provenance(self, prov)?;
if let Some(id) = prov.get_alloc_id() {
ids.push(id);
}
}
}
}
}

// Then do the same thing for writes, marking down that a write happened
for rg in &writes {
if let Some((start, size)) = get_start_size(rg.clone(), alloc_base, alloc_size)
&& self.get_alloc_mutability(id)?.is_mut()
{
let alloc_mut = self.get_alloc_raw_mut(id)?.0;
let range =
AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(size) };
alloc_mut.mark_foreign_write(range);
}
}
}
interp_ok(())
Expand Down
17 changes: 11 additions & 6 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
/// Initialize all previously uninitialized bytes in the entire allocation, and set
/// provenance of everything to `Wildcard`. Before calling this, make sure all
/// provenance in this allocation is exposed!
pub fn prepare_for_native_write(&mut self) -> AllocResult {
pub fn prepare_for_native_write(&mut self) {
let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
// Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
for chunk in self.init_mask.range_as_init_chunks(full_range) {
Expand All @@ -809,18 +809,23 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
uninit_bytes.fill(0);
}
}
self.mark_foreign_write(full_range);
}

/// Initialise previously uninitialised bytes in the given range, and set provenance of
/// everything in it to `Wildcard`. Before calling this, make sure all provenance in this
/// range is exposed!
pub fn mark_foreign_write(&mut self, range: AllocRange) {
// Mark everything as initialized now.
self.mark_init(full_range, true);
self.mark_init(range, true);

// Set provenance of all bytes to wildcard.
self.provenance.write_wildcards(self.len());
// Set provenance of affected bytes to wildcard.
self.provenance.write_wildcards(range);

// Also expose the provenance of the interpreter-level allocation, so it can
// be written by FFI. The `black_box` is defensive programming as LLVM likes
// to (incorrectly) optimize away ptr2int casts whose result is unused.
std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());

Ok(())
}

/// Remove all provenance in the given memory range.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,11 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
Ok(())
}

/// Overwrites all provenance in the allocation with wildcard provenance.
/// Overwrites all provenance in the specified range within the allocation
/// with wildcard provenance.
///
/// Provided for usage in Miri and panics otherwise.
pub fn write_wildcards(&mut self, alloc_size: usize) {
pub fn write_wildcards(&mut self, range: AllocRange) {
assert!(
Prov::OFFSET_IS_ADDR,
"writing wildcard provenance is not supported when `OFFSET_IS_ADDR` is false"
Expand All @@ -225,9 +226,8 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {

// Remove all pointer provenances, then write wildcards into the whole byte range.
self.ptrs.clear();
let last = Size::from_bytes(alloc_size);
let bytes = self.bytes.get_or_insert_with(Box::default);
for offset in Size::ZERO..last {
for offset in range.start..range.start + range.size {
bytes.insert(offset, wildcard);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/tools/miri/src/alloc_addresses/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// for the search within `prepare_for_native_call`.
let exposed: Vec<AllocId> =
this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
this.prepare_for_native_call(exposed)
this.prepare_for_native_call(exposed, true)
}
}

Expand Down
Loading