Expose APIs needed for better Miri FFI

nia-e · nia-e · commit a544425839d9 · 2025-05-22T14:45:01.000+02:00
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -977,12 +977,18 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     }
 
     /// Handle the effect an FFI call might have on the state of allocations.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
+    /// If `paranoid` is true, overapproximates the modifications which external code might make
+    /// to memory: We set all reachable allocations as initialized, mark all reachable provenances
+    /// as exposed and overwrite them with `Provenance::WILDCARD`. Otherwise, it just makes sure
+    /// that all allocations are properly set up so that we don't leak whatever was in the uninit
+    /// bytes on FFI call.
     ///
     /// The allocations in `ids` are assumed to be already exposed.
-    pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
+    pub fn prepare_for_native_call(
+        &mut self,
+        ids: Vec<AllocId>,
+        paranoid: bool,
+    ) -> InterpResult<'tcx> {
         let mut done = FxHashSet::default();
         let mut todo = ids;
         while let Some(id) = todo.pop() {
@@ -997,25 +1003,119 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                 continue;
             }
 
-            // Expose all provenances in this allocation, and add them to `todo`.
+            // Make sure we iterate over everything recursively, preparing the extra alloc info.
             let alloc = self.get_alloc_raw(id)?;
             for prov in alloc.provenance().provenances() {
-                M::expose_provenance(self, prov)?;
+                if paranoid {
+                    // Expose all provenances in this allocation, and add them to `todo`.
+                    M::expose_provenance(self, prov)?;
+                }
                 if let Some(id) = prov.get_alloc_id() {
                     todo.push(id);
                 }
             }
+
             // Also expose the provenance of the interpreter-level allocation, so it can
             // be read by FFI. The `black_box` is defensive programming as LLVM likes
             // to (incorrectly) optimize away ptr2int casts whose result is unused.
-            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-
-            // Prepare for possible write from native code if mutable.
-            if info.mutbl.is_mut() {
-                self.get_alloc_raw_mut(id)?
-                    .0
-                    .prepare_for_native_write()
-                    .map_err(|e| e.to_interp_error(id))?;
+            if paranoid {
+                std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
+                // Prepare for possible write from native code if mutable.
+                if info.mutbl.is_mut() {
+                    self.get_alloc_raw_mut(id)?.0.prepare_for_native_write();
+                }
+            }
+        }
+        interp_ok(())
+    }
+
+    /// Updates the machine state "as if" the accesses given had been performed.
+    /// Used only by Miri for FFI, to conservatively record events intercepted from foreign code.
+    /// `reads` and `writes` are half-open ranges of addresses, compared against the address of
+    /// each allocation's interpreter-level bytes. Call `prepare_for_native_call` with `paranoid`
+    /// set to false first on the same `AllocId`s, or some writes may be discarded!
+    ///
+    /// The allocations in `ids` are assumed to be already exposed.
+    pub fn apply_accesses(
+        &mut self,
+        mut ids: Vec<AllocId>,
+        reads: Vec<std::ops::Range<u64>>,
+        writes: Vec<std::ops::Range<u64>>,
+    ) -> InterpResult<'tcx> {
+        // Clamps `rg` to the allocation; returns (offset into the allocation, clamped length).
+        fn get_start_size(
+            rg: std::ops::Range<u64>,
+            alloc_base: u64,
+            alloc_size: u64,
+        ) -> (u64, u64) {
+            // Work in allocation-relative coordinates: a negative `signed_start` means the
+            // access begins before the allocation, so only the tail past `alloc_base` counts.
+            let signed_start = rg.start.cast_signed() - alloc_base.cast_signed();
+            let size_uncapped = if signed_start < 0 {
+                // The caller checked that the ranges overlap, so this must be > 0
+                (signed_start + (rg.end - rg.start).cast_signed()).try_into().unwrap()
+            } else {
+                rg.end - rg.start
+            };
+            let start: u64 = signed_start.try_into().unwrap_or(0);
+            let size = std::cmp::min(size_uncapped, alloc_size - start);
+            (start, size)
+        }
+
+        let mut done = FxHashSet::default();
+        while let Some(id) = ids.pop() {
+            if !done.insert(id) {
+                continue;
+            }
+            let info = self.get_alloc_info(id);
+
+            // If there is no data behind this pointer, skip this.
+            if !matches!(info.kind, AllocKind::LiveData) {
+                continue;
+            }
+
+            let alloc_base: u64 = {
+                // Keep the alloc here so the borrow checker is happy
+                let alloc = self.get_alloc_raw(id)?;
+                // No need for black_box trickery since we actually use the address
+                alloc.get_bytes_unchecked_raw().expose_provenance().try_into().unwrap()
+            };
+            let alloc_size = info.size.bytes();
+
+            // Find reads which overlap with the current allocation (non-empty intersection)
+            for rg in &reads {
+                let overlap = rg.start.max(alloc_base) < rg.end.min(alloc_base + alloc_size);
+                if overlap {
+                    let (start, size) = get_start_size(rg.clone(), alloc_base, alloc_size);
+
+                    let alloc = self.get_alloc_raw(id)?;
+                    let prov_map = alloc.provenance();
+                    // Only iterate on the bytes that overlap with the access
+                    for i in start..start + size {
+                        // Look up the provenance of byte `i` and expose only what was read
+                        if let Some(prov) = prov_map.get(Size::from_bytes(i), self)
+                            && rg.contains(&(alloc_base + i))
+                        {
+                            M::expose_provenance(self, prov)?;
+                            if let Some(id) = prov.get_alloc_id() {
+                                ids.push(id);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Then do the same thing for writes
+            for rg in &writes {
+                let overlap = rg.start.max(alloc_base) < rg.end.min(alloc_base + alloc_size);
+                if overlap {
+                    let (start, size) = get_start_size(rg.clone(), alloc_base, alloc_size);
+
+                    let alloc_mut = self.get_alloc_raw_mut(id)?.0;
+                    let range =
+                        AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(size) };
+                    alloc_mut.mark_foreign_write(range);
+                }
             }
         }
         interp_ok(())
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -775,7 +775,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     /// Initialize all previously uninitialized bytes in the entire allocation, and set
     /// provenance of everything to `Wildcard`. Before calling this, make sure all
     /// provenance in this allocation is exposed!
-    pub fn prepare_for_native_write(&mut self) -> AllocResult {
+    pub fn prepare_for_native_write(&mut self) {
         let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
         // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
         for chunk in self.init_mask.range_as_init_chunks(full_range) {
@@ -785,18 +785,23 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
                 uninit_bytes.fill(0);
             }
         }
+        self.mark_foreign_write(full_range);
+    }
+
+    /// Mark the bytes in `range` as initialized and set the provenance of everything in it to
+    /// `Wildcard`, as after a write by foreign code. Before calling this, make sure all
+    /// provenance in this range is exposed!
+    pub fn mark_foreign_write(&mut self, range: AllocRange) {
         // Mark everything as initialized now.
-        self.mark_init(full_range, true);
+        self.mark_init(range, true);
 
-        // Set provenance of all bytes to wildcard.
-        self.provenance.write_wildcards(self.len());
+        // Set provenance of affected bytes to wildcard.
+        self.provenance.write_wildcards(range);
 
         // Also expose the provenance of the interpreter-level allocation, so it can
         // be written by FFI. The `black_box` is defensive programming as LLVM likes
         // to (incorrectly) optimize away ptr2int casts whose result is unused.
         std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());
-
-        Ok(())
     }
 
     /// Remove all provenance in the given memory range.
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
@@ -213,10 +213,11 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
         Ok(())
     }
 
-    /// Overwrites all provenance in the allocation with wildcard provenance.
+    /// Overwrites all provenance in the specified range within the allocation with wildcard
+    /// provenance. NOTE(review): `self.ptrs.clear()` still drops provenance outside `range`.
     ///
     /// Provided for usage in Miri and panics otherwise.
-    pub fn write_wildcards(&mut self, alloc_size: usize) {
+    pub fn write_wildcards(&mut self, range: AllocRange) {
         assert!(
             Prov::OFFSET_IS_ADDR,
             "writing wildcard provenance is not supported when `OFFSET_IS_ADDR` is false"
@@ -225,9 +226,8 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
 
         // Remove all pointer provenances, then write wildcards into the whole byte range.
         self.ptrs.clear();
-        let last = Size::from_bytes(alloc_size);
         let bytes = self.bytes.get_or_insert_with(Box::default);
-        for offset in Size::ZERO..last {
+        for offset in range.start..range.start + range.size {
             bytes.insert(offset, wildcard);
         }
     }
diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -471,7 +471,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // for the search within `prepare_for_native_call`.
         let exposed: Vec<AllocId> =
             this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
-        this.prepare_for_native_call(exposed)
+        this.prepare_for_native_call(exposed, true)
     }
 }
 

Original file line number	Diff line number	Diff line change
`@@ -471,7 +471,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {`
`471`	`471`	// for the search within `prepare_for_native_call`.
`472`	`472`	`let exposed: Vec<AllocId> =`
`473`	`473`	`this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();`
`474`		`- this.prepare_for_native_call(exposed)`
	`474`	`+ this.prepare_for_native_call(exposed, true)`
`475`	`475`	`}`
`476`	`476`	`}`
`477`	`477`