miri: add some chance to reuse addresses of previously freed allocations #122240

Merged: 3 commits, Mar 13, 2024
compiler/rustc_const_eval/src/interpret/machine.rs (3 changes: 2 additions & 1 deletion)
@@ -433,7 +433,8 @@ pub trait Machine<'mir, 'tcx: 'mir>: Sized {
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_prov: (AllocId, Self::ProvenanceExtra),
_range: AllocRange,
_size: Size,
_align: Align,
) -> InterpResult<'tcx> {
Ok(())
}
compiler/rustc_const_eval/src/interpret/memory.rs (3 changes: 2 additions & 1 deletion)
@@ -345,7 +345,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
&mut self.machine,
&mut alloc.extra,
(alloc_id, prov),
alloc_range(Size::ZERO, size),
size,
alloc.align,
)?;

// Don't forget to remember size and align of this now-dead allocation
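Taken together, these two hunks change the deallocation hook to pass the allocation's size and alignment instead of an `AllocRange`: the range always started at offset zero, so only the size carried information, and the alignment is new data that the address-reuse machinery below needs. A minimal sketch of the new hook shape, with simplified stand-in types rather than the real rustc signatures:

// Simplified stand-ins; the real hook is `Machine::before_memory_deallocation`
// and also receives the machine, allocation extra data, and provenance.
#[derive(Clone, Copy)]
pub struct Size(pub u64);
#[derive(Clone, Copy)]
pub struct Align(pub u64);

pub trait DeallocHooks {
    // Before this PR the hook took `range: AllocRange`, which callers
    // always built as `alloc_range(Size::ZERO, size)`.
    fn before_memory_deallocation(&mut self, _size: Size, _align: Align) {
        // Default: do nothing, mirroring the no-op default in the trait.
    }
}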
Member: Ha! This name makes so much more sense, I'm surprised I didn't see this earlier

src/tools/miri/src/alloc_addresses/mod.rs (renamed from src/tools/miri/src/intptrcast.rs)
@@ -1,3 +1,8 @@
//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.

mod reuse_pool;

use std::cell::RefCell;
use std::cmp::max;
use std::collections::hash_map::Entry;
@@ -6,9 +11,10 @@
use rand::Rng;

use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_span::Span;
use rustc_target::abi::{HasDataLayout, Size};
use rustc_target::abi::{Align, HasDataLayout, Size};

use crate::*;
use reuse_pool::ReusePool;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
@@ -23,7 +29,7 @@ pub enum ProvenanceMode {

pub type GlobalState = RefCell<GlobalStateInner>;

#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
@@ -35,6 +41,8 @@ pub struct GlobalStateInner {
/// they do not have an `AllocExtra`.
/// This is the inverse of `int_to_ptr_map`.
base_addr: FxHashMap<AllocId, u64>,
/// A pool of addresses we can reuse for future allocations.
reuse: ReusePool,
/// Whether an allocation has been exposed or not. This cannot be put
/// into `AllocExtra` for the same reason as `base_addr`.
exposed: FxHashSet<AllocId>,
@@ -50,6 +58,7 @@ impl VisitProvenance for GlobalStateInner {
let GlobalStateInner {
int_to_ptr_map: _,
base_addr: _,
reuse: _,
exposed: _,
next_base_addr: _,
provenance_mode: _,
@@ -68,6 +77,7 @@ impl GlobalStateInner {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
reuse: ReusePool::new(),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
provenance_mode: config.provenance_mode,
@@ -96,7 +106,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
let global_state = ecx.machine.alloc_addresses.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);

let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
@@ -133,12 +143,13 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {

fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let global_state = &mut *global_state;

Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let mut rng = ecx.machine.rng.borrow_mut();
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
@@ -147,44 +158,63 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// information was removed.
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
let slack = {
let mut rng = ecx.machine.rng.borrow_mut();
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
rng.gen_range(0..16)
// This allocation does not have a base address yet, pick or reuse one.
let base_addr = if let Some(reuse_addr) =
global_state.reuse.take_addr(&mut *rng, size, align)
{
reuse_addr
} else {
// We have to pick a fresh address.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
// We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
let slack = rng.gen_range(0..16);
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = align_addr(base_addr, align.bytes());

// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}

base_addr
};
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
base_addr,
alloc_id,
size.bytes(),
align.bytes(),
slack,
);

// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Also maintain the opposite mapping in `int_to_ptr_map`.
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
// Store address in cache.
entry.insert(base_addr);

// Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
// We have a fast-path for the common case that this address is bigger than all previous ones.
let pos = if global_state
.int_to_ptr_map
.last()
.is_some_and(|(last_addr, _)| *last_addr < base_addr)
{
global_state.int_to_ptr_map.len()
} else {
global_state
.int_to_ptr_map
.binary_search_by_key(&base_addr, |(addr, _)| *addr)
.unwrap_err()
};
global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));

base_addr
}
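The fresh-address path above is easier to see in isolation: adding a uniformly random slack in `0..16` before rounding up makes the pre-alignment base address uniformly distributed mod 16, so an allocation has a real chance of ending up no more aligned than it requested. A small self-contained sketch, assuming `align_addr` rounds up to the next multiple of a power-of-two alignment, as the function of that name in this module does:

fn align_addr(addr: u64, align: u64) -> u64 {
    // Round `addr` up to the next multiple of `align` (a power of two).
    (addr + align - 1) & !(align - 1)
}

fn main() {
    let next_base_addr: u64 = 0x1000;
    let align: u64 = 8; // requested alignment
    for slack in 0..16u64 {
        let base = align_addr(next_base_addr + slack, align);
        // Roughly half of the slack values yield a base address that is
        // 8-aligned but not 16-aligned.
        println!("slack={slack:2} -> base={base:#x} (16-aligned: {})", base % 16 == 0);
    }
}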
@@ -196,7 +226,7 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir,
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return Ok(());
@@ -207,7 +237,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
return Ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = ecx.machine.intptrcast.get_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
@@ -219,7 +249,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
trace!("Casting {:#x} to a pointer", addr);

let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
let global_state = ecx.machine.alloc_addresses.borrow();

// Potentially emit a warning.
match global_state.provenance_mode {
@@ -299,7 +329,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}

impl GlobalStateInner {
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
pub fn free_alloc_id(
&mut self,
rng: &mut impl Rng,
dead_id: AllocId,
size: Size,
align: Align,
) {
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -319,6 +355,8 @@ impl GlobalStateInner {
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
// Also remember this address for future reuse.
self.reuse.add_addr(rng, addr, size, align)
}
}

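The `int_to_ptr_map` change above follows from reuse: freshly picked addresses were strictly increasing, so a plain `push` kept the map sorted, but a reused address can be smaller than existing entries. A standalone sketch of the sorted insert with its fast path, with `u32` standing in for `AllocId` and assuming the vector is sorted by address and each address maps to exactly one allocation:

fn insert_sorted(map: &mut Vec<(u64, u32)>, base_addr: u64, alloc_id: u32) {
    // Fast path: the common case is still that the new address is larger
    // than every address already in the map.
    let pos = if map.last().is_some_and(|(last_addr, _)| *last_addr < base_addr) {
        map.len()
    } else {
        // `unwrap_err` is fine because addresses are unique, so the key
        // cannot already be present in the map.
        map.binary_search_by_key(&base_addr, |(addr, _)| *addr).unwrap_err()
    };
    map.insert(pos, (base_addr, alloc_id));
}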
src/tools/miri/src/alloc_addresses/reuse_pool.rs (87 changes: 87 additions & 0 deletions)
@@ -0,0 +1,87 @@
//! Manages a pool of addresses that can be reused.

use rand::Rng;

use rustc_target::abi::{Align, Size};

const MAX_POOL_SIZE: usize = 64;

// Just use fair coins, until we have evidence that other numbers are better.
const ADDR_REMEMBER_CHANCE: f64 = 0.5;
const ADDR_TAKE_CHANCE: f64 = 0.5;

/// The pool strikes a balance between exploring more possible executions and making it more likely
/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
/// structure. Therefore we only reuse allocations when size and alignment match exactly.
#[derive(Debug)]
pub struct ReusePool {
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
/// allocations as address-size pairs, and each list is kept sorted by size.
///
/// Each of these lists has at most MAX_POOL_SIZE elements, and since alignment is limited to
/// fewer than 64 different possible values, that bounds the overall size of the pool.
pool: Vec<Vec<(u64, Size)>>,
}

impl ReusePool {
pub fn new() -> Self {
ReusePool { pool: vec![] }
}

fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
if self.pool.len() <= pool_idx {
self.pool.resize(pool_idx + 1, Vec::new());
}
&mut self.pool[pool_idx]
}

pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
// Let's see if we even want to remember this address.
if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
return;
}
// Determine the pool to add this to, and where in the pool to put it.
let subpool = self.subpool(align);
let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
// Make sure the pool does not grow too big.
if subpool.len() >= MAX_POOL_SIZE {
// Pool full. Replace existing element, or last one if this would be even bigger.
let clamped_pos = pos.min(subpool.len() - 1);
subpool[clamped_pos] = (addr, size);
return;
}
// Add address to pool, at the right position.
subpool.insert(pos, (addr, size));
}

pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
// Determine whether we'll even attempt a reuse.
if !rng.gen_bool(ADDR_TAKE_CHANCE) {
return None;
}
// Determine the pool to take this from.
let subpool = self.subpool(align);
// Let's see if we can find something of the right size. We want to find the full range of
// such items, beginning with the first, so we can't use `binary_search_by_key`.
let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
let mut end = begin;
while let Some((_addr, other_size)) = subpool.get(end) {
if *other_size != size {
break;
}
end += 1;
}
if end == begin {
// Could not find any item of the right size.
return None;
}
// Pick a random element with the desired size.
let idx = rng.gen_range(begin..end);
// Remove it from the pool and return.
let (chosen_addr, chosen_size) = subpool.remove(idx);
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
Some(chosen_addr)
}
}
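A short usage sketch of the pool, assuming this `ReusePool` and rustc_target's `Size`/`Align` are in scope; the address `0x2000` and the RNG seed are arbitrary illustration values:

use rand::rngs::StdRng;
use rand::SeedableRng;
use rustc_target::abi::{Align, Size};

fn demo(pool: &mut ReusePool) {
    let mut rng = StdRng::seed_from_u64(0);
    let size = Size::from_bytes(24);
    let align = Align::from_bytes(8).unwrap();
    // Offer a freed address; with ADDR_REMEMBER_CHANCE = 0.5 it is only
    // remembered about half the time.
    pool.add_addr(&mut rng, 0x2000, size, align);
    // A later allocation can get the address back only if both size and
    // alignment match exactly, and even then only with ADDR_TAKE_CHANCE.
    let reused = pool.take_addr(&mut rng, size, align);
    assert!(reused.is_none() || reused == Some(0x2000));
}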
src/tools/miri/src/borrow_tracker/mod.rs (6 changes: 3 additions & 3 deletions)
@@ -472,14 +472,14 @@ impl AllocState {
&mut self,
alloc_id: AllocId,
prov_extra: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
match self {
AllocState::StackedBorrows(sb) =>
sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
AllocState::TreeBorrows(tb) =>
tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
}
}

src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs (6 changes: 3 additions & 3 deletions)
@@ -574,13 +574,13 @@ impl Stacks {
&mut self,
alloc_id: AllocId,
tag: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, range.size.bytes());
trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, size.bytes());
let dcx = DiagnosticCxBuilder::dealloc(machine, tag);
let state = machine.borrow_tracker.as_ref().unwrap().borrow();
self.for_each(range, dcx, |stack, dcx, exposed_tags| {
self.for_each(alloc_range(Size::ZERO, size), dcx, |stack, dcx, exposed_tags| {
stack.dealloc(tag, &state, dcx, exposed_tags)
})?;
Ok(())
src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs (4 changes: 2 additions & 2 deletions)
@@ -80,7 +80,7 @@ impl<'tcx> Tree {
&mut self,
alloc_id: AllocId,
prov: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
// TODO: for now we bail out on wildcard pointers. Eventually we should
Expand All @@ -91,7 +91,7 @@ impl<'tcx> Tree {
};
let global = machine.borrow_tracker.as_ref().unwrap();
let span = machine.current_span();
self.dealloc(tag, range, global, alloc_id, span)
self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span)
}

pub fn expose_tag(&mut self, _tag: BorTag) {
src/tools/miri/src/concurrency/data_race.rs (4 changes: 2 additions & 2 deletions)
@@ -1071,10 +1071,10 @@ impl VClockAlloc {
pub fn deallocate<'tcx>(
&mut self,
alloc_id: AllocId,
range: AllocRange,
size: Size,
machine: &mut MiriMachine<'_, '_>,
) -> InterpResult<'tcx> {
self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine)
self.unique_access(alloc_id, alloc_range(Size::ZERO, size), NaWriteType::Deallocate, machine)
}
}
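The remaining hunks, in the borrow trackers and the data-race detector, all apply the same mechanical pattern: intermediate layers now pass a `Size`, and the one layer that genuinely works on a range rebuilds it as `alloc_range(Size::ZERO, size)`. A hypothetical miniature of that pattern with stand-in types:

#[derive(Clone, Copy)]
struct Size(u64);
#[derive(Clone, Copy)]
struct AllocRange { start: u64, size: Size }

fn alloc_range(start: u64, size: Size) -> AllocRange {
    AllocRange { start, size }
}

fn dealloc_in_range(range: AllocRange) {
    // ... per-range bookkeeping happens here ...
    let _ = (range.start, range.size.0);
}

fn before_memory_deallocation(size: Size) {
    // Deallocation always covers the whole allocation, so the range is
    // rebuilt at the leaf instead of being threaded through every layer.
    dealloc_in_range(alloc_range(0, size));
}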
