Skip to content

Bounds-check with PtrMetadata instead of Len in MIR #133734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions compiler/rustc_const_eval/src/check_consts/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,27 @@ impl<'tcx> Visitor<'tcx> for Checker<'_, 'tcx> {
) => {}
Rvalue::ShallowInitBox(_, _) => {}

Rvalue::UnaryOp(_, operand) => {
Rvalue::UnaryOp(op, operand) => {
let ty = operand.ty(self.body, self.tcx);
if is_int_bool_float_or_char(ty) {
// Int, bool, float, and char operations are fine.
} else {
span_bug!(self.span, "non-primitive type in `Rvalue::UnaryOp`: {:?}", ty);
match op {
UnOp::Not | UnOp::Neg => {
if is_int_bool_float_or_char(ty) {
// Int, bool, float, and char operations are fine.
} else {
span_bug!(
self.span,
"non-primitive type in `Rvalue::UnaryOp{op:?}`: {ty:?}",
);
}
}
UnOp::PtrMetadata => {
if !ty.is_ref() && !ty.is_unsafe_ptr() {
span_bug!(
self.span,
"non-pointer type in `Rvalue::UnaryOp({op:?})`: {ty:?}",
);
}
}
}
}

Expand Down
13 changes: 11 additions & 2 deletions compiler/rustc_const_eval/src/interpret/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use rustc_middle::ty::layout::FnAbiOf;
use rustc_middle::ty::{self, Instance, Ty};
use rustc_middle::{bug, mir, span_bug};
use rustc_span::source_map::Spanned;
use rustc_span::{DesugaringKind, Span};
use rustc_target::callconv::FnAbi;
use tracing::{info, instrument, trace};

Expand Down Expand Up @@ -80,7 +81,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
use rustc_middle::mir::StatementKind::*;

match &stmt.kind {
Assign(box (place, rvalue)) => self.eval_rvalue_into_place(rvalue, *place)?,
Assign(box (place, rvalue)) => {
self.eval_rvalue_into_place(rvalue, *place, stmt.source_info.span)?
}

SetDiscriminant { place, variant_index } => {
let dest = self.eval_place(**place)?;
Expand Down Expand Up @@ -159,6 +162,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
&mut self,
rvalue: &mir::Rvalue<'tcx>,
place: mir::Place<'tcx>,
span: Span,
) -> InterpResult<'tcx> {
let dest = self.eval_place(place)?;
// FIXME: ensure some kind of non-aliasing between LHS and RHS?
Expand Down Expand Up @@ -250,8 +254,13 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let src = self.eval_place(place)?;
let place = self.force_allocation(&src)?;
let mut val = ImmTy::from_immediate(place.to_ref(self), dest.layout);
if !place_base_raw {
if !place_base_raw
&& span.desugaring_kind() != Some(DesugaringKind::IndexBoundsCheckReborrow)
{
// If this was not already raw, it needs retagging.
// As a special hack, we exclude the desugared `PtrMetadata(&raw const *_n)`
// from indexing. (Really we should not do any retag on `&raw` but that does not
// currently work with Stacked Borrows.)
val = M::retag_ptr_value(self, mir::RetagKind::Raw, &val)?;
}
self.write_immediate(*val, &dest)?;
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_middle/src/mir/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,9 @@ impl<'tcx> Const<'tcx> {
let const_val = tcx.valtree_to_const_val((ty, valtree));
Self::Val(const_val, ty)
}
ty::ConstKind::Unevaluated(ty::UnevaluatedConst { def, args }) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not correct, and there's no justification here? What is this for?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically, there's a distinction between ty::ConstKind::Unevaluated(ty::UnevaluatedConst(..)) and mir::ConstKind::Unevaluated(..) and I think this may be getting that wrong. But generally randomly changing a function to do something different without asking why it's that way that seems like it may introduce subtle bugs 🤔

Self::Unevaluated(UnevaluatedConst { def, args, promoted: None }, ty)
}
_ => Self::Ty(ty, c),
}
}
Expand Down
92 changes: 83 additions & 9 deletions compiler/rustc_mir_build/src/build/expr/as_place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use rustc_middle::mir::*;
use rustc_middle::thir::*;
use rustc_middle::ty::{self, AdtDef, CanonicalUserTypeAnnotation, Ty, Variance};
use rustc_middle::{bug, span_bug};
use rustc_span::Span;
use rustc_span::{DesugaringKind, Span};
use tracing::{debug, instrument, trace};

use crate::build::ForGuard::{OutsideGuard, RefWithinGuard};
Expand Down Expand Up @@ -630,6 +630,80 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
block.and(base_place.index(idx))
}

/// Given a place that's either an array or a slice, returns an operand
/// with the length of the array/slice.
///
/// For arrays it'll be `Operand::Constant` with the actual length;
/// For slices it'll be `Operand::Move` of a local using `PtrMetadata`.
fn len_of_slice_or_array(
&mut self,
block: BasicBlock,
place: Place<'tcx>,
span: Span,
source_info: SourceInfo,
) -> Operand<'tcx> {
let place_ty = place.ty(&self.local_decls, self.tcx).ty;
let usize_ty = self.tcx.types.usize;

match place_ty.kind() {
ty::Array(_elem_ty, len_const) => {
// We know how long an array is, so just use that as a constant
// directly -- no locals needed. We do need one statement so
// that borrow- and initialization-checking consider it used,
// though. FIXME: Do we really *need* to count this as a use?
// Could partial array tracking work off something else instead?
self.cfg.push_fake_read(block, source_info, FakeReadCause::ForIndex, place);
let const_ = Const::from_ty_const(*len_const, usize_ty, self.tcx);
Operand::Constant(Box::new(ConstOperand { span, user_ty: None, const_ }))
}
ty::Slice(_elem_ty) => {
let ptr_or_ref = if let [PlaceElem::Deref] = place.projection[..]
&& let local_ty = self.local_decls[place.local].ty
&& local_ty.is_trivially_pure_clone_copy()
{
// It's extremely common that we have something that can be
// directly passed to `PtrMetadata`, so avoid an unnecessary
// temporary and statement in those cases. Note that we can
// only do that for `Copy` types -- not `&mut [_]` -- because
// the MIR we're building here needs to pass NLL later.
Operand::Copy(Place::from(place.local))
} else {
let len_span = self.tcx.with_stable_hashing_context(|hcx| {
let span = source_info.span;
span.mark_with_reason(
None,
DesugaringKind::IndexBoundsCheckReborrow,
span.edition(),
hcx,
)
});
let ptr_ty = Ty::new_imm_ptr(self.tcx, place_ty);
let slice_ptr = self.temp(ptr_ty, span);
self.cfg.push_assign(
block,
SourceInfo { span: len_span, ..source_info },
slice_ptr,
Rvalue::RawPtr(Mutability::Not, place),
);
Operand::Move(slice_ptr)
};

let len = self.temp(usize_ty, span);
self.cfg.push_assign(
block,
source_info,
len,
Rvalue::UnaryOp(UnOp::PtrMetadata, ptr_or_ref),
);

Operand::Move(len)
}
_ => {
span_bug!(span, "len called on place of type {place_ty:?}")
}
}
}

fn bounds_check(
&mut self,
block: BasicBlock,
Expand All @@ -638,25 +712,25 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
expr_span: Span,
source_info: SourceInfo,
) -> BasicBlock {
let usize_ty = self.tcx.types.usize;
let bool_ty = self.tcx.types.bool;
// bounds check:
let len = self.temp(usize_ty, expr_span);
let lt = self.temp(bool_ty, expr_span);
let slice = slice.to_place(self);

// len = len(slice)
self.cfg.push_assign(block, source_info, len, Rvalue::Len(slice.to_place(self)));
let len = self.len_of_slice_or_array(block, slice, expr_span, source_info);

// lt = idx < len
let bool_ty = self.tcx.types.bool;
let lt = self.temp(bool_ty, expr_span);
self.cfg.push_assign(
block,
source_info,
lt,
Rvalue::BinaryOp(
BinOp::Lt,
Box::new((Operand::Copy(Place::from(index)), Operand::Copy(len))),
Box::new((Operand::Copy(Place::from(index)), len.to_copy())),
),
);
let msg = BoundsCheck { len: Operand::Move(len), index: Operand::Copy(Place::from(index)) };
let msg = BoundsCheck { len, index: Operand::Copy(Place::from(index)) };

// assert!(lt, "...")
self.assert(block, Operand::Move(lt), true, msg, expr_span)
}
Expand Down
13 changes: 0 additions & 13 deletions compiler/rustc_mir_transform/src/instsimplify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ impl<'tcx> crate::MirPass<'tcx> for InstSimplify {
}
ctx.simplify_bool_cmp(rvalue);
ctx.simplify_ref_deref(rvalue);
ctx.simplify_len(rvalue);
ctx.simplify_ptr_aggregate(rvalue);
ctx.simplify_cast(rvalue);
}
Expand Down Expand Up @@ -132,18 +131,6 @@ impl<'tcx> InstSimplifyContext<'_, 'tcx> {
}
}

/// Transform `Len([_; N])` ==> `N`.
fn simplify_len(&self, rvalue: &mut Rvalue<'tcx>) {
if let Rvalue::Len(ref place) = *rvalue {
let place_ty = place.ty(self.local_decls, self.tcx).ty;
if let ty::Array(_, len) = *place_ty.kind() {
let const_ = Const::from_ty_const(len, self.tcx.types.usize, self.tcx);
let constant = ConstOperand { span: DUMMY_SP, const_, user_ty: None };
*rvalue = Rvalue::Use(Operand::Constant(Box::new(constant)));
}
}
}

/// Transform `Aggregate(RawPtr, [p, ()])` ==> `Cast(PtrToPtr, p)`.
fn simplify_ptr_aggregate(&self, rvalue: &mut Rvalue<'tcx>) {
if let Rvalue::Aggregate(box AggregateKind::RawPtr(pointee_ty, mutability), fields) = rvalue
Expand Down
8 changes: 0 additions & 8 deletions compiler/rustc_mir_transform/src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,14 +1115,6 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> {
);
}
UnOp::PtrMetadata => {
if !matches!(self.body.phase, MirPhase::Runtime(_)) {
// It would probably be fine to support this in earlier phases, but at
// the time of writing it's only ever introduced from intrinsic
// lowering or other runtime-phase optimization passes, so earlier
// things can just `bug!` on it.
self.fail(location, "PtrMetadata should be in runtime MIR only");
}

check_kinds!(
a,
"Cannot PtrMetadata non-pointer non-reference type {:?}",
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_span/src/hygiene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,9 @@ pub enum DesugaringKind {
WhileLoop,
/// `async Fn()` bound modifier
BoundModifier,
/// Marks a `&raw const *_1` needed as part of getting the length of a mutable
/// slice for the bounds check, so that MIRI's retag handling can recognize it.
IndexBoundsCheckReborrow,
Copy link
Member

@compiler-errors compiler-errors Jan 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Span should definitely not be used like this. Span desugarings should only ever be used for diagnostics and things like that -- making miri rely on this behavior seems really bad. If we need to track certain locals for the purposes of some special miri hack, this needs to be recorded in the MIR body separately.

cc @scottmcm @RalfJung

Copy link
Member

@RalfJung RalfJung Jan 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a pretty bad hack, yeah. Medium-term I hope that Miri can entirely stop doing retagging on &raw, but this currently runs into issues... so not sure if it's worth spending a lot of effort on the proper thing.

}

impl DesugaringKind {
Expand All @@ -1179,6 +1182,7 @@ impl DesugaringKind {
DesugaringKind::ForLoop => "`for` loop",
DesugaringKind::WhileLoop => "`while` loop",
DesugaringKind::BoundModifier => "trait bound modifier",
DesugaringKind::IndexBoundsCheckReborrow => "slice indexing",
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ fn main() -> () {
let mut _5: u32;
let mut _6: *mut usize;
let _7: usize;
let mut _8: usize;
let mut _9: bool;
let mut _8: bool;
scope 1 {
debug x => _1;
let mut _2: usize;
Expand Down Expand Up @@ -41,9 +40,8 @@ fn main() -> () {
StorageDead(_6);
StorageLive(_7);
_7 = copy _2;
_8 = Len(_1);
_9 = Lt(copy _7, copy _8);
assert(move _9, "index out of bounds: the length is {} but the index is {}", move _8, copy _7) -> [success: bb2, unwind unreachable];
_8 = Lt(copy _7, const 3_usize);
assert(move _8, "index out of bounds: the length is {} but the index is {}", const 3_usize, copy _7) -> [success: bb2, unwind unreachable];
}

bb2: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ fn main() -> () {
let mut _5: u32;
let mut _6: *mut usize;
let _7: usize;
let mut _8: usize;
let mut _9: bool;
let mut _8: bool;
scope 1 {
debug x => _1;
let mut _2: usize;
Expand Down Expand Up @@ -41,9 +40,8 @@ fn main() -> () {
StorageDead(_6);
StorageLive(_7);
_7 = copy _2;
_8 = Len(_1);
_9 = Lt(copy _7, copy _8);
assert(move _9, "index out of bounds: the length is {} but the index is {}", move _8, copy _7) -> [success: bb2, unwind continue];
_8 = Lt(copy _7, const 3_usize);
assert(move _8, "index out of bounds: the length is {} but the index is {}", const 3_usize, copy _7) -> [success: bb2, unwind continue];
}

bb2: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// MIR for `index_array` after built

fn index_array(_1: &[i32; 7], _2: usize) -> &i32 {
debug array => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
FakeRead(ForIndex, (*_1));
_5 = Lt(copy _4, const 7_usize);
assert(move _5, "index out of bounds: the length is {} but the index is {}", const 7_usize, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &(*_1)[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// MIR for `index_const_generic_array` after built

fn index_const_generic_array(_1: &[i32; N], _2: usize) -> &i32 {
debug array => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
FakeRead(ForIndex, (*_1));
_5 = Lt(copy _4, const N);
assert(move _5, "index out of bounds: the length is {} but the index is {}", const N, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &(*_1)[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Loading
Loading