Skip to content

Bounds-check with PtrMetadata instead of Len in MIR #133734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions compiler/rustc_const_eval/src/check_consts/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,27 @@ impl<'tcx> Visitor<'tcx> for Checker<'_, 'tcx> {
) => {}
Rvalue::ShallowInitBox(_, _) => {}

Rvalue::UnaryOp(_, operand) => {
Rvalue::UnaryOp(op, operand) => {
let ty = operand.ty(self.body, self.tcx);
if is_int_bool_float_or_char(ty) {
// Int, bool, float, and char operations are fine.
} else {
span_bug!(self.span, "non-primitive type in `Rvalue::UnaryOp`: {:?}", ty);
match op {
UnOp::Not | UnOp::Neg => {
if is_int_bool_float_or_char(ty) {
// Int, bool, float, and char operations are fine.
} else {
span_bug!(
self.span,
"non-primitive type in `Rvalue::UnaryOp{op:?}`: {ty:?}",
);
}
}
UnOp::PtrMetadata => {
if !ty.is_ref() && !ty.is_unsafe_ptr() {
span_bug!(
self.span,
"non-pointer type in `Rvalue::UnaryOp({op:?})`: {ty:?}",
);
}
}
}
}

Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_middle/src/mir/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,9 @@ impl<'tcx> Const<'tcx> {
let const_val = tcx.valtree_to_const_val((ty, valtree));
Self::Val(const_val, ty)
}
ty::ConstKind::Unevaluated(ty::UnevaluatedConst { def, args }) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not correct, and there's no justification here? What is this for?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically, there's a distinction between ty::ConstKind::Unevaluated(ty::UnevaluatedConst(..)) and mir::ConstKind::Unevaluated(..) and I think this may be getting that wrong. But generally randomly changing a function to do something different without asking why it's that way that seems like it may introduce subtle bugs 🤔

Self::Unevaluated(UnevaluatedConst { def, args, promoted: None }, ty)
}
_ => Self::Ty(ty, c),
}
}
Expand Down
81 changes: 73 additions & 8 deletions compiler/rustc_mir_build/src/build/expr/as_place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,71 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
block.and(base_place.index(idx))
}

/// Given a place that's either an array or a slice, returns an operand
/// with the length of the array/slice.
///
/// For arrays it'll be `Operand::Constant` with the actual length;
/// For slices it'll be `Operand::Move` of a local using `PtrMetadata`.
fn len_of_slice_or_array(
&mut self,
block: BasicBlock,
place: Place<'tcx>,
span: Span,
source_info: SourceInfo,
) -> Operand<'tcx> {
let place_ty = place.ty(&self.local_decls, self.tcx).ty;
let usize_ty = self.tcx.types.usize;

match place_ty.kind() {
ty::Array(_elem_ty, len_const) => {
// We know how long an array is, so just use that as a constant
// directly -- no locals needed. We do need one statement so
// that borrow- and initialization-checking consider it used,
// though. FIXME: Do we really *need* to count this as a use?
// Could partial array tracking work off something else instead?
self.cfg.push_fake_read(block, source_info, FakeReadCause::ForIndex, place);
let const_ = Const::from_ty_const(*len_const, usize_ty, self.tcx);
Operand::Constant(Box::new(ConstOperand { span, user_ty: None, const_ }))
}
ty::Slice(_elem_ty) => {
let ptr_or_ref = if let [PlaceElem::Deref] = place.projection[..]
&& let local_ty = self.local_decls[place.local].ty
&& local_ty.is_trivially_pure_clone_copy()
{
// It's extremely common that we have something that can be
// directly passed to `PtrMetadata`, so avoid an unnecessary
// temporary and statement in those cases. Note that we can
// only do that for `Copy` types -- not `&mut [_]` -- because
// the MIR we're building here needs to pass NLL later.
Operand::Copy(Place::from(place.local))
} else {
let ptr_ty = Ty::new_imm_ptr(self.tcx, place_ty);
let slice_ptr = self.temp(ptr_ty, span);
self.cfg.push_assign(
block,
source_info,
slice_ptr,
Rvalue::RawPtr(Mutability::Not, place),
);
Operand::Move(slice_ptr)
};

let len = self.temp(usize_ty, span);
self.cfg.push_assign(
block,
source_info,
len,
Rvalue::UnaryOp(UnOp::PtrMetadata, ptr_or_ref),
);

Operand::Move(len)
}
_ => {
span_bug!(span, "len called on place of type {place_ty:?}")
}
}
}

fn bounds_check(
&mut self,
block: BasicBlock,
Expand All @@ -638,25 +703,25 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
expr_span: Span,
source_info: SourceInfo,
) -> BasicBlock {
let usize_ty = self.tcx.types.usize;
let bool_ty = self.tcx.types.bool;
// bounds check:
let len = self.temp(usize_ty, expr_span);
let lt = self.temp(bool_ty, expr_span);
let slice = slice.to_place(self);

// len = len(slice)
self.cfg.push_assign(block, source_info, len, Rvalue::Len(slice.to_place(self)));
let len = self.len_of_slice_or_array(block, slice, expr_span, source_info);

// lt = idx < len
let bool_ty = self.tcx.types.bool;
let lt = self.temp(bool_ty, expr_span);
self.cfg.push_assign(
block,
source_info,
lt,
Rvalue::BinaryOp(
BinOp::Lt,
Box::new((Operand::Copy(Place::from(index)), Operand::Copy(len))),
Box::new((Operand::Copy(Place::from(index)), len.to_copy())),
),
);
let msg = BoundsCheck { len: Operand::Move(len), index: Operand::Copy(Place::from(index)) };
let msg = BoundsCheck { len, index: Operand::Copy(Place::from(index)) };

// assert!(lt, "...")
self.assert(block, Operand::Move(lt), true, msg, expr_span)
}
Expand Down
13 changes: 0 additions & 13 deletions compiler/rustc_mir_transform/src/instsimplify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ impl<'tcx> crate::MirPass<'tcx> for InstSimplify {
}
ctx.simplify_bool_cmp(rvalue);
ctx.simplify_ref_deref(rvalue);
ctx.simplify_len(rvalue);
ctx.simplify_ptr_aggregate(rvalue);
ctx.simplify_cast(rvalue);
}
Expand Down Expand Up @@ -132,18 +131,6 @@ impl<'tcx> InstSimplifyContext<'_, 'tcx> {
}
}

/// Transform `Len([_; N])` ==> `N`.
fn simplify_len(&self, rvalue: &mut Rvalue<'tcx>) {
if let Rvalue::Len(ref place) = *rvalue {
let place_ty = place.ty(self.local_decls, self.tcx).ty;
if let ty::Array(_, len) = *place_ty.kind() {
let const_ = Const::from_ty_const(len, self.tcx.types.usize, self.tcx);
let constant = ConstOperand { span: DUMMY_SP, const_, user_ty: None };
*rvalue = Rvalue::Use(Operand::Constant(Box::new(constant)));
}
}
}

/// Transform `Aggregate(RawPtr, [p, ()])` ==> `Cast(PtrToPtr, p)`.
fn simplify_ptr_aggregate(&self, rvalue: &mut Rvalue<'tcx>) {
if let Rvalue::Aggregate(box AggregateKind::RawPtr(pointee_ty, mutability), fields) = rvalue
Expand Down
8 changes: 0 additions & 8 deletions compiler/rustc_mir_transform/src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,14 +1115,6 @@ impl<'a, 'tcx> Visitor<'tcx> for TypeChecker<'a, 'tcx> {
);
}
UnOp::PtrMetadata => {
if !matches!(self.body.phase, MirPhase::Runtime(_)) {
// It would probably be fine to support this in earlier phases, but at
// the time of writing it's only ever introduced from intrinsic
// lowering or other runtime-phase optimization passes, so earlier
// things can just `bug!` on it.
self.fail(location, "PtrMetadata should be in runtime MIR only");
}

check_kinds!(
a,
"Cannot PtrMetadata non-pointer non-reference type {:?}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn main() {
let v1 = safe::as_mut_slice(&v);
let v2 = safe::as_mut_slice(&v);
v1[1] = 5;
//~[stack]^ ERROR: /write access .* tag does not exist in the borrow stack/
//~[stack]^ ERROR: /trying to retag .+ for SharedReadOnly permission .+ tag does not exist in the borrow stack for this location/
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RalfJung I think what's happened here is that SB is now failing "on" the read of the slice length for the bounds check, before it gets to trying to write, thus it mentioning SharedReadOnly instead of "write access", which seems at least plausibly fine. But please double-check that I didn't blow this up in some horrible way 😬

Copy link
Member

@RalfJung RalfJung Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, yeah as you say it seems to fail on a read / retag-for-read now, instead of a write. It is a bit surprising that this would change now. How did the MIR change here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to add a slice index expression in mir-opt/retag.rs so that we can see how this looks like with retagging.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it's a &mut [_], it changed from doing the bounds check as

_2 = Len(*_1);
_3 = Lt(const 1_usize, _2)
assert(_3,);

to

_4 = &raw const *_1;
_2 = PtrMetadata(move _4);
_3 = Lt(const 1_usize, _2)
assert(_3,);

(It's not just PtrMetadata(copy _1) because there could be a projection [like Len((*_1).3)] and because that copy on a &mut ends up breaking borrowck.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yeah the &raw const here is relevant for Stacked Borrows -- it basically acts as a read on the entire slice. That is a bit unfortunate since so far, for slice accesses, we didn't do such a whole-slice retag. I think ideally we would not add a retag to these raw reborrows.

Is there any way for the retag pass to recognize these reborrows? Like, are they marked as syntactic sugar somewhere, or so? I recall we do that for some constructs, but I am not actually sure how it is represented.

Long-term I hope we can change SB to make raw retags true NOPs, but that will require a bit more work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, no, the read I added to the array case has FakeReadCause::ForIndex, but nothing for the slice case.

I guess if I was to mark the &raw const as something it'd be in the SourceInfo that I'd do that, by marking its span somehow?

Alternatively, AFAIK this is always dealing in a place where the projection chain starts with a deref, like (*_1).3. So is there a way in MIR building to emit just a PtrMetadata(copy _1) that always works? The problem I hit is that when _1 is a &mut [_], that blows up later in MIR borrowck, thus why I added the &raw const in the first place.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid I don't know much about MIR borrowck.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess if I was to mark the &raw const as something it'd be in the SourceInfo that I'd do that, by marking its span somehow?

Yeah I think that's how the desugaring markers usually work, but I am not sure.

v2[1] = 7;
//~[tree]^ ERROR: /write access through .* is forbidden/
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
error: Undefined Behavior: attempting a write access using <TAG> at ALLOC[0x4], but that tag does not exist in the borrow stack for this location
error: Undefined Behavior: trying to retag from <TAG> for SharedReadOnly permission at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
--> tests/fail/both_borrows/buggy_as_mut_slice.rs:LL:CC
|
LL | v1[1] = 5;
| ^^^^^^^^^
| ^^^^^
| |
| attempting a write access using <TAG> at ALLOC[0x4], but that tag does not exist in the borrow stack for this location
| this error occurs as part of an access at ALLOC[0x4..0x8]
| trying to retag from <TAG> for SharedReadOnly permission at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
| this error occurs as part of retag at ALLOC[0x0..0xc]
|
= help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental
= help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ fn main() -> () {
let mut _5: u32;
let mut _6: *mut usize;
let _7: usize;
let mut _8: usize;
let mut _9: bool;
let mut _8: bool;
scope 1 {
debug x => _1;
let mut _2: usize;
Expand Down Expand Up @@ -41,9 +40,8 @@ fn main() -> () {
StorageDead(_6);
StorageLive(_7);
_7 = copy _2;
_8 = Len(_1);
_9 = Lt(copy _7, copy _8);
assert(move _9, "index out of bounds: the length is {} but the index is {}", move _8, copy _7) -> [success: bb2, unwind unreachable];
_8 = Lt(copy _7, const 3_usize);
assert(move _8, "index out of bounds: the length is {} but the index is {}", const 3_usize, copy _7) -> [success: bb2, unwind unreachable];
}

bb2: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ fn main() -> () {
let mut _5: u32;
let mut _6: *mut usize;
let _7: usize;
let mut _8: usize;
let mut _9: bool;
let mut _8: bool;
scope 1 {
debug x => _1;
let mut _2: usize;
Expand Down Expand Up @@ -41,9 +40,8 @@ fn main() -> () {
StorageDead(_6);
StorageLive(_7);
_7 = copy _2;
_8 = Len(_1);
_9 = Lt(copy _7, copy _8);
assert(move _9, "index out of bounds: the length is {} but the index is {}", move _8, copy _7) -> [success: bb2, unwind continue];
_8 = Lt(copy _7, const 3_usize);
assert(move _8, "index out of bounds: the length is {} but the index is {}", const 3_usize, copy _7) -> [success: bb2, unwind continue];
}

bb2: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// MIR for `index_array` after built

fn index_array(_1: &[i32; 7], _2: usize) -> &i32 {
debug array => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
FakeRead(ForIndex, (*_1));
_5 = Lt(copy _4, const 7_usize);
assert(move _5, "index out of bounds: the length is {} but the index is {}", const 7_usize, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &(*_1)[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// MIR for `index_const_generic_array` after built

fn index_const_generic_array(_1: &[i32; N], _2: usize) -> &i32 {
debug array => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
FakeRead(ForIndex, (*_1));
_5 = Lt(copy _4, const N);
assert(move _5, "index out of bounds: the length is {} but the index is {}", const N, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &(*_1)[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// MIR for `index_custom` after built

fn index_custom(_1: &WithSliceTail, _2: usize) -> &i32 {
debug custom => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: *const [i32];
let mut _6: usize;
let mut _7: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
_5 = &raw const ((*_1).1: [i32]);
_6 = PtrMetadata(move _5);
_7 = Lt(copy _4, copy _6);
assert(move _7, "index out of bounds: the length is {} but the index is {}", move _6, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &((*_1).1: [i32])[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// MIR for `index_mut_slice` after built

fn index_mut_slice(_1: &mut [i32], _2: usize) -> &i32 {
debug slice => _1;
debug index => _2;
let mut _0: &i32;
let _3: &i32;
let _4: usize;
let mut _5: *const [i32];
let mut _6: usize;
let mut _7: bool;

bb0: {
StorageLive(_3);
StorageLive(_4);
_4 = copy _2;
_5 = &raw const (*_1);
_6 = PtrMetadata(move _5);
_7 = Lt(copy _4, copy _6);
assert(move _7, "index out of bounds: the length is {} but the index is {}", move _6, copy _4) -> [success: bb1, unwind: bb2];
}

bb1: {
_3 = &(*_1)[_4];
_0 = &(*_3);
StorageDead(_4);
StorageDead(_3);
return;
}

bb2 (cleanup): {
resume;
}
}
Loading
Loading