Skip to content

Commit 0e70c26

Browse files
committed
fix BTree creating shared references that are not entirely in-bounds
1 parent 1ccb5b2 commit 0e70c26

File tree

1 file changed

+86
-29
lines changed
  • src/liballoc/collections/btree

1 file changed

+86
-29
lines changed

src/liballoc/collections/btree/node.rs

+86-29
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1;
5858
/// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned
5959
/// case.
6060
///
61-
/// We put the metadata first so that its position is the same for every `K` and `V`, in order
62-
/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to
63-
/// prevent them from being reordered.
61+
/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in
62+
/// order to statically allocate a single dummy node to avoid allocations. This struct is
63+
/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a
64+
/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys.
65+
/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited
66+
/// by `as_header`.)
67+
/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around
68+
/// because the size of `NodeHeader` depends on its alignment!
69+
#[repr(C)]
70+
struct NodeHeader<K, V, K2 = ()> {
71+
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
72+
/// This either points to an actual node or is null.
73+
parent: *const InternalNode<K, V>,
74+
75+
/// This node's index into the parent node's `edges` array.
76+
/// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`.
77+
/// This is only guaranteed to be initialized when `parent` is non-null.
78+
parent_idx: MaybeUninit<u16>,
79+
80+
/// The number of keys and values this node stores.
81+
///
82+
/// This next to `parent_idx` to encourage the compiler to join `len` and
83+
/// `parent_idx` into the same 32-bit word, reducing space overhead.
84+
len: u16,
85+
86+
/// See `into_key_slice`.
87+
keys_start: [K2; 0],
88+
}
6489
#[repr(C)]
6590
struct LeafNode<K, V> {
6691
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
@@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> {
98123
len: 0
99124
}
100125
}
126+
}
101127

128+
impl<K, V> NodeHeader<K, V> {
102129
fn is_shared_root(&self) -> bool {
103130
ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _)
104131
}
105132
}
106133

107134
// We need to implement Sync here in order to make a static instance.
108-
unsafe impl Sync for LeafNode<(), ()> {}
135+
unsafe impl Sync for NodeHeader<(), ()> {}
109136

110137
// An empty node used as a placeholder for the root node, to avoid allocations.
111-
// We use () in order to save space, since no operation on an empty tree will
138+
// We use just a header in order to save space, since no operation on an empty tree will
112139
// ever take a pointer past the first key.
113-
static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode {
140+
static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader {
114141
parent: ptr::null(),
115142
parent_idx: MaybeUninit::uninitialized(),
116143
len: 0,
117-
keys: MaybeUninit::uninitialized(),
118-
vals: MaybeUninit::uninitialized(),
144+
keys_start: [],
119145
};
120146

121147
/// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden
@@ -306,6 +332,11 @@ impl<K, V> Root<K, V> {
306332
/// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the
307333
/// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the
308334
/// `NodeRef` could be pointing to either type of node.
335+
/// Note that in case of a leaf node, this might still be the shared root! Only turn
336+
/// this into a `LeafNode` reference if you know it is not a root! Shared references
337+
/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode`
338+
/// pointing to the shared root is UB.
339+
/// Turning this into a `NodeHeader` is always safe.
309340
pub struct NodeRef<BorrowType, K, V, Type> {
310341
height: usize,
311342
node: NonNull<LeafNode<K, V>>,
@@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
352383
/// Finds the length of the node. This is the number of keys or values. In an
353384
/// internal node, the number of edges is `len() + 1`.
354385
pub fn len(&self) -> usize {
355-
self.as_leaf().len as usize
386+
self.as_header().len as usize
356387
}
357388

358389
/// Returns the height of this node in the whole tree. Zero height denotes the
@@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
382413
}
383414
}
384415

385-
fn as_leaf(&self) -> &LeafNode<K, V> {
416+
/// Assert that this is indeed a proper leaf node, and not the shared root.
417+
unsafe fn as_leaf(&self) -> &LeafNode<K, V> {
418+
self.node.as_ref()
419+
}
420+
421+
fn as_header(&self) -> &NodeHeader<K, V> {
386422
unsafe {
387-
self.node.as_ref()
423+
&*(self.node.as_ptr() as *const NodeHeader<K, V>)
388424
}
389425
}
390426

391427
pub fn is_shared_root(&self) -> bool {
392-
self.as_leaf().is_shared_root()
428+
self.as_header().is_shared_root()
393429
}
394430

395431
pub fn keys(&self) -> &[K] {
@@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
418454
>,
419455
Self
420456
> {
421-
let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>;
457+
let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>;
422458
if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) {
423459
Ok(Handle {
424460
node: NodeRef {
@@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
427463
root: self.root,
428464
_marker: PhantomData
429465
},
430-
idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) },
466+
idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) },
431467
_marker: PhantomData
432468
})
433469
} else {
@@ -535,9 +571,8 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
535571
}
536572

537573
fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
538-
unsafe {
539-
self.node.as_mut()
540-
}
574+
// We are mutable, so we cannot be the root, so this is okay.
575+
unsafe { self.node.as_mut() }
541576
}
542577

543578
fn keys_mut(&mut self) -> &mut [K] {
@@ -551,28 +586,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
551586

552587
impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> {
553588
fn into_key_slice(self) -> &'a [K] {
554-
// When taking a pointer to the keys, if our key has a stricter
555-
// alignment requirement than the shared root does, then the pointer
556-
// would be out of bounds, which LLVM assumes will not happen. If the
557-
// alignment is more strict, we need to make an empty slice that doesn't
558-
// use an out of bounds pointer.
589+
// We have to be careful here because we might be pointing to the shared root.
590+
// In that case, we must not create an `&LeafNode`. We could just return
591+
// an empty slice whenever the lenght is 0 (this includes the shared root),
592+
// but we want to avoid that run-time check.
593+
// Instead, we create a slice pointing into the node whenever possible.
594+
// We can sometimes do this even for the shared root, as the slice will be
595+
// empty. We cannot *always* do this because if the type is too highly
596+
// aligned, the offset of `keys` in a "full node" might be outside the bounds
597+
// of the header! So we do an alignment check first, that will be
598+
// evaluated at compile-time, and only do any run-time check in the rare case
599+
// that the alignment is very big.
559600
if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() {
560601
&[]
561602
} else {
562-
// Here either it's not the root, or the alignment is less strict,
563-
// in which case the keys pointer will point "one-past-the-end" of
564-
// the node, which is allowed by LLVM.
603+
// Thanks to the alignment check above, we know that `keys` will be
604+
// in-bounds of some allocation even if this is the shared root!
605+
// (We might be one-past-the-end, but that is allowed by LLVM.)
606+
// Getting the pointer is tricky though. `NodeHeader` does not have a `keys`
607+
// field because we want its size to not depend on the alignment of `K`
608+
// (needed becuase `as_header` should be safe). We cannot call `as_leaf`
609+
// because we might be the shared root.
610+
// For this reason, `NodeHeader` has this `K2` parameter (that's usually `()`
611+
// and hence just adds a size-0-align-1 field, not affecting layout).
612+
// We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>`
613+
// because we did the alignment check above, and hence `NodeHeader<K, V, K>`
614+
// is not bigger than `NodeHeader<K, V, ()>`! Then we can use `NodeHeader<K, V, K>`
615+
// to compute the pointer where the keys start.
616+
// This entire hack will become unnecessary once
617+
// <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw
618+
// pointer to the `keys` field of `*const InternalNode<K, V>`.
619+
620+
// This is a non-debug-assert because it can be completely compile-time evaluated.
621+
assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>());
622+
let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>;
623+
let keys = unsafe { &(*header).keys_start as *const _ as *const K };
565624
unsafe {
566-
slice::from_raw_parts(
567-
self.as_leaf().keys.as_ptr() as *const K,
568-
self.len()
569-
)
625+
slice::from_raw_parts(keys, self.len())
570626
}
571627
}
572628
}
573629

574630
fn into_val_slice(self) -> &'a [V] {
575631
debug_assert!(!self.is_shared_root());
632+
// We cannot be the root, so `as_leaf` is okay
576633
unsafe {
577634
slice::from_raw_parts(
578635
self.as_leaf().vals.as_ptr() as *const V,

0 commit comments

Comments
 (0)