-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Adapt table sizes to the contents, accommodating u64 rmeta offsets #113542
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,12 @@ impl IsDefault for u32 { | |
} | ||
} | ||
|
||
// A `u64` is treated as the default value exactly when it equals zero. | ||
impl IsDefault for u64 { | ||
fn is_default(&self) -> bool { | ||
*self == 0 | ||
} | ||
} | ||
|
||
impl<T> IsDefault for LazyArray<T> { | ||
fn is_default(&self) -> bool { | ||
self.num_elems == 0 | ||
|
@@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 { | |
} | ||
} | ||
|
||
// Fixed-size encoding of `u64` as 8 little-endian bytes. Little-endian order places the | ||
// high-order bytes last, so small values end in zero bytes; those trailing zeros are what | ||
// `TableBuilder` measures with `trailing_zeros` and leaves out when it encodes a table. | ||
impl FixedSizeEncoding for u64 { | ||
type ByteArray = [u8; 8]; | ||
|
||
// Decodes the 8 bytes in `b` as a little-endian integer. | ||
#[inline] | ||
fn from_bytes(b: &[u8; 8]) -> Self { | ||
Self::from_le_bytes(*b) | ||
} | ||
|
||
// Overwrites all 8 bytes of `b` with the little-endian representation of `self`. | ||
#[inline] | ||
fn write_to_bytes(self, b: &mut [u8; 8]) { | ||
*b = self.to_le_bytes(); | ||
} | ||
} | ||
|
||
macro_rules! fixed_size_enum { | ||
($ty:ty { $(($($pat:tt)*))* }) => { | ||
impl FixedSizeEncoding for Option<$ty> { | ||
|
@@ -299,21 +319,21 @@ impl FixedSizeEncoding for UnusedGenericParams { | |
// generic `LazyValue<T>` impl, but in the general case we might not need / want | ||
// to fit every `usize` in `u32`. | ||
impl<T> FixedSizeEncoding for Option<LazyValue<T>> { | ||
type ByteArray = [u8; 4]; | ||
type ByteArray = [u8; 8]; | ||
|
||
#[inline] | ||
fn from_bytes(b: &[u8; 4]) -> Self { | ||
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?; | ||
fn from_bytes(b: &[u8; 8]) -> Self { | ||
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?; | ||
Some(LazyValue::from_position(position)) | ||
} | ||
|
||
#[inline] | ||
fn write_to_bytes(self, b: &mut [u8; 4]) { | ||
fn write_to_bytes(self, b: &mut [u8; 8]) { | ||
match self { | ||
None => unreachable!(), | ||
Some(lazy) => { | ||
let position = lazy.position.get(); | ||
let position: u32 = position.try_into().unwrap(); | ||
let position: u64 = position.try_into().unwrap(); | ||
position.write_to_bytes(b) | ||
} | ||
} | ||
|
@@ -322,55 +342,67 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> { | |
|
||
impl<T> LazyArray<T> { | ||
// Encodes `position` and `num_elems` into `b`. In the 16-byte form both values are | ||
// written as little-endian `u64`s with their bytes interleaved: even offsets of `b` hold | ||
// position bytes and odd offsets hold length bytes. | ||
#[inline] | ||
fn write_to_bytes_impl(self, b: &mut [u8; 8]) { | ||
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() }; | ||
fn write_to_bytes_impl(self, b: &mut [u8; 16]) { | ||
let position = (self.position.get() as u64).to_le_bytes(); | ||
let len = (self.num_elems as u64).to_le_bytes(); | ||
|
||
let position = self.position.get(); | ||
let position: u32 = position.try_into().unwrap(); | ||
position.write_to_bytes(position_bytes); | ||
|
||
let len = self.num_elems; | ||
let len: u32 = len.try_into().unwrap(); | ||
len.write_to_bytes(meta_bytes); | ||
// Motivation for interleaving: `TableBuilder` records `N - trailing_zeros(block)` as the | ||
// table width and `encode` emits only that many bytes per entry. Laid out back to back, | ||
// the length bytes would follow the high-order zero bytes of the position and keep them | ||
// from being trimmed. Alternating the bytes moves the high-order bytes of both values to | ||
// the end of `b`, so the zero high-order bytes of both can be dropped. | ||
for i in 0..8 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please write a comment here stating the motivation for this interleaving? This code would be very difficult to understand without context. |
||
b[2 * i] = position[i]; | ||
b[2 * i + 1] = len[i]; | ||
} | ||
} | ||
|
||
// Builds a `LazyArray` from the little-endian bytes of its position and its length. | ||
// Returns `None` when the position decodes to zero, because `NonZeroUsize::new` rejects it. | ||
// NOTE(review): the `as usize` casts would truncate where `usize` is narrower than the | ||
// decoded integer (64 bits in the 8-byte form) -- confirm that is acceptable for all hosts. | ||
fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> { | ||
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?; | ||
let len = u32::from_bytes(meta_bytes) as usize; | ||
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> { | ||
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?; | ||
let len = u64::from_bytes(&meta) as usize; | ||
Some(LazyArray::from_position_and_num_elems(position, len)) | ||
} | ||
} | ||
|
||
// Fixed-size encoding of `LazyArray`. In the 16-byte form the entry holds the position and | ||
// the length as little-endian `u64`s with interleaved bytes, mirroring the loop in | ||
// `LazyArray::write_to_bytes_impl`. | ||
impl<T> FixedSizeEncoding for LazyArray<T> { | ||
type ByteArray = [u8; 8]; | ||
type ByteArray = [u8; 16]; | ||
|
||
// Decodes an entry. Returns `Default::default()` when every length byte is zero. | ||
#[inline] | ||
fn from_bytes(b: &[u8; 8]) -> Self { | ||
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() }; | ||
if *meta_bytes == [0; 4] { | ||
fn from_bytes(b: &[u8; 16]) -> Self { | ||
let mut position = [0u8; 8]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe extract this to a function since this code is duplicated? Also a small comment referencing the idea behind the interleaving would be nice. |
||
let mut meta = [0u8; 8]; | ||
|
||
// Undo the interleaving: even offsets of `b` are position bytes, odd offsets are | ||
// length bytes. | ||
for i in 0..8 { | ||
position[i] = b[2 * i]; | ||
meta[i] = b[2 * i + 1]; | ||
} | ||
|
||
// A zero length short-circuits to the default value without decoding the position. | ||
if meta == [0; 8] { | ||
return Default::default(); | ||
} | ||
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap() | ||
// `from_bytes_impl` yields `None` only for a zero position, so this `unwrap` panics | ||
// when a non-zero length is paired with position 0. | ||
LazyArray::from_bytes_impl(&position, &meta).unwrap() | ||
} | ||
|
||
// Encodes `self` into `b`. Panics if `self.is_default()` is true. | ||
#[inline] | ||
fn write_to_bytes(self, b: &mut [u8; 8]) { | ||
fn write_to_bytes(self, b: &mut [u8; 16]) { | ||
assert!(!self.is_default()); | ||
self.write_to_bytes_impl(b) | ||
} | ||
} | ||
|
||
impl<T> FixedSizeEncoding for Option<LazyArray<T>> { | ||
type ByteArray = [u8; 8]; | ||
type ByteArray = [u8; 16]; | ||
|
||
#[inline] | ||
fn from_bytes(b: &[u8; 8]) -> Self { | ||
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() }; | ||
LazyArray::from_bytes_impl(position_bytes, meta_bytes) | ||
fn from_bytes(b: &[u8; 16]) -> Self { | ||
let mut position = [0u8; 8]; | ||
let mut meta = [0u8; 8]; | ||
|
||
for i in 0..8 { | ||
position[i] = b[2 * i]; | ||
meta[i] = b[2 * i + 1]; | ||
} | ||
|
||
LazyArray::from_bytes_impl(&position, &meta) | ||
} | ||
|
||
#[inline] | ||
fn write_to_bytes(self, b: &mut [u8; 8]) { | ||
fn write_to_bytes(self, b: &mut [u8; 16]) { | ||
match self { | ||
None => unreachable!(), | ||
Some(lazy) => lazy.write_to_bytes_impl(b), | ||
|
@@ -380,13 +412,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> { | |
|
||
/// Helper for constructing a table's serialization (also see `Table`). | ||
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> { | ||
/// Number of leading bytes of each block that `encode` emits. `set` raises it to | ||
/// `N - trailing_zeros(block)` for every block written, so it is the largest such value | ||
/// seen so far and only trailing zero bytes are ever left out. | ||
width: usize, | ||
/// Full-width (`T::ByteArray`) encoded entries, indexed by `I`. | ||
blocks: IndexVec<I, T::ByteArray>, | ||
_marker: PhantomData<T>, | ||
} | ||
|
||
impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> { | ||
// Creates an empty builder. In the form that has a `width` field it starts at 0 and is | ||
// only ever raised (via `max`) as entries are written by `set`. | ||
fn default() -> Self { | ||
TableBuilder { blocks: Default::default(), _marker: PhantomData } | ||
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData } | ||
} | ||
} | ||
|
||
|
@@ -414,40 +447,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui | |
// > store bit-masks of which item in each bucket is actually serialized). | ||
let block = self.blocks.ensure_contains_elem(i, || [0; N]); | ||
value.write_to_bytes(block); | ||
if self.width != N { | ||
let width = N - trailing_zeros(block); | ||
self.width = self.width.max(width); | ||
} | ||
} | ||
} | ||
|
||
/// Writes the table to `buf` and returns a `LazyTable` describing where it starts. | ||
/// | ||
/// In the width-based form every block is truncated to its first `self.width` bytes, and | ||
/// the returned table records that width together with the number of blocks. | ||
/// | ||
/// Panics if `buf.position()` is 0, because the start is stored as a `NonZeroUsize`. | ||
pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> { | ||
let pos = buf.position(); | ||
|
||
let width = self.width; | ||
for block in &self.blocks { | ||
buf.emit_raw_bytes(block); | ||
// Only the first `width` bytes are stored; the bytes left out are trailing zeros. | ||
buf.emit_raw_bytes(&block[..width]); | ||
} | ||
let num_bytes = self.blocks.len() * N; | ||
|
||
LazyTable::from_position_and_encoded_size( | ||
NonZeroUsize::new(pos as usize).unwrap(), | ||
num_bytes, | ||
width, | ||
self.blocks.len(), | ||
) | ||
} | ||
} | ||
|
||
/// Returns how many consecutive zero bytes `x` ends with. | ||
/// | ||
/// The scan runs from the last byte backwards and stops at the first non-zero byte, so the | ||
/// result is `x.len()` when every byte is zero and 0 when `x` is empty. | ||
fn trailing_zeros(x: &[u8]) -> usize { | ||
x.iter().rev().take_while(|b| **b == 0).count() | ||
} | ||
|
||
impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx> | ||
LazyTable<I, T> | ||
where | ||
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>, | ||
{ | ||
/// Given the metadata, extract out the value at a particular index (if any). | ||
/// | ||
/// Indices at or beyond `self.len` yield `Default::default()`. | ||
#[inline(never)] | ||
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> { | ||
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size); | ||
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len); | ||
|
||
// Access past the end of the table returns a Default | ||
if i.index() >= self.len { | ||
return Default::default(); | ||
} | ||
|
||
let start = self.position.get(); | ||
let bytes = &metadata.blob()[start..start + self.encoded_size]; | ||
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() }; | ||
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes) | ||
// Entries are stored `width` bytes apart, so entry `i` begins at | ||
// `position + width * i` and spans `width` bytes of the blob. | ||
let width = self.width; | ||
let start = self.position.get() + (width * i.index()); | ||
let end = start + width; | ||
let bytes = &metadata.blob()[start..end]; | ||
|
||
// The slice-to-array conversion succeeds only when `width == N`, i.e. the entry was | ||
// stored at full size. Otherwise copy the `width` stored bytes into a zeroed | ||
// `[u8; N]`, which restores the trailing zero bytes that were not written out. | ||
if let Ok(fixed) = bytes.try_into() { | ||
FixedSizeEncoding::from_bytes(fixed) | ||
} else { | ||
let mut fixed = [0u8; N]; | ||
fixed[..width].copy_from_slice(bytes); | ||
FixedSizeEncoding::from_bytes(&fixed) | ||
} | ||
} | ||
|
||
/// Size of the table in entries, including possible gaps. | ||
pub(super) fn size(&self) -> usize { | ||
self.encoded_size / N | ||
self.len | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add some comments on those?