Skip to content

Adapt table sizes to the contents, accommodating u64 rmeta offsets #113542

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) {
}

impl<T: ParameterizedOverTcx> LazyValue<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx>
where
T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>,
Expand Down Expand Up @@ -294,6 +295,7 @@ unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen
}

impl<T: ParameterizedOverTcx> LazyArray<T> {
#[inline]
fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(
self,
metadata: M,
Expand Down Expand Up @@ -360,8 +362,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
}

fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
}

#[inline]
Expand Down Expand Up @@ -420,25 +422,29 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum {
let cnum = CrateNum::from_u32(d.read_u32());
d.map_encoded_cnum_to_current(cnum)
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex {
DefIndex::from_u32(d.read_u32())
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnIndex {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
ExpnIndex::from_u32(d.read_u32())
}
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ast::AttrId {
#[inline]
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ast::AttrId {
let sess = d.sess.expect("can't decode AttrId without Session");
sess.parse_sess.attr_id_generator.mk_attr_id()
Expand Down Expand Up @@ -657,6 +663,7 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
}

impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
#[inline]
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let len = decoder.read_usize();
if len == 0 { LazyArray::default() } else { decoder.read_lazy_array(len) }
Expand All @@ -665,8 +672,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let width = decoder.read_usize();
let len = decoder.read_usize();
decoder.read_lazy_table(len)
decoder.read_lazy_table(width, len)
}
}

Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {

impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
e.emit_usize(self.encoded_size);
e.emit_usize(self.width);
e.emit_usize(self.len);
e.emit_lazy_distance(self.position);
}
}
Expand Down
8 changes: 5 additions & 3 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ impl<T> LazyArray<T> {
/// eagerly and in-order.
struct LazyTable<I, T> {
position: NonZeroUsize,
encoded_size: usize,
width: usize,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add some comments on those?

len: usize,
_marker: PhantomData<fn(I) -> T>,
}

Expand All @@ -147,9 +148,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
impl<I, T> LazyTable<I, T> {
fn from_position_and_encoded_size(
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
) -> LazyTable<I, T> {
LazyTable { position, encoded_size, _marker: PhantomData }
LazyTable { position, width, len, _marker: PhantomData }
}
}

Expand Down
134 changes: 95 additions & 39 deletions compiler/rustc_metadata/src/rmeta/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ impl IsDefault for u32 {
}
}

impl IsDefault for u64 {
fn is_default(&self) -> bool {
*self == 0
}
}

impl<T> IsDefault for LazyArray<T> {
fn is_default(&self) -> bool {
self.num_elems == 0
Expand Down Expand Up @@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
}
}

impl FixedSizeEncoding for u64 {
type ByteArray = [u8; 8];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
Self::from_le_bytes(*b)
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
*b = self.to_le_bytes();
}
}

macro_rules! fixed_size_enum {
($ty:ty { $(($($pat:tt)*))* }) => {
impl FixedSizeEncoding for Option<$ty> {
Expand Down Expand Up @@ -299,21 +319,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
// generic `LazyValue<T>` impl, but in the general case we might not need / want
// to fit every `usize` in `u32`.
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
type ByteArray = [u8; 4];
type ByteArray = [u8; 8];

#[inline]
fn from_bytes(b: &[u8; 4]) -> Self {
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
fn from_bytes(b: &[u8; 8]) -> Self {
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
Some(LazyValue::from_position(position))
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 4]) {
fn write_to_bytes(self, b: &mut [u8; 8]) {
match self {
None => unreachable!(),
Some(lazy) => {
let position = lazy.position.get();
let position: u32 = position.try_into().unwrap();
let position: u64 = position.try_into().unwrap();
position.write_to_bytes(b)
}
}
Expand All @@ -322,55 +342,67 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {

impl<T> LazyArray<T> {
#[inline]
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
let position = (self.position.get() as u64).to_le_bytes();
let len = (self.num_elems as u64).to_le_bytes();

let position = self.position.get();
let position: u32 = position.try_into().unwrap();
position.write_to_bytes(position_bytes);

let len = self.num_elems;
let len: u32 = len.try_into().unwrap();
len.write_to_bytes(meta_bytes);
for i in 0..8 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please write a comment here stating the motivation for this interleaving? This code would be very difficult to understand without context.

b[2 * i] = position[i];
b[2 * i + 1] = len[i];
}
}

fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
let len = u32::from_bytes(meta_bytes) as usize;
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
let len = u64::from_bytes(&meta) as usize;
Some(LazyArray::from_position_and_num_elems(position, len))
}
}

impl<T> FixedSizeEncoding for LazyArray<T> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
if *meta_bytes == [0; 4] {
fn from_bytes(b: &[u8; 16]) -> Self {
let mut position = [0u8; 8];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe extract this to a function since this code is duplicated? Also a small comment referencing the idea behind the interleaving would be nice.

let mut meta = [0u8; 8];

for i in 0..8 {
position[i] = b[2 * i];
meta[i] = b[2 * i + 1];
}

if meta == [0; 8] {
return Default::default();
}
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
LazyArray::from_bytes_impl(&position, &meta).unwrap()
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
assert!(!self.is_default());
self.write_to_bytes_impl(b)
}
}

impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];

#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
fn from_bytes(b: &[u8; 16]) -> Self {
let mut position = [0u8; 8];
let mut meta = [0u8; 8];

for i in 0..8 {
position[i] = b[2 * i];
meta[i] = b[2 * i + 1];
}

LazyArray::from_bytes_impl(&position, &meta)
}

#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
match self {
None => unreachable!(),
Some(lazy) => lazy.write_to_bytes_impl(b),
Expand All @@ -380,13 +412,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {

/// Helper for constructing a table's serialization (also see `Table`).
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
width: usize,
blocks: IndexVec<I, T::ByteArray>,
_marker: PhantomData<T>,
}

impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
fn default() -> Self {
TableBuilder { blocks: Default::default(), _marker: PhantomData }
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
}
}

Expand Down Expand Up @@ -414,40 +447,63 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
// > store bit-masks of which item in each bucket is actually serialized).
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
value.write_to_bytes(block);
if self.width != N {
let width = N - trailing_zeros(block);
self.width = self.width.max(width);
}
}
}

pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
let pos = buf.position();

let width = self.width;
for block in &self.blocks {
buf.emit_raw_bytes(block);
buf.emit_raw_bytes(&block[..width]);
}
let num_bytes = self.blocks.len() * N;

LazyTable::from_position_and_encoded_size(
NonZeroUsize::new(pos as usize).unwrap(),
num_bytes,
width,
self.blocks.len(),
)
}
}

fn trailing_zeros(x: &[u8]) -> usize {
x.iter().rev().take_while(|b| **b == 0).count()
}

impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
LazyTable<I, T>
where
for<'tcx> T::Value<'tcx>: FixedSizeEncoding<ByteArray = [u8; N]>,
{
/// Given the metadata, extract out the value at a particular index (if any).
#[inline(never)]
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);

// Access past the end of the table returns a Default
if i.index() >= self.len {
return Default::default();
}

let start = self.position.get();
let bytes = &metadata.blob()[start..start + self.encoded_size];
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
let width = self.width;
let start = self.position.get() + (width * i.index());
let end = start + width;
let bytes = &metadata.blob()[start..end];

if let Ok(fixed) = bytes.try_into() {
FixedSizeEncoding::from_bytes(fixed)
} else {
let mut fixed = [0u8; N];
fixed[..width].copy_from_slice(bytes);
FixedSizeEncoding::from_bytes(&fixed)
}
}

/// Size of the table in entries, including possible gaps.
pub(super) fn size(&self) -> usize {
self.encoded_size / N
self.len
}
}