Skip to content

Commit f558900

Browse files
committed
Auto merge of rust-lang#119226 - Mark-Simulacrum:format-efficiency, r=<try>
Improve coding efficiency for RawDefId. This copies the scheme already used for LazyArray, cutting a couple hundred kilobytes from libcore's metadata.
2 parents 398fd92 + 2fec2d6 commit f558900

File tree

1 file changed

+36
-23
lines changed
  • compiler/rustc_metadata/src/rmeta

1 file changed

+36
-23
lines changed

compiler/rustc_metadata/src/rmeta/table.rs

Lines changed: 36 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -255,24 +255,30 @@ impl FixedSizeEncoding for Option<RawDefId> {
255255
type ByteArray = [u8; 8];
256256

257257
#[inline]
258-
fn from_bytes(b: &[u8; 8]) -> Self {
259-
let krate = u32::from_le_bytes(b[0..4].try_into().unwrap());
258+
fn from_bytes(encoded: &[u8; 8]) -> Self {
259+
let (index, krate) = decode_interleaved(encoded);
260+
let krate = u32::from_le_bytes(krate);
260261
if krate == 0 {
261262
return None;
262263
}
263-
let index = u32::from_le_bytes(b[4..8].try_into().unwrap());
264+
let index = u32::from_le_bytes(index);
265+
264266
Some(RawDefId { krate: krate - 1, index })
265267
}
266268

267269
#[inline]
268-
fn write_to_bytes(self, b: &mut [u8; 8]) {
270+
fn write_to_bytes(self, dest: &mut [u8; 8]) {
269271
match self {
270272
None => unreachable!(),
271273
Some(RawDefId { krate, index }) => {
272-
// CrateNum is less than `CrateNum::MAX_AS_U32`.
273274
debug_assert!(krate < u32::MAX);
274-
b[0..4].copy_from_slice(&(1 + krate).to_le_bytes());
275-
b[4..8].copy_from_slice(&index.to_le_bytes());
275+
// CrateNum is less than `CrateNum::MAX_AS_U32`.
276+
let krate = (krate + 1).to_le_bytes();
277+
let index = index.to_le_bytes();
278+
279+
// CrateNum is usually much smaller than the index within the crate, so put it in
280+
// the second slot.
281+
encode_interleaved(index, krate, dest);
276282
}
277283
}
278284
}
@@ -374,20 +380,11 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
374380

375381
impl<T> LazyArray<T> {
376382
#[inline]
377-
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
383+
fn write_to_bytes_impl(self, dest: &mut [u8; 16]) {
378384
let position = (self.position.get() as u64).to_le_bytes();
379385
let len = (self.num_elems as u64).to_le_bytes();
380386

381-
// Element width is selected at runtime on a per-table basis by omitting trailing
382-
// zero bytes in table elements. This works very naturally when table elements are
383-
// simple numbers but `LazyArray` is a pair of integers. If naively encoded, the second
384-
// element would shield the trailing zeroes in the first. Interleaving the bytes
385-
// of the position and length exposes trailing zeroes in both to the optimization.
386-
// We encode length second because we generally expect it to be smaller.
387-
for i in 0..8 {
388-
b[2 * i] = position[i];
389-
b[2 * i + 1] = len[i];
390-
}
387+
encode_interleaved(position, len, dest)
391388
}
392389

393390
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
@@ -397,20 +394,36 @@ impl<T> LazyArray<T> {
397394
}
398395
}
399396

400-
// Decoding helper for the encoding scheme used by `LazyArray`.
401397
// Interleaving the bytes of the two integers exposes trailing bytes in the first integer
402398
// to the varint scheme that we use for tables.
403399
#[inline]
404-
fn decode_interleaved(encoded: &[u8; 16]) -> ([u8; 8], [u8; 8]) {
405-
let mut first = [0u8; 8];
406-
let mut second = [0u8; 8];
407-
for i in 0..8 {
400+
fn decode_interleaved<const N: usize, const M: usize>(encoded: &[u8; N]) -> ([u8; M], [u8; M]) {
401+
assert_eq!(M * 2, N);
402+
let mut first = [0u8; M];
403+
let mut second = [0u8; M];
404+
for i in 0..M {
408405
first[i] = encoded[2 * i];
409406
second[i] = encoded[2 * i + 1];
410407
}
411408
(first, second)
412409
}
413410

411+
// Element width is selected at runtime on a per-table basis by omitting trailing
412+
// zero bytes in table elements. This works very naturally when table elements are
413+
// simple numbers but sometimes we have a pair of integers. If naively encoded, the second element
414+
// would shield the trailing zeroes in the first. Interleaving the bytes exposes trailing zeroes in
415+
// both to the optimization.
416+
//
417+
// Prefer passing a and b such that `b` is usually smaller.
418+
#[inline]
419+
fn encode_interleaved<const N: usize, const M: usize>(a: [u8; M], b: [u8; M], dest: &mut [u8; N]) {
420+
assert_eq!(M * 2, N);
421+
for i in 0..M {
422+
dest[2 * i] = a[i];
423+
dest[2 * i + 1] = b[i];
424+
}
425+
}
426+
414427
impl<T> FixedSizeEncoding for LazyArray<T> {
415428
type ByteArray = [u8; 16];
416429

0 commit comments

Comments
 (0)