Skip to content

Commit ce5fed6

Browse files
committed
Auto merge of #119238 - Mark-Simulacrum:def-hash-efficiency, r=<try>
Specialize DefPathHash table to skip encoding crate IDs. The current implementation is ad hoc and should likely be replaced with a non-table-based approach (i.e., fully pulling DefPathHash out of the rmeta table infrastructure, of which we now use almost none), but this was an easy way to get an initial PR out. The main pending question is whether the assumption made here (that there is exactly one shared prefix) is accurate. If not, is it right that the number of distinct prefixes should typically be small? (If so, a deduplication scheme, of which this is a special case, almost certainly makes sense.) We encode a lot of these (1000s), so the savings of 8 bytes per hash add up quickly. Opening this PR to get more opinions on the general idea and to run perf to see whether the underlying impl will perform OK.
2 parents 495203b + 5b3116c commit ce5fed6

File tree

1 file changed

+52
-0
lines changed
  • compiler/rustc_metadata/src/rmeta

1 file changed

+52
-0
lines changed

compiler/rustc_metadata/src/rmeta/table.rs

+52
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ pub(super) trait FixedSizeEncoding: IsDefault {
6969
/// Cannot use an associated `const BYTE_LEN: usize` instead due to const eval limitations.
7070
type ByteArray;
7171

72+
const IS_DEF_PATH_HASH: bool = false;
73+
fn from_16_bytes(_: &[u8; 16]) -> Self {
74+
unreachable!()
75+
}
76+
7277
fn from_bytes(b: &Self::ByteArray) -> Self;
7378
fn write_to_bytes(self, b: &mut Self::ByteArray);
7479
}
@@ -238,6 +243,11 @@ fixed_size_enum! {
238243
impl FixedSizeEncoding for DefPathHash {
239244
type ByteArray = [u8; 16];
240245

246+
const IS_DEF_PATH_HASH: bool = true;
247+
fn from_16_bytes(b: &[u8; 16]) -> Self {
248+
DefPathHash(Fingerprint::from_le_bytes(*b))
249+
}
250+
241251
#[inline]
242252
fn from_bytes(b: &[u8; 16]) -> Self {
243253
DefPathHash(Fingerprint::from_le_bytes(*b))
@@ -497,6 +507,37 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
497507
pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
498508
let pos = buf.position();
499509

510+
if T::IS_DEF_PATH_HASH {
511+
if self.blocks.is_empty() {
512+
return LazyTable::from_position_and_encoded_size(
513+
NonZeroUsize::new(pos).unwrap(),
514+
0,
515+
0,
516+
);
517+
}
518+
let mut prefix: Option<[u8; 8]> = None;
519+
for block in self.blocks.iter() {
520+
if prefix.is_none() {
521+
prefix = Some(block[..8].try_into().unwrap());
522+
}
523+
assert_eq!(prefix.unwrap(), block[..8]);
524+
}
525+
buf.write_array(prefix.unwrap());
526+
527+
for block in &self.blocks {
528+
buf.write_with::<8>(|dest| {
529+
*dest = block[8..].try_into().unwrap();
530+
8
531+
});
532+
}
533+
534+
return LazyTable::from_position_and_encoded_size(
535+
NonZeroUsize::new(pos).unwrap(),
536+
0,
537+
self.blocks.len(),
538+
);
539+
}
540+
500541
let width = self.width;
501542
for block in &self.blocks {
502543
buf.write_with(|dest| {
@@ -531,6 +572,17 @@ where
531572
return Default::default();
532573
}
533574

575+
if T::IS_DEF_PATH_HASH {
576+
let region = &metadata.blob()[self.position.get()..];
577+
let prefix: [u8; 8] = region[..8].try_into().unwrap();
578+
let local_hash_region = &region[8..];
579+
let suffix: [u8; 8] = local_hash_region[i.index() * 8..][..8].try_into().unwrap();
580+
let mut combined = [0; 16];
581+
combined[..8].copy_from_slice(&prefix);
582+
combined[8..].copy_from_slice(&suffix);
583+
return FixedSizeEncoding::from_16_bytes(&combined);
584+
}
585+
534586
let width = self.width;
535587
let start = self.position.get() + (width * i.index());
536588
let end = start + width;

0 commit comments

Comments
 (0)