Skip to content

Commit a723228

Browse files
Shrink span encoding further
Spans are now stored in a more compact form that saves at least 1 byte per span (the indirect/direct encoding-mode byte) and at most 3 bytes per span (the encoding-mode byte, the context byte, and the length byte). As a result, libcore metadata shrinks by 1.5MB.
1 parent 89e2160 commit a723228

File tree

3 files changed

+130
-40
lines changed

3 files changed

+130
-40
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+19-21
Original file line numberDiff line numberDiff line change
@@ -508,39 +508,37 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
508508
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
509509
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
510510
let start = decoder.position();
511-
let mode = SpanEncodingMode::decode(decoder);
512-
let data = match mode {
513-
SpanEncodingMode::Direct => SpanData::decode(decoder),
514-
SpanEncodingMode::RelativeOffset(offset) => {
515-
decoder.with_position(start - offset, |decoder| {
516-
let mode = SpanEncodingMode::decode(decoder);
517-
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
518-
SpanData::decode(decoder)
519-
})
520-
}
521-
SpanEncodingMode::AbsoluteOffset(addr) => decoder.with_position(addr, |decoder| {
522-
let mode = SpanEncodingMode::decode(decoder);
523-
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
524-
SpanData::decode(decoder)
525-
}),
511+
let tag = SpanTag(decoder.peek_byte());
512+
let data = if tag.kind() == SpanKind::Indirect {
513+
// Skip past the tag we just peeked.
514+
decoder.read_u8();
515+
let offset_or_position = decoder.read_usize();
516+
let position = if tag.is_relative_offset() {
517+
start - offset_or_position
518+
} else {
519+
offset_or_position
520+
};
521+
decoder.with_position(position, SpanData::decode)
522+
} else {
523+
SpanData::decode(decoder)
526524
};
527525
Span::new(data.lo, data.hi, data.ctxt, data.parent)
528526
}
529527
}
530528

531529
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
532530
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
533-
let ctxt = SyntaxContext::decode(decoder);
534-
let tag = u8::decode(decoder);
531+
let tag = SpanTag::decode(decoder);
532+
let ctxt = tag.context().unwrap_or_else(|| SyntaxContext::decode(decoder));
535533

536-
if tag == TAG_PARTIAL_SPAN {
534+
if tag.kind() == SpanKind::Partial {
537535
return DUMMY_SP.with_ctxt(ctxt).data();
538536
}
539537

540-
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
538+
debug_assert!(tag.kind() == SpanKind::Local || tag.kind() == SpanKind::Foreign);
541539

542540
let lo = BytePos::decode(decoder);
543-
let len = BytePos::decode(decoder);
541+
let len = tag.length().unwrap_or_else(|| BytePos::decode(decoder));
544542
let hi = lo + len;
545543

546544
let Some(sess) = decoder.sess else {
@@ -581,7 +579,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
581579
// treat the 'local' and 'foreign' cases almost identically during deserialization:
582580
// we can call `imported_source_file` for the proper crate, and binary search
583581
// through the returned slice using our span.
584-
let source_file = if tag == TAG_VALID_SPAN_LOCAL {
582+
let source_file = if tag.kind() == SpanKind::Local {
585583
decoder.cdata().imported_source_file(metadata_index, sess)
586584
} else {
587585
// When we encode a proc-macro crate, all `Span`s should be encoded

compiler/rustc_metadata/src/rmeta/encoder.rs

+29-15
Original file line numberDiff line numberDiff line change
@@ -177,15 +177,17 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
177177
// previously saved offset must be smaller than the current position.
178178
let offset = s.opaque.position() - last_location;
179179
if offset < last_location {
180-
SpanEncodingMode::RelativeOffset(offset).encode(s)
180+
SpanTag::indirect(true).encode(s);
181+
offset.encode(s);
181182
} else {
182-
SpanEncodingMode::AbsoluteOffset(last_location).encode(s)
183+
SpanTag::indirect(false).encode(s);
184+
last_location.encode(s);
183185
}
184186
}
185187
Entry::Vacant(v) => {
186188
let position = s.opaque.position();
187189
v.insert(position);
188-
SpanEncodingMode::Direct.encode(s);
190+
// Data is encoded with a SpanTag prefix (see below).
189191
self.data().encode(s);
190192
}
191193
}
@@ -225,14 +227,15 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
225227
// IMPORTANT: If this is ever changed, be sure to update
226228
// `rustc_span::hygiene::raw_encode_expn_id` to handle
227229
// encoding `ExpnData` for proc-macro crates.
228-
if s.is_proc_macro {
229-
SyntaxContext::root().encode(s);
230-
} else {
231-
self.ctxt.encode(s);
232-
}
230+
let ctxt = if s.is_proc_macro { SyntaxContext::root() } else { self.ctxt };
233231

234232
if self.is_dummy() {
235-
return TAG_PARTIAL_SPAN.encode(s);
233+
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
234+
tag.encode(s);
235+
if tag.context().is_none() {
236+
ctxt.encode(s);
237+
}
238+
return;
236239
}
237240

238241
// The Span infrastructure should make sure that this invariant holds:
@@ -250,7 +253,12 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
250253
if !source_file.contains(self.hi) {
251254
// Unfortunately, macro expansion still sometimes generates Spans
252255
// that are malformed in this way.
253-
return TAG_PARTIAL_SPAN.encode(s);
256+
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
257+
tag.encode(s);
258+
if tag.context().is_none() {
259+
ctxt.encode(s);
260+
}
261+
return;
254262
}
255263

256264
// There are two possible cases here:
@@ -269,7 +277,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
269277
// if we're a proc-macro crate.
270278
// This allows us to avoid loading the dependencies of proc-macro crates: all of
271279
// the information we need to decode `Span`s is stored in the proc-macro crate.
272-
let (tag, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
280+
let (kind, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
273281
// To simplify deserialization, we 'rebase' this span onto the crate it originally came
274282
// from (the crate that 'owns' the file it references). These rebased 'lo' and 'hi'
275283
// values are relative to the source map information for the 'foreign' crate whose
@@ -287,7 +295,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
287295
}
288296
};
289297

290-
(TAG_VALID_SPAN_FOREIGN, metadata_index)
298+
(SpanKind::Foreign, metadata_index)
291299
} else {
292300
// Record the fact that we need to encode the data for this `SourceFile`
293301
let source_files =
@@ -296,7 +304,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
296304
let metadata_index: u32 =
297305
metadata_index.try_into().expect("cannot export more than U32_MAX files");
298306

299-
(TAG_VALID_SPAN_LOCAL, metadata_index)
307+
(SpanKind::Local, metadata_index)
300308
};
301309

302310
// Encode the start position relative to the file start, so we profit more from the
@@ -307,14 +315,20 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
307315
// from the variable-length integer encoding that we use.
308316
let len = self.hi - self.lo;
309317

318+
let tag = SpanTag::new(kind, ctxt, len.0 as usize);
310319
tag.encode(s);
320+
if tag.context().is_none() {
321+
ctxt.encode(s);
322+
}
311323
lo.encode(s);
312-
len.encode(s);
324+
if tag.length().is_none() {
325+
len.encode(s);
326+
}
313327

314328
// Encode the index of the `SourceFile` for the span, in order to make decoding faster.
315329
metadata_index.encode(s);
316330

317-
if tag == TAG_VALID_SPAN_FOREIGN {
331+
if kind == SpanKind::Foreign {
318332
// This needs to be two lines to avoid holding the `s.source_file_cache`
319333
// while calling `cnum.encode(s)`
320334
let cnum = s.source_file_cache.0.cnum;

compiler/rustc_metadata/src/rmeta/mod.rs

+82-4
Original file line numberDiff line numberDiff line change
@@ -488,10 +488,88 @@ bitflags::bitflags! {
488488
}
489489
}
490490

491-
// Tags used for encoding Spans:
492-
const TAG_VALID_SPAN_LOCAL: u8 = 0;
493-
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
494-
const TAG_PARTIAL_SPAN: u8 = 2;
491+
/// A span tag byte encodes a bunch of data, so that we can cut out a few extra bytes from span
492+
/// encodings (which are very common, for example, libcore has ~650,000 unique spans and over 1.1
493+
/// million references to prior-written spans).
494+
///
495+
/// The byte format is split into several parts:
496+
///
497+
/// [ a a a a a c d d ]
498+
///
499+
/// `a` bits represent the span length. We have 5 bits, so we can store lengths up to 30 inline, with
500+
/// an all-1s pattern representing that the length is stored separately.
501+
///
502+
/// `c` is set when the span context is the root context (and then it is not stored as a separate
503+
/// varint) for direct span encodings; for indirect encodings it is set when the offset is relative
504+
/// and clear when it is absolute.
505+
///
506+
/// `d` bits represent the kind of span we are storing (local, foreign, partial, indirect).
507+
#[derive(Encodable, Decodable, Copy, Clone)]
508+
struct SpanTag(u8);
509+
510+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
511+
enum SpanKind {
512+
Local = 0b00,
513+
Foreign = 0b01,
514+
Partial = 0b10,
515+
// Indicates the actual span contents are elsewhere.
516+
// If this is the kind, then the span context bit represents whether it is a relative or
517+
// absolute offset.
518+
Indirect = 0b11,
519+
}
520+
521+
impl SpanTag {
522+
fn new(kind: SpanKind, context: rustc_span::SyntaxContext, length: usize) -> SpanTag {
523+
let mut data = 0u8;
524+
data |= kind as u8;
525+
if context.is_root() {
526+
data |= 0b100;
527+
}
528+
let all_1s_len = (0xffu8 << 3) >> 3;
529+
// strictly less than - all 1s pattern is a sentinel for storage being out of band.
530+
if length < all_1s_len as usize {
531+
data |= (length as u8) << 3;
532+
} else {
533+
data |= all_1s_len << 3;
534+
}
535+
536+
SpanTag(data)
537+
}
538+
539+
fn indirect(relative: bool) -> SpanTag {
540+
let mut tag = SpanTag(SpanKind::Indirect as u8);
541+
if relative {
542+
tag.0 |= 0b100;
543+
}
544+
tag
545+
}
546+
547+
fn kind(self) -> SpanKind {
548+
let masked = self.0 & 0b11;
549+
match masked {
550+
0b00 => SpanKind::Local,
551+
0b01 => SpanKind::Foreign,
552+
0b10 => SpanKind::Partial,
553+
0b11 => SpanKind::Indirect,
554+
_ => unreachable!(),
555+
}
556+
}
557+
558+
fn is_relative_offset(self) -> bool {
559+
debug_assert_eq!(self.kind(), SpanKind::Indirect);
560+
self.0 & 0b100 != 0
561+
}
562+
563+
fn context(self) -> Option<rustc_span::SyntaxContext> {
564+
if self.0 & 0b100 != 0 { Some(rustc_span::SyntaxContext::root()) } else { None }
565+
}
566+
567+
fn length(self) -> Option<rustc_span::BytePos> {
568+
let all_1s_len = (0xffu8 << 3) >> 3;
569+
let len = self.0 >> 3;
570+
if len != all_1s_len { Some(rustc_span::BytePos(u32::from(len))) } else { None }
571+
}
572+
}
495573

496574
// Tags for encoding Symbol's
497575
const SYMBOL_STR: u8 = 0;

0 commit comments

Comments
 (0)