Skip to content

EXPERIMENT: MemDecoder tweak #109910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions compiler/rustc_serialize/src/leb128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,25 @@ impl_write_unsigned_leb128!(write_usize_leb128, usize);

macro_rules! impl_read_unsigned_leb128 {
($fn_name:ident, $int_ty:ty) => {
// This returns `Option` to avoid needing to emit the panic paths here.
// Letting the caller do it instead helps keep our code size small.
#[inline]
pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
pub fn $fn_name(slice: &mut std::slice::Iter<'_, u8>) -> Option<$int_ty> {
// The first iteration of this loop is peeled out of the loop. This is a
// performance win because this code is hot and integer values less
// than 128 are very common, typically occurring 50-80% or more of
// the time, even for u64 and u128.
let byte = slice[*position];
*position += 1;
let byte = *(slice.next()?);
if (byte & 0x80) == 0 {
return byte as $int_ty;
return Some(byte as $int_ty);
}
let mut result = (byte & 0x7F) as $int_ty;
let mut shift = 7;
loop {
let byte = slice[*position];
*position += 1;
let byte = *(slice.next()?);
if (byte & 0x80) == 0 {
result |= (byte as $int_ty) << shift;
return result;
return Some(result);
} else {
result |= ((byte & 0x7F) as $int_ty) << shift;
}
Expand Down Expand Up @@ -126,15 +126,16 @@ impl_write_signed_leb128!(write_isize_leb128, isize);

macro_rules! impl_read_signed_leb128 {
($fn_name:ident, $int_ty:ty) => {
// This returns `Option` to avoid needing to emit the panic paths here.
// Letting the caller do it instead helps keep our code size small.
#[inline]
pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty {
pub fn $fn_name(slice: &mut std::slice::Iter<'_, u8>) -> Option<$int_ty> {
let mut result = 0;
let mut shift = 0;
let mut byte;

loop {
byte = slice[*position];
*position += 1;
byte = *(slice.next()?);
result |= <$int_ty>::from(byte & 0x7F) << shift;
shift += 7;

Expand All @@ -148,7 +149,7 @@ macro_rules! impl_read_signed_leb128 {
result |= (!0 << shift);
}

result
Some(result)
}
};
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_serialize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Core encoding and decoding interfaces.
)]
#![feature(never_type)]
#![feature(associated_type_bounds)]
#![feature(iter_advance_by)]
#![feature(min_specialization)]
#![feature(core_intrinsics)]
#![feature(maybe_uninit_slice)]
Expand Down
80 changes: 50 additions & 30 deletions compiler/rustc_serialize/src/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,34 +535,56 @@ impl Encoder for FileEncoder {
// -----------------------------------------------------------------------------

pub struct MemDecoder<'a> {
// Previously this type stored `position: usize`, but because this is safe
// code, reading `n` bytes required a bounds check for both `position + n`
// *and* `position`, since nothing guarantees that the addition didn't
// wrap. Storing an iterator like this instead means there's no offsetting
// needed to get to the data, and using an iterator instead of a slice means
// a read only increases the start pointer, rather than also needing to
// decrease the count in a slice.
// This field is first because it's touched more than `data`.
reader: std::slice::Iter<'a, u8>,
pub data: &'a [u8],
position: usize,
}

impl<'a> MemDecoder<'a> {
#[inline]
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
MemDecoder { data, position }
let reader = data[position..].iter();
MemDecoder { data, reader }
}

#[inline]
pub fn position(&self) -> usize {
self.position
self.data.len() - self.reader.len()
}

#[inline]
pub fn set_position(&mut self, pos: usize) {
self.position = pos
self.reader = self.data[pos..].iter();
}

#[inline]
pub fn advance(&mut self, bytes: usize) {
self.position += bytes;
self.reader.advance_by(bytes).unwrap();
}

#[cold]
fn panic_insufficient_data(&self) -> ! {
let pos = self.position();
let len = self.data.len();
panic!("Insufficient remaining data at position {pos} (length {len})");
}
}

macro_rules! read_leb128 {
($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }};
($dec:expr, $fun:ident) => {{
if let Some(val) = leb128::$fun(&mut $dec.reader) {
val
} else {
$dec.panic_insufficient_data()
}
}};
}

impl<'a> Decoder for MemDecoder<'a> {
Expand All @@ -583,17 +605,14 @@ impl<'a> Decoder for MemDecoder<'a> {

#[inline]
fn read_u16(&mut self) -> u16 {
let bytes = [self.data[self.position], self.data[self.position + 1]];
let value = u16::from_le_bytes(bytes);
self.position += 2;
value
let bytes = self.read_raw_bytes(2);
u16::from_le_bytes(bytes.try_into().unwrap())
}

#[inline]
fn read_u8(&mut self) -> u8 {
let value = self.data[self.position];
self.position += 1;
value
let bytes = self.read_raw_bytes(1);
bytes[0]
}

#[inline]
Expand All @@ -618,17 +637,12 @@ impl<'a> Decoder for MemDecoder<'a> {

#[inline]
fn read_i16(&mut self) -> i16 {
let bytes = [self.data[self.position], self.data[self.position + 1]];
let value = i16::from_le_bytes(bytes);
self.position += 2;
value
self.read_u16() as i16
}

#[inline]
fn read_i8(&mut self) -> i8 {
let value = self.data[self.position];
self.position += 1;
value as i8
self.read_u8() as i8
}

#[inline]
Expand Down Expand Up @@ -663,20 +677,26 @@ impl<'a> Decoder for MemDecoder<'a> {
#[inline]
fn read_str(&mut self) -> &'a str {
let len = self.read_usize();
let sentinel = self.data[self.position + len];
assert!(sentinel == STR_SENTINEL);
let s = unsafe {
std::str::from_utf8_unchecked(&self.data[self.position..self.position + len])
};
self.position += len + 1;
s

// This cannot reuse `read_raw_bytes` as that runs into lifetime issues
// where the slice gets tied to `'b` instead of just to `'a`.
if self.reader.len() <= len {
self.panic_insufficient_data();
}
let slice = self.reader.as_slice();
assert!(slice[len] == STR_SENTINEL);
self.reader.advance_by(len + 1).unwrap();
unsafe { std::str::from_utf8_unchecked(&slice[..len]) }
}

#[inline]
fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
let start = self.position;
self.position += bytes;
&self.data[start..self.position]
if self.reader.len() < bytes {
self.panic_insufficient_data();
}
let slice = self.reader.as_slice();
self.reader.advance_by(bytes).unwrap();
&slice[..bytes]
}
}

Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_serialize/tests/leb128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ macro_rules! impl_test_unsigned_leb128 {
stream.extend($write_fn_name(&mut buf, x));
}

let mut position = 0;
let mut reader = stream.iter();
for &expected in &values {
let actual = $read_fn_name(&stream, &mut position);
let actual = $read_fn_name(&mut reader).unwrap();
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), position);
assert_eq!(reader.len(), 0);
}
};
}
Expand Down Expand Up @@ -74,12 +74,12 @@ macro_rules! impl_test_signed_leb128 {
stream.extend($write_fn_name(&mut buf, x));
}

let mut position = 0;
let mut reader = stream.iter();
for &expected in &values {
let actual = $read_fn_name(&stream, &mut position);
let actual = $read_fn_name(&mut reader).unwrap();
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), position);
assert_eq!(reader.len(), 0);
}
};
}
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_serialize/tests/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ fn test_unit() {
#[test]
fn test_u8() {
let mut vec = vec![];
for i in u8::MIN..u8::MAX {
for i in u8::MIN..=u8::MAX {
vec.push(i);
}
check_round_trip(vec);
}

#[test]
fn test_u16() {
for i in u16::MIN..u16::MAX {
for i in u16::MIN..=u16::MAX {
check_round_trip(vec![1, 2, 3, i, i, i]);
}
}
Expand All @@ -86,15 +86,15 @@ fn test_usize() {
#[test]
fn test_i8() {
let mut vec = vec![];
for i in i8::MIN..i8::MAX {
for i in i8::MIN..=i8::MAX {
vec.push(i);
}
check_round_trip(vec);
}

#[test]
fn test_i16() {
for i in i16::MIN..i16::MAX {
for i in i16::MIN..=i16::MAX {
check_round_trip(vec![-1, 2, -3, i, i, i, 2]);
}
}
Expand Down