Skip to content

EXPERIMENT: Avoid some extra bounds checks in read_{u8,u16} #110066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_serialize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Core encoding and decoding interfaces.
)]
#![feature(never_type)]
#![feature(associated_type_bounds)]
#![feature(iter_advance_by)]
#![feature(min_specialization)]
#![feature(core_intrinsics)]
#![feature(maybe_uninit_slice)]
Expand Down
81 changes: 51 additions & 30 deletions compiler/rustc_serialize/src/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,34 +535,55 @@ impl Encoder for FileEncoder {
// -----------------------------------------------------------------------------

pub struct MemDecoder<'a> {
// Previously this type stored `position: usize`, but because this type sticks
// to safe code, reading `n` bytes needed a bounds check for both `position`
// *and* `position + n`, since nothing guarantees that the addition didn't
// wrap. Storing an iterator like this instead means there's no offsetting
// needed to get to the data, and using an iterator rather than a slice means
// a read only has to advance the start pointer, instead of also needing to
// decrease the length stored in a slice.
// This field is first because it's touched more than `data`.
reader: std::slice::Iter<'a, u8>,
pub data: &'a [u8],
position: usize,
}

impl<'a> MemDecoder<'a> {
#[inline]
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
MemDecoder { data, position }
let reader = data[position..].iter();
MemDecoder { data, reader }
}

#[inline]
pub fn position(&self) -> usize {
self.position
self.data.len() - self.reader.len()
}

#[inline]
pub fn set_position(&mut self, pos: usize) {
self.position = pos
self.reader = self.data[pos..].iter();
}

#[inline]
pub fn advance(&mut self, bytes: usize) {
self.position += bytes;
self.reader.advance_by(bytes).unwrap();
}

/// Panics with a message reporting the current read position and the total
/// length of `data`.
///
/// In the code shown here this is called by the read methods when the reader
/// has fewer bytes left than the read requires. It never returns (`-> !`),
/// and is marked `#[cold]` because it is only reached on that failure path.
#[cold]
fn panic_insufficient_data(&self) -> ! {
let pos = self.position();
let len = self.data.len();
panic!("Insufficient remaining data at position {pos} (length {len})");
}
}

macro_rules! read_leb128 {
($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }};
($dec:expr, $fun:ident) => {{
let mut position = 0_usize;
let val = leb128::$fun($dec.reader.as_slice(), &mut position);
let _ = $dec.reader.advance_by(position);
val
}};
}

impl<'a> Decoder for MemDecoder<'a> {
Expand All @@ -583,17 +604,14 @@ impl<'a> Decoder for MemDecoder<'a> {

#[inline]
fn read_u16(&mut self) -> u16 {
let bytes = [self.data[self.position], self.data[self.position + 1]];
let value = u16::from_le_bytes(bytes);
self.position += 2;
value
let bytes = self.read_raw_bytes_array::<2>();
u16::from_le_bytes(*bytes)
}

#[inline]
fn read_u8(&mut self) -> u8 {
let value = self.data[self.position];
self.position += 1;
value
let bytes = self.read_raw_bytes_array::<1>();
u8::from_le_bytes(*bytes)
}

#[inline]
Expand All @@ -618,17 +636,14 @@ impl<'a> Decoder for MemDecoder<'a> {

#[inline]
fn read_i16(&mut self) -> i16 {
let bytes = [self.data[self.position], self.data[self.position + 1]];
let value = i16::from_le_bytes(bytes);
self.position += 2;
value
let bytes = self.read_raw_bytes_array::<2>();
i16::from_le_bytes(*bytes)
}

#[inline]
fn read_i8(&mut self) -> i8 {
let value = self.data[self.position];
self.position += 1;
value as i8
let bytes = self.read_raw_bytes_array::<1>();
i8::from_le_bytes(*bytes)
}

#[inline]
Expand Down Expand Up @@ -663,20 +678,26 @@ impl<'a> Decoder for MemDecoder<'a> {
#[inline]
fn read_str(&mut self) -> &'a str {
let len = self.read_usize();
let sentinel = self.data[self.position + len];
assert!(sentinel == STR_SENTINEL);
let s = unsafe {
std::str::from_utf8_unchecked(&self.data[self.position..self.position + len])
};
self.position += len + 1;
s

// This cannot reuse `read_raw_bytes` as that runs into lifetime issues
// where the slice gets tied to `'b` instead of just to `'a`.
if self.reader.len() <= len {
self.panic_insufficient_data();
}
let slice = self.reader.as_slice();
assert!(slice[len] == STR_SENTINEL);
self.reader.advance_by(len + 1).unwrap();
unsafe { std::str::from_utf8_unchecked(&slice[..len]) }
}

#[inline]
fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
let start = self.position;
self.position += bytes;
&self.data[start..self.position]
if self.reader.len() < bytes {
self.panic_insufficient_data();
}
let slice = self.reader.as_slice();
self.reader.advance_by(bytes).unwrap();
&slice[..bytes]
}
}

Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_serialize/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ pub trait Decoder {
fn read_char(&mut self) -> char;
fn read_str(&mut self) -> &str;
fn read_raw_bytes(&mut self, len: usize) -> &[u8];

/// Reads `N` bytes and returns them as a reference to a fixed-size array.
///
/// This is a thin wrapper over [`Decoder::read_raw_bytes`]: it requests `N`
/// bytes and converts the returned slice with `try_into`.
///
/// # Panics
///
/// Panics (via `unwrap`) if the slice-to-array conversion fails, i.e. if the
/// slice returned by `read_raw_bytes` is not exactly `N` bytes long, in
/// addition to any panic raised by `read_raw_bytes` itself.
#[inline]
fn read_raw_bytes_array<const N: usize>(&mut self) -> &[u8; N] {
self.read_raw_bytes(N).try_into().unwrap()
}
}

/// Trait for types that can be serialized
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_serialize/tests/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ fn test_unit() {
#[test]
fn test_u8() {
let mut vec = vec![];
for i in u8::MIN..u8::MAX {
for i in u8::MIN..=u8::MAX {
vec.push(i);
}
check_round_trip(vec);
}

#[test]
fn test_u16() {
for i in u16::MIN..u16::MAX {
for i in u16::MIN..=u16::MAX {
check_round_trip(vec![1, 2, 3, i, i, i]);
}
}
Expand All @@ -86,15 +86,15 @@ fn test_usize() {
#[test]
fn test_i8() {
let mut vec = vec![];
for i in i8::MIN..i8::MAX {
for i in i8::MIN..=i8::MAX {
vec.push(i);
}
check_round_trip(vec);
}

#[test]
fn test_i16() {
for i in i16::MIN..i16::MAX {
for i in i16::MIN..=i16::MAX {
check_round_trip(vec![-1, 2, -3, i, i, i, 2]);
}
}
Expand Down