Skip to content

Commit 3c25ddf

Browse files
committed
Reimplement BufReader to retry harder.
1 parent 61fc8e3 commit 3c25ddf

File tree

1 file changed

+123
-15
lines changed

1 file changed

+123
-15
lines changed

compiler/rustc_serialize/src/opaque.rs

+123-15
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
use crate::leb128::{self, max_leb128_len};
22
use crate::serialize::{self, Decoder as _, Encoder as _};
33
use std::borrow::Cow;
4+
use std::convert::TryInto;
45
use std::fs::File;
5-
use std::io::{self, BufRead, BufReader, Read, Write};
6+
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
67
use std::mem::MaybeUninit;
78
use std::path::Path;
89
use std::ptr;
@@ -681,29 +682,138 @@ impl<'a> serialize::Decoder for Decoder<'a> {
681682
}
682683

683684
/// Decoder that reads from a [`File`] through a manually managed byte buffer.
///
/// The unread, already-buffered bytes are always `buf[pos..cap]`.
pub struct FileDecoder {
    /// Underlying file; read from whenever the buffered bytes run out.
    file: File,
    /// Backing storage for bytes fetched from `file`.
    buf: Box<[u8]>,
    /// Index in `buf` of the next byte to hand out.
    pos: usize,
    /// Number of valid bytes at the front of `buf`.
    cap: usize,
}
686690

687691
impl FileDecoder {
688692
#[inline]
689693
pub fn new(file: BufReader<File>) -> Self {
690-
FileDecoder { file }
694+
const CAP: usize = 8 * 1024;
695+
let mut buf = Vec::with_capacity(CAP);
696+
buf.resize(CAP, 0u8);
697+
let old_buf = file.buffer();
698+
let len = old_buf.len();
699+
buf[..len].copy_from_slice(old_buf);
700+
let file = file.into_inner();
701+
FileDecoder { file, buf: buf.into(), pos: 0, cap: len }
691702
}
692703

693704
#[inline]
694705
pub fn advance(&mut self, bytes: usize) {
695-
self.file.consume(bytes)
706+
self.pos += bytes;
707+
debug_assert!(self.pos <= self.cap);
708+
}
709+
710+
#[inline]
711+
pub fn read_all(self) -> Result<(Box<[u8]>, usize), io::Error> {
712+
let mut file = self.file;
713+
let start_pos = file.seek(SeekFrom::Current(0))?;
714+
let start_pos = start_pos.try_into().unwrap();
715+
file.seek(SeekFrom::Start(0))?;
716+
let mut bytes = Vec::new();
717+
file.read_to_end(&mut bytes)?;
718+
Ok((bytes.into(), start_pos))
719+
}
720+
721+
#[inline]
722+
fn read_byte(&mut self) -> Result<u8, io::Error> {
723+
if self.pos < self.cap {
724+
let c = self.buf[self.pos];
725+
self.pos += 1;
726+
Ok(c)
727+
} else {
728+
let read = self.file.read(&mut self.buf)?;
729+
self.pos = 0;
730+
self.cap = read;
731+
Ok(self.buf[0])
732+
}
733+
}
734+
735+
fn read_exact(&mut self, mut out: &mut [u8]) -> Result<(), io::Error> {
736+
loop {
737+
let len = out.len();
738+
if len == 0 {
739+
return Ok(());
740+
} else if self.pos + len < self.cap {
741+
out.copy_from_slice(&self.buf[self.pos..self.pos + len]);
742+
self.pos += len;
743+
return Ok(());
744+
}
745+
746+
let available = self.cap - self.pos;
747+
out[..available].copy_from_slice(&self.buf[self.pos..self.cap]);
748+
self.pos += len;
749+
750+
// Re-fill the buffer starting from zero.
751+
let read = self.file.read(&mut self.buf)?;
752+
self.pos = 0;
753+
self.cap = read;
754+
out = &mut out[available..];
755+
}
756+
}
757+
758+
/// Read the buffer until we encounter a byte with its top bit unset.
759+
#[inline]
760+
fn read_for_leb128(&mut self) -> Result<&[u8], io::Error> {
761+
self.fill_for_leb128()?;
762+
Ok(&self.buf[self.pos..self.cap])
763+
}
764+
765+
/// Fill the buffer until we encounter a byte with its top bit unset.
766+
/// Fast path.
767+
#[inline]
768+
fn fill_for_leb128(&mut self) -> Result<(), io::Error> {
769+
let buf = &mut self.buf[..];
770+
let known = &buf[self.pos..self.cap];
771+
if std::intrinsics::likely(known.iter().any(|c| c & 0x80 == 0)) {
772+
return Ok(());
773+
}
774+
775+
self.fill_more_for_leb128()
776+
}
777+
778+
/// Fill the buffer until we encounter a byte with its top bit unset.
779+
/// Slow path.
780+
#[cold]
781+
fn fill_more_for_leb128(&mut self) -> Result<(), io::Error> {
782+
let buf = &mut self.buf[..];
783+
let max = leb128::max_leb128_len();
784+
if self.pos + max >= self.cap {
785+
// The buffer should be large enough.
786+
debug_assert!(self.pos > max);
787+
let len = self.cap - self.pos;
788+
let (start, end) = buf.split_at_mut(self.pos);
789+
start[..len].copy_from_slice(&end[..len]);
790+
self.pos = 0;
791+
self.cap = len;
792+
}
793+
794+
// We've reached the end of our internal buffer then we need to fetch
795+
// some more data from the file.
796+
loop {
797+
let read = self.file.read(&mut buf[self.cap..])?;
798+
self.cap += read;
799+
800+
if read == 0 {
801+
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, ""));
802+
}
803+
804+
let known = &mut buf[self.pos..self.cap];
805+
if known.iter().any(|c| c & 0x80 == 0) {
806+
return Ok(());
807+
}
808+
}
696809
}
697810
}
698811

699812
// Decodes one LEB128 value of type `$ty` from `$dec` using `leb128::$fun`.
//
// The decoder is first asked for a slice that contains a byte with its top bit
// unset, the value is decoded from that slice, and the decoder is then
// advanced by the number of bytes the decoding function reports as consumed.
macro_rules! read_leb128 {
    ($dec:expr, $fun:ident, $ty:ty) => {{
        let bytes = $dec.read_for_leb128()?;
        let (value, consumed): ($ty, usize) = leb128::$fun(bytes);
        $dec.advance(consumed);
        Ok(value)
    }};
}
@@ -738,9 +848,7 @@ impl serialize::Decoder for FileDecoder {
738848

739849
#[inline]
740850
fn read_u8(&mut self) -> Result<u8, Self::Error> {
741-
let mut value = [0; 1];
742-
self.file.read_exact(&mut value)?;
743-
let [value] = value;
851+
let value = self.read_byte()?;
744852
Ok(value)
745853
}
746854

@@ -809,7 +917,7 @@ impl serialize::Decoder for FileDecoder {
809917
let len = self.read_usize()?;
810918
let mut buf = Vec::new();
811919
buf.resize(len, 0u8);
812-
self.file.read_exact(&mut buf)?;
920+
self.read_exact(&mut buf)?;
813921
let s = String::from_utf8(buf).unwrap();
814922
Ok(Cow::Owned(s))
815923
}
@@ -821,7 +929,7 @@ impl serialize::Decoder for FileDecoder {
821929

822930
#[inline]
823931
fn read_raw_bytes(&mut self, s: &mut [MaybeUninit<u8>]) -> Result<(), Self::Error> {
824-
self.file.read_exact(unsafe { MaybeUninit::slice_assume_init_mut(s) })
932+
self.read_exact(unsafe { MaybeUninit::slice_assume_init_mut(s) })
825933
}
826934
}
827935

0 commit comments

Comments
 (0)