Skip to content

Commit 761ddf3

Browse files
committed
Remove some redundant checks from BufReader
The implementation of BufReader contains a lot of redundant checks. While no single one of these checks is particularly expensive to execute, taken together they dramatically inhibit LLVM's ability to make subsequent optimizations.
1 parent b4151a4 commit 761ddf3

File tree

3 files changed

+106
-53
lines changed

3 files changed

+106
-53
lines changed

library/std/src/io/buffered/bufreader.rs

+30-53
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
use crate::cmp;
1+
mod buffer;
2+
23
use crate::fmt;
34
use crate::io::{
45
self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
56
};
6-
use crate::mem::MaybeUninit;
7+
use buffer::Buffer;
78

89
/// The `BufReader<R>` struct adds buffering to any reader.
910
///
@@ -48,10 +49,7 @@ use crate::mem::MaybeUninit;
4849
#[stable(feature = "rust1", since = "1.0.0")]
4950
pub struct BufReader<R> {
5051
inner: R,
51-
buf: Box<[MaybeUninit<u8>]>,
52-
pos: usize,
53-
cap: usize,
54-
init: usize,
52+
buf: Buffer,
5553
}
5654

5755
impl<R: Read> BufReader<R> {
@@ -93,8 +91,7 @@ impl<R: Read> BufReader<R> {
9391
/// ```
9492
#[stable(feature = "rust1", since = "1.0.0")]
9593
pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
96-
let buf = Box::new_uninit_slice(capacity);
97-
BufReader { inner, buf, pos: 0, cap: 0, init: 0 }
94+
BufReader { inner, buf: Buffer::with_capacity(capacity) }
9895
}
9996
}
10097

@@ -170,8 +167,7 @@ impl<R> BufReader<R> {
170167
/// ```
171168
#[stable(feature = "bufreader_buffer", since = "1.37.0")]
172169
pub fn buffer(&self) -> &[u8] {
173-
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
174-
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) }
170+
self.buf.buffer()
175171
}
176172

177173
/// Returns the number of bytes the internal buffer can hold at once.
@@ -194,7 +190,7 @@ impl<R> BufReader<R> {
194190
/// ```
195191
#[stable(feature = "buffered_io_capacity", since = "1.46.0")]
196192
pub fn capacity(&self) -> usize {
197-
self.buf.len()
193+
self.buf.capacity()
198194
}
199195

200196
/// Unwraps this `BufReader<R>`, returning the underlying reader.
@@ -224,8 +220,7 @@ impl<R> BufReader<R> {
224220
/// Invalidates all data in the internal buffer.
225221
#[inline]
226222
fn discard_buffer(&mut self) {
227-
self.pos = 0;
228-
self.cap = 0;
223+
self.buf.discard_buffer()
229224
}
230225
}
231226

@@ -236,15 +231,15 @@ impl<R: Seek> BufReader<R> {
236231
/// must track this information themselves if it is required.
237232
#[stable(feature = "bufreader_seek_relative", since = "1.53.0")]
238233
pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
239-
let pos = self.pos as u64;
234+
let pos = self.buf.pos() as u64;
240235
if offset < 0 {
241-
if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
242-
self.pos = new_pos as usize;
236+
if let Some(_) = pos.checked_sub((-offset) as u64) {
237+
self.buf.unconsume((-offset) as usize);
243238
return Ok(());
244239
}
245240
} else if let Some(new_pos) = pos.checked_add(offset as u64) {
246-
if new_pos <= self.cap as u64 {
247-
self.pos = new_pos as usize;
241+
if new_pos <= self.buf.cap() as u64 {
242+
self.buf.consume(offset as usize);
248243
return Ok(());
249244
}
250245
}
@@ -259,7 +254,7 @@ impl<R: Read> Read for BufReader<R> {
259254
// If we don't have any buffered data and we're doing a massive read
260255
// (larger than our internal buffer), bypass our internal buffer
261256
// entirely.
262-
if self.pos == self.cap && buf.len() >= self.buf.len() {
257+
if self.buf.pos() == self.buf.cap() && buf.len() >= self.capacity() {
263258
self.discard_buffer();
264259
return self.inner.read(buf);
265260
}
@@ -275,7 +270,7 @@ impl<R: Read> Read for BufReader<R> {
275270
// If we don't have any buffered data and we're doing a massive read
276271
// (larger than our internal buffer), bypass our internal buffer
277272
// entirely.
278-
if self.pos == self.cap && buf.remaining() >= self.buf.len() {
273+
if self.buf.pos() == self.buf.cap() && buf.remaining() >= self.capacity() {
279274
self.discard_buffer();
280275
return self.inner.read_buf(buf);
281276
}
@@ -295,9 +290,9 @@ impl<R: Read> Read for BufReader<R> {
295290
// generation for the common path where the buffer has enough bytes to fill the passed-in
296291
// buffer.
297292
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
298-
if self.buffer().len() >= buf.len() {
299-
buf.copy_from_slice(&self.buffer()[..buf.len()]);
300-
self.consume(buf.len());
293+
if let Some(claimed) = self.buffer().get(..buf.len()) {
294+
buf.copy_from_slice(claimed);
295+
self.consume(claimed.len());
301296
return Ok(());
302297
}
303298

@@ -306,7 +301,7 @@ impl<R: Read> Read for BufReader<R> {
306301

307302
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
308303
let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
309-
if self.pos == self.cap && total_len >= self.buf.len() {
304+
if self.buf.pos() == self.buf.cap() && total_len >= self.capacity() {
310305
self.discard_buffer();
311306
return self.inner.read_vectored(bufs);
312307
}
@@ -325,8 +320,9 @@ impl<R: Read> Read for BufReader<R> {
325320
// The inner reader might have an optimized `read_to_end`. Drain our buffer and then
326321
// delegate to the inner implementation.
327322
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
328-
let nread = self.cap - self.pos;
329-
buf.extend_from_slice(&self.buffer());
323+
let inner_buf = self.buffer();
324+
buf.extend_from_slice(inner_buf);
325+
let nread = inner_buf.len();
330326
self.discard_buffer();
331327
Ok(nread + self.inner.read_to_end(buf)?)
332328
}
@@ -371,33 +367,11 @@ impl<R: Read> Read for BufReader<R> {
371367
#[stable(feature = "rust1", since = "1.0.0")]
372368
impl<R: Read> BufRead for BufReader<R> {
373369
fn fill_buf(&mut self) -> io::Result<&[u8]> {
374-
// If we've reached the end of our internal buffer then we need to fetch
375-
// some more data from the underlying reader.
376-
// Branch using `>=` instead of the more correct `==`
377-
// to tell the compiler that the pos..cap slice is always valid.
378-
if self.pos >= self.cap {
379-
debug_assert!(self.pos == self.cap);
380-
381-
let mut readbuf = ReadBuf::uninit(&mut self.buf);
382-
383-
// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
384-
// from the last time this function was called
385-
unsafe {
386-
readbuf.assume_init(self.init);
387-
}
388-
389-
self.inner.read_buf(&mut readbuf)?;
390-
391-
self.cap = readbuf.filled_len();
392-
self.init = readbuf.initialized_len();
393-
394-
self.pos = 0;
395-
}
396-
Ok(self.buffer())
370+
self.buf.fill_buf(&mut self.inner)
397371
}
398372

399373
fn consume(&mut self, amt: usize) {
400-
self.pos = cmp::min(self.pos + amt, self.cap);
374+
self.buf.consume(amt)
401375
}
402376
}
403377

@@ -409,7 +383,10 @@ where
409383
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
410384
fmt.debug_struct("BufReader")
411385
.field("reader", &self.inner)
412-
.field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
386+
.field(
387+
"buffer",
388+
&format_args!("{}/{}", self.buf.cap() - self.buf.pos(), self.capacity()),
389+
)
413390
.finish()
414391
}
415392
}
@@ -441,7 +418,7 @@ impl<R: Seek> Seek for BufReader<R> {
441418
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
442419
let result: u64;
443420
if let SeekFrom::Current(n) = pos {
444-
let remainder = (self.cap - self.pos) as i64;
421+
let remainder = (self.buf.cap() - self.buf.pos()) as i64;
445422
// it should be safe to assume that remainder fits within an i64 as the alternative
446423
// means we managed to allocate 8 exbibytes and that's absurd.
447424
// But it's not out of the realm of possibility for some weird underlying reader to
@@ -499,7 +476,7 @@ impl<R: Seek> Seek for BufReader<R> {
499476
/// }
500477
/// ```
501478
fn stream_position(&mut self) -> io::Result<u64> {
502-
let remainder = (self.cap - self.pos) as u64;
479+
let remainder = (self.buf.cap() - self.buf.pos()) as u64;
503480
self.inner.stream_position().map(|pos| {
504481
pos.checked_sub(remainder).expect(
505482
"overflow when subtracting remaining buffer size from inner stream position",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
use crate::cmp;
2+
use crate::io::{self, Read, ReadBuf};
3+
use crate::mem::MaybeUninit;
4+
5+
pub struct Buffer {
6+
buf: Box<[MaybeUninit<u8>]>,
7+
pos: usize,
8+
cap: usize,
9+
init: usize,
10+
}
11+
12+
impl Buffer {
13+
pub fn with_capacity(capacity: usize) -> Self {
14+
let buf = Box::new_uninit_slice(capacity);
15+
Self { buf, pos: 0, cap: 0, init: 0 }
16+
}
17+
18+
pub fn buffer(&self) -> &[u8] {
19+
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
20+
// Additionally, both self.pos and self.cap are valid, self.cap >= self.pos, and
21+
// that region is initialized because those are all invariants of this type.
22+
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf.get_unchecked(self.pos..self.cap)) }
23+
}
24+
25+
pub fn capacity(&self) -> usize {
26+
self.buf.len()
27+
}
28+
29+
pub fn cap(&self) -> usize {
30+
self.cap
31+
}
32+
33+
pub fn pos(&self) -> usize {
34+
self.pos
35+
}
36+
37+
pub fn discard_buffer(&mut self) {
38+
self.pos = 0;
39+
self.cap = 0;
40+
}
41+
42+
pub fn consume(&mut self, amt: usize) {
43+
self.pos = cmp::min(self.pos + amt, self.cap);
44+
}
45+
46+
pub fn unconsume(&mut self, amt: usize) {
47+
self.pos = self.pos.saturating_sub(amt);
48+
}
49+
50+
pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> {
51+
// If we've reached the end of our internal buffer then we need to fetch
52+
// some more data from the underlying reader.
53+
// Branch using `>=` instead of the more correct `==`
54+
// to tell the compiler that the pos..cap slice is always valid.
55+
if self.pos >= self.cap {
56+
debug_assert!(self.pos == self.cap);
57+
58+
let mut readbuf = ReadBuf::uninit(&mut self.buf);
59+
60+
// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
61+
// from the last time this function was called
62+
unsafe {
63+
readbuf.assume_init(self.init);
64+
}
65+
66+
reader.read_buf(&mut readbuf)?;
67+
68+
self.cap = readbuf.filled_len();
69+
self.init = readbuf.initialized_len();
70+
71+
self.pos = 0;
72+
}
73+
Ok(self.buffer())
74+
}
75+
}

library/std/src/io/buffered/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ fn bench_buffered_reader_small_reads(b: &mut test::Bencher) {
523523
let mut buf = [0u8; 4];
524524
for _ in 0..1024 {
525525
reader.read_exact(&mut buf).unwrap();
526+
core::hint::black_box(&buf);
526527
}
527528
});
528529
}

0 commit comments

Comments
 (0)