Skip to content

Commit 92f3d9a

Browse files
committed
Auto merge of #23820 - sfackler:fast_read_to_end, r=alexcrichton
with_end_to_cap is enormously expensive now that it's initializing memory since it involves 64k allocation + memset on every call. This is most noticeable when calling read_to_end on very small readers, where the new version is **4 orders of magnitude** faster. BufReader also depended on with_end_to_cap so I've rewritten it in its original form. As a bonus, converted the buffered IO struct Debug impls to use the debug builders. I first came across this in sfackler/rust-postgres#106 where a user reported a 10x performance regression. A call to read_to_end turned out to be the culprit: sfackler/rust-postgres@9cd413d. The new version differs from the old in a couple of ways. The buffer size used is now adaptive. It starts at 32 bytes and doubles each time EOF hasn't been reached up to a limit of 64k. In addition, the buffer is only truncated when EOF or an error has been reached, rather than after every call to read as was the case for the old implementation. I wrote up a benchmark to compare the old version and new version: https://gist.github.com/sfackler/e979711b0ee2f2063462 It tests a couple of different cases: a high bandwidth reader, a low bandwidth reader, and a low bandwidth reader that won't return more than 10k per call to `read`. The high bandwidth reader should be analogous to use cases when reading from e.g. a `BufReader` or `Vec`, and the low bandwidth readers should be analogous to reading from something like a `TcpStream`. Of special note, reads from a high bandwidth reader containing 4 bytes are now *4,495 times faster*. ``` ~/foo ❯ cargo bench Compiling foo v0.0.1 (file:///home/sfackler/foo) Running target/release/foo-7498d7dd7faecf5c running 13 tests test test_new ... ignored test new_delay_4 ... bench: 230768 ns/iter (+/- 14812) test new_delay_4_cap ... bench: 231421 ns/iter (+/- 7211) test new_delay_5m ... bench: 14495370 ns/iter (+/- 4008648) test new_delay_5m_cap ... bench: 73127954 ns/iter (+/- 59908587) test new_nodelay_4 ... 
bench: 83 ns/iter (+/- 2) test new_nodelay_5m ... bench: 12527237 ns/iter (+/- 335243) test std_delay_4 ... bench: 373095 ns/iter (+/- 12613) test std_delay_4_cap ... bench: 374190 ns/iter (+/- 19611) test std_delay_5m ... bench: 17356012 ns/iter (+/- 15906588) test std_delay_5m_cap ... bench: 883555035 ns/iter (+/- 205559857) test std_nodelay_4 ... bench: 144937 ns/iter (+/- 2448) test std_nodelay_5m ... bench: 16095893 ns/iter (+/- 3315116) test result: ok. 0 passed; 0 failed; 1 ignored; 12 measured ``` r? @alexcrichton
2 parents c5370be + ccb4e84 commit 92f3d9a

File tree

2 files changed

+76
-59
lines changed

2 files changed

+76
-59
lines changed

src/libstd/io/buffered.rs

+43-28
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ use io::prelude::*;
1818
use cmp;
1919
use error::{self, FromError};
2020
use fmt;
21-
use io::{self, Cursor, DEFAULT_BUF_SIZE, Error, ErrorKind};
21+
use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind};
2222
use ptr;
23+
use iter;
2324

2425
/// Wraps a `Read` and buffers input from it
2526
///
@@ -30,7 +31,9 @@ use ptr;
3031
#[stable(feature = "rust1", since = "1.0.0")]
3132
pub struct BufReader<R> {
3233
inner: R,
33-
buf: Cursor<Vec<u8>>,
34+
buf: Vec<u8>,
35+
pos: usize,
36+
cap: usize,
3437
}
3538

3639
impl<R: Read> BufReader<R> {
@@ -43,9 +46,13 @@ impl<R: Read> BufReader<R> {
4346
/// Creates a new `BufReader` with the specified buffer capacity
4447
#[stable(feature = "rust1", since = "1.0.0")]
4548
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
49+
let mut buf = Vec::with_capacity(cap);
50+
buf.extend(iter::repeat(0).take(cap));
4651
BufReader {
4752
inner: inner,
48-
buf: Cursor::new(Vec::with_capacity(cap)),
53+
buf: buf,
54+
pos: 0,
55+
cap: 0,
4956
}
5057
}
5158

@@ -74,12 +81,15 @@ impl<R: Read> Read for BufReader<R> {
7481
// If we don't have any buffered data and we're doing a massive read
7582
// (larger than our internal buffer), bypass our internal buffer
7683
// entirely.
77-
if self.buf.get_ref().len() == self.buf.position() as usize &&
78-
buf.len() >= self.buf.get_ref().capacity() {
84+
if self.pos == self.cap && buf.len() >= self.buf.len() {
7985
return self.inner.read(buf);
8086
}
81-
try!(self.fill_buf());
82-
self.buf.read(buf)
87+
let nread = {
88+
let mut rem = try!(self.fill_buf());
89+
try!(rem.read(buf))
90+
};
91+
self.consume(nread);
92+
Ok(nread)
8393
}
8494
}
8595

@@ -88,26 +98,25 @@ impl<R: Read> BufRead for BufReader<R> {
8898
fn fill_buf(&mut self) -> io::Result<&[u8]> {
8999
// If we've reached the end of our internal buffer then we need to fetch
90100
// some more data from the underlying reader.
91-
if self.buf.position() as usize == self.buf.get_ref().len() {
92-
self.buf.set_position(0);
93-
let v = self.buf.get_mut();
94-
v.truncate(0);
95-
let inner = &mut self.inner;
96-
try!(super::with_end_to_cap(v, |b| inner.read(b)));
101+
if self.pos == self.cap {
102+
self.cap = try!(self.inner.read(&mut self.buf));
103+
self.pos = 0;
97104
}
98-
self.buf.fill_buf()
105+
Ok(&self.buf[self.pos..self.cap])
99106
}
100107

101108
fn consume(&mut self, amt: usize) {
102-
self.buf.consume(amt)
109+
self.pos = cmp::min(self.pos + amt, self.cap);
103110
}
104111
}
105112

106113
#[stable(feature = "rust1", since = "1.0.0")]
107114
impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
108115
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
109-
write!(fmt, "BufReader {{ reader: {:?}, buffer: {}/{} }}",
110-
self.inner, self.buf.position(), self.buf.get_ref().len())
116+
fmt.debug_struct("BufReader")
117+
.field("reader", &self.inner)
118+
.field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
119+
.finish()
111120
}
112121
}
113122

@@ -222,8 +231,10 @@ impl<W: Write> Write for BufWriter<W> {
222231
#[stable(feature = "rust1", since = "1.0.0")]
223232
impl<W: Write> fmt::Debug for BufWriter<W> where W: fmt::Debug {
224233
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
225-
write!(fmt, "BufWriter {{ writer: {:?}, buffer: {}/{} }}",
226-
self.inner.as_ref().unwrap(), self.buf.len(), self.buf.capacity())
234+
fmt.debug_struct("BufWriter")
235+
.field("writer", &self.inner.as_ref().unwrap())
236+
.field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity()))
237+
.finish()
227238
}
228239
}
229240

@@ -337,9 +348,11 @@ impl<W: Write> Write for LineWriter<W> {
337348
#[stable(feature = "rust1", since = "1.0.0")]
338349
impl<W: Write> fmt::Debug for LineWriter<W> where W: fmt::Debug {
339350
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
340-
write!(fmt, "LineWriter {{ writer: {:?}, buffer: {}/{} }}",
341-
self.inner.inner, self.inner.buf.len(),
342-
self.inner.buf.capacity())
351+
fmt.debug_struct("LineWriter")
352+
.field("writer", &self.inner.inner)
353+
.field("buffer",
354+
&format_args!("{}/{}", self.inner.buf.len(), self.inner.buf.capacity()))
355+
.finish()
343356
}
344357
}
345358

@@ -415,10 +428,10 @@ impl<S: Read + Write> BufStream<S> {
415428
/// Any leftover data in the read buffer is lost.
416429
#[stable(feature = "rust1", since = "1.0.0")]
417430
pub fn into_inner(self) -> Result<S, IntoInnerError<BufStream<S>>> {
418-
let BufReader { inner: InternalBufWriter(w), buf } = self.inner;
431+
let BufReader { inner: InternalBufWriter(w), buf, pos, cap } = self.inner;
419432
w.into_inner().map_err(|IntoInnerError(w, e)| {
420433
IntoInnerError(BufStream {
421-
inner: BufReader { inner: InternalBufWriter(w), buf: buf },
434+
inner: BufReader { inner: InternalBufWriter(w), buf: buf, pos: pos, cap: cap },
422435
}, e)
423436
})
424437
}
@@ -452,10 +465,12 @@ impl<S: Write> fmt::Debug for BufStream<S> where S: fmt::Debug {
452465
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
453466
let reader = &self.inner;
454467
let writer = &self.inner.inner.0;
455-
write!(fmt, "BufStream {{ stream: {:?}, write_buffer: {}/{}, read_buffer: {}/{} }}",
456-
writer.inner,
457-
writer.buf.len(), writer.buf.capacity(),
458-
reader.buf.position(), reader.buf.get_ref().len())
468+
fmt.debug_struct("BufStream")
469+
.field("stream", &writer.inner)
470+
.field("write_buffer", &format_args!("{}/{}", writer.buf.len(), writer.buf.capacity()))
471+
.field("read_buffer",
472+
&format_args!("{}/{}", reader.cap - reader.pos, reader.buf.len()))
473+
.finish()
459474
}
460475
}
461476

src/libstd/io/mod.rs

+33-31
Original file line numberDiff line numberDiff line change
@@ -48,30 +48,6 @@ mod stdio;
4848

4949
const DEFAULT_BUF_SIZE: usize = 64 * 1024;
5050

51-
// Acquires a slice of the vector `v` from its length to its capacity
52-
// (after initializing the data), reads into it, and then updates the length.
53-
//
54-
// This function is leveraged to efficiently read some bytes into a destination
55-
// vector without extra copying and taking advantage of the space that's already
56-
// in `v`.
57-
fn with_end_to_cap<F>(v: &mut Vec<u8>, f: F) -> Result<usize>
58-
where F: FnOnce(&mut [u8]) -> Result<usize>
59-
{
60-
let len = v.len();
61-
let new_area = v.capacity() - len;
62-
v.extend(iter::repeat(0).take(new_area));
63-
match f(&mut v[len..]) {
64-
Ok(n) => {
65-
v.truncate(len + n);
66-
Ok(n)
67-
}
68-
Err(e) => {
69-
v.truncate(len);
70-
Err(e)
71-
}
72-
}
73-
}
74-
7551
// A few methods below (read_to_string, read_line) will append data into a
7652
// `String` buffer, but we need to be pretty careful when doing this. The
7753
// implementation will just call `.as_mut_vec()` and then delegate to a
@@ -116,19 +92,45 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
11692
}
11793
}
11894

95+
// This uses an adaptive system to extend the vector when it fills. We want to
96+
// avoid paying to allocate and zero a huge chunk of memory if the reader only
97+
// has 4 bytes while still making large reads if the reader does have a ton
98+
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
99+
// time is 4,500 times (!) slower than this if the reader has a very small
100+
// amount of data to return.
119101
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
120-
let mut read = 0;
102+
let start_len = buf.len();
103+
let mut len = start_len;
104+
let mut cap_bump = 16;
105+
let ret;
121106
loop {
122-
if buf.capacity() == buf.len() {
123-
buf.reserve(DEFAULT_BUF_SIZE);
107+
if len == buf.len() {
108+
if buf.capacity() == buf.len() {
109+
if cap_bump < DEFAULT_BUF_SIZE {
110+
cap_bump *= 2;
111+
}
112+
buf.reserve(cap_bump);
113+
}
114+
let new_area = buf.capacity() - buf.len();
115+
buf.extend(iter::repeat(0).take(new_area));
124116
}
125-
match with_end_to_cap(buf, |b| r.read(b)) {
126-
Ok(0) => return Ok(read),
127-
Ok(n) => read += n,
117+
118+
match r.read(&mut buf[len..]) {
119+
Ok(0) => {
120+
ret = Ok(len - start_len);
121+
break;
122+
}
123+
Ok(n) => len += n,
128124
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
129-
Err(e) => return Err(e),
125+
Err(e) => {
126+
ret = Err(e);
127+
break;
128+
}
130129
}
131130
}
131+
132+
buf.truncate(len);
133+
ret
132134
}
133135

134136
/// A trait for objects which are byte-oriented sources.

0 commit comments

Comments
 (0)