Skip to content

Commit 1f32d40

Browse files
committed
BufWriter: apply #[inline] / #[inline(never)] optimizations
Ensure that `write` and `write_all` can be inlined and that their commonly executed fast paths can be as short as possible. `write_vectored` would likely benefit from the same optimization, but I omitted it because its implementation is more complex, and I don't have a benchmark on hand to guide its optimization.
1 parent 5c13042 commit 1f32d40

File tree

1 file changed

+66
-24
lines changed

1 file changed

+66
-24
lines changed

library/std/src/io/buffered/bufwriter.rs

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,52 @@ impl<W: Write> BufWriter<W> {
331331
let buf = if !self.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) };
332332
(self.inner.take().unwrap(), buf)
333333
}
334+
335+
// Ensure this function does not get inlined into `write`, so that it
336+
// remains inlineable and its common path remains as short as possible.
337+
// If this function ends up being called frequently relative to `write`,
338+
// it's likely a sign that the client is using an improperly sized buffer.
339+
#[inline(never)]
340+
fn flush_and_write(&mut self, buf: &[u8]) -> io::Result<usize> {
341+
self.flush_buf()?;
342+
343+
// Why not len > capacity? To avoid a needless trip through the buffer when the
344+
// input exactly fills it. We'd just need to flush it to the underlying writer anyway.
345+
if buf.len() >= self.buf.capacity() {
346+
self.panicked = true;
347+
let r = self.get_mut().write(buf);
348+
self.panicked = false;
349+
r
350+
} else {
351+
self.buf.extend_from_slice(buf);
352+
Ok(buf.len())
353+
}
354+
}
355+
356+
// Ensure this function does not get inlined into `write_all`, so that it
357+
// remains inlineable and its common path remains as short as possible.
358+
// If this function ends up being called frequently relative to `write_all`,
359+
// it's likely a sign that the client is using an improperly sized buffer.
360+
#[inline(never)]
361+
fn flush_and_write_all(&mut self, buf: &[u8]) -> io::Result<()> {
362+
// Normally, `write_all` just calls `write` in a loop. We can do better
363+
// by calling `self.get_mut().write_all()` directly, which avoids
364+
// round trips through the buffer in the event of a series of partial
365+
// writes in some circumstances.
366+
self.flush_buf()?;
367+
368+
// Why not len > capacity? To avoid a needless trip through the buffer when the
369+
// input exactly fills it. We'd just need to flush it to the underlying writer anyway.
370+
if buf.len() >= self.buf.capacity() {
371+
self.panicked = true;
372+
let r = self.get_mut().write_all(buf);
373+
self.panicked = false;
374+
r
375+
} else {
376+
self.buf.extend_from_slice(buf);
377+
Ok(())
378+
}
379+
}
334380
}
335381

336382
#[unstable(feature = "bufwriter_into_raw_parts", issue = "80690")]
@@ -402,43 +448,39 @@ impl fmt::Debug for WriterPanicked {
402448

403449
#[stable(feature = "rust1", since = "1.0.0")]
404450
impl<W: Write> Write for BufWriter<W> {
451+
#[inline]
405452
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
406-
if self.buf.len() + buf.len() > self.buf.capacity() {
407-
self.flush_buf()?;
408-
}
409-
// FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
410-
if buf.len() >= self.buf.capacity() {
411-
self.panicked = true;
412-
let r = self.get_mut().write(buf);
413-
self.panicked = false;
414-
r
415-
} else {
453+
// The `buf.len() != self.buf.capacity()` check is done to avoid a needless trip through
454+
// the buffer when the input is exactly the same size as it. For many clients, that is a
455+
// rare event, so it's unfortunate that the check is in the common code path. But it
456+
// prevents pathological cases for other clients which *always* make writes of this size.
457+
// See #72919 and #79930 for more info and a breadcrumb trail.
458+
if self.buf.len() + buf.len() <= self.buf.capacity() && buf.len() != self.buf.capacity() {
416459
self.buf.extend_from_slice(buf);
417460
Ok(buf.len())
461+
} else {
462+
self.flush_and_write(buf)
418463
}
419464
}
420465

466+
#[inline]
421467
fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
422-
// Normally, `write_all` just calls `write` in a loop. We can do better
423-
// by calling `self.get_mut().write_all()` directly, which avoids
424-
// round trips through the buffer in the event of a series of partial
425-
// writes in some circumstances.
426-
if self.buf.len() + buf.len() > self.buf.capacity() {
427-
self.flush_buf()?;
428-
}
429-
// FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
430-
if buf.len() >= self.buf.capacity() {
431-
self.panicked = true;
432-
let r = self.get_mut().write_all(buf);
433-
self.panicked = false;
434-
r
435-
} else {
468+
// The `buf.len() != self.buf.capacity()` check is done to avoid a needless trip through
469+
// the buffer when the input is exactly the same size as it. For many clients, that is a
470+
// rare event, so it's unfortunate that the check is in the common code path. But it
471+
// prevents pathological cases for other clients which *always* make writes of this size.
472+
// See #72919 and #79930 for more info and a breadcrumb trail.
473+
if self.buf.len() + buf.len() <= self.buf.capacity() && buf.len() != self.buf.capacity() {
436474
self.buf.extend_from_slice(buf);
437475
Ok(())
476+
} else {
477+
self.flush_and_write_all(buf)
438478
}
439479
}
440480

441481
fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
482+
// FIXME: Consider applying `#[inline]` / `#[inline(never)]` optimizations already applied
483+
// to `write` and `write_all`. The performance benefits can be significant. See #79930.
442484
if self.get_ref().is_write_vectored() {
443485
let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
444486
if self.buf.len() + total_len > self.buf.capacity() {

0 commit comments

Comments
 (0)