Fix massive performance issue in read_to_end
with_end_to_cap is enormously expensive now that it initializes memory, since it involves a 64k allocation + memset on every call. This is most noticeable when calling read_to_end on very small readers, where the new version is **4 orders of magnitude** faster. BufReader also depended on with_end_to_cap, so I've rewritten it in its original form. As a bonus, converted the buffered IO struct Debug impls to use the debug builders. Fixes #23815
This commit is contained in:
parent
3e7385aae9
commit
ccb4e8423e
3 changed files with 77 additions and 59 deletions
|
@ -18,8 +18,9 @@ use io::prelude::*;
|
||||||
use cmp;
|
use cmp;
|
||||||
use error::{self, FromError};
|
use error::{self, FromError};
|
||||||
use fmt;
|
use fmt;
|
||||||
use io::{self, Cursor, DEFAULT_BUF_SIZE, Error, ErrorKind};
|
use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind};
|
||||||
use ptr;
|
use ptr;
|
||||||
|
use iter;
|
||||||
|
|
||||||
/// Wraps a `Read` and buffers input from it
|
/// Wraps a `Read` and buffers input from it
|
||||||
///
|
///
|
||||||
|
@ -30,7 +31,9 @@ use ptr;
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub struct BufReader<R> {
|
pub struct BufReader<R> {
|
||||||
inner: R,
|
inner: R,
|
||||||
buf: Cursor<Vec<u8>>,
|
buf: Vec<u8>,
|
||||||
|
pos: usize,
|
||||||
|
cap: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: Read> BufReader<R> {
|
impl<R: Read> BufReader<R> {
|
||||||
|
@ -43,9 +46,13 @@ impl<R: Read> BufReader<R> {
|
||||||
/// Creates a new `BufReader` with the specified buffer capacity
|
/// Creates a new `BufReader` with the specified buffer capacity
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
|
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
|
||||||
|
let mut buf = Vec::with_capacity(cap);
|
||||||
|
buf.extend(iter::repeat(0).take(cap));
|
||||||
BufReader {
|
BufReader {
|
||||||
inner: inner,
|
inner: inner,
|
||||||
buf: Cursor::new(Vec::with_capacity(cap)),
|
buf: buf,
|
||||||
|
pos: 0,
|
||||||
|
cap: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,12 +81,15 @@ impl<R: Read> Read for BufReader<R> {
|
||||||
// If we don't have any buffered data and we're doing a massive read
|
// If we don't have any buffered data and we're doing a massive read
|
||||||
// (larger than our internal buffer), bypass our internal buffer
|
// (larger than our internal buffer), bypass our internal buffer
|
||||||
// entirely.
|
// entirely.
|
||||||
if self.buf.get_ref().len() == self.buf.position() as usize &&
|
if self.pos == self.cap && buf.len() >= self.buf.len() {
|
||||||
buf.len() >= self.buf.get_ref().capacity() {
|
|
||||||
return self.inner.read(buf);
|
return self.inner.read(buf);
|
||||||
}
|
}
|
||||||
try!(self.fill_buf());
|
let nread = {
|
||||||
self.buf.read(buf)
|
let mut rem = try!(self.fill_buf());
|
||||||
|
try!(rem.read(buf))
|
||||||
|
};
|
||||||
|
self.consume(nread);
|
||||||
|
Ok(nread)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,26 +98,25 @@ impl<R: Read> BufRead for BufReader<R> {
|
||||||
fn fill_buf(&mut self) -> io::Result<&[u8]> {
|
fn fill_buf(&mut self) -> io::Result<&[u8]> {
|
||||||
// If we've reached the end of our internal buffer then we need to fetch
|
// If we've reached the end of our internal buffer then we need to fetch
|
||||||
// some more data from the underlying reader.
|
// some more data from the underlying reader.
|
||||||
if self.buf.position() as usize == self.buf.get_ref().len() {
|
if self.pos == self.cap {
|
||||||
self.buf.set_position(0);
|
self.cap = try!(self.inner.read(&mut self.buf));
|
||||||
let v = self.buf.get_mut();
|
self.pos = 0;
|
||||||
v.truncate(0);
|
|
||||||
let inner = &mut self.inner;
|
|
||||||
try!(super::with_end_to_cap(v, |b| inner.read(b)));
|
|
||||||
}
|
}
|
||||||
self.buf.fill_buf()
|
Ok(&self.buf[self.pos..self.cap])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn consume(&mut self, amt: usize) {
|
fn consume(&mut self, amt: usize) {
|
||||||
self.buf.consume(amt)
|
self.pos = cmp::min(self.pos + amt, self.cap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
|
impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(fmt, "BufReader {{ reader: {:?}, buffer: {}/{} }}",
|
fmt.debug_struct("BufReader")
|
||||||
self.inner, self.buf.position(), self.buf.get_ref().len())
|
.field("reader", &self.inner)
|
||||||
|
.field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
|
||||||
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,8 +231,10 @@ impl<W: Write> Write for BufWriter<W> {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<W: Write> fmt::Debug for BufWriter<W> where W: fmt::Debug {
|
impl<W: Write> fmt::Debug for BufWriter<W> where W: fmt::Debug {
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(fmt, "BufWriter {{ writer: {:?}, buffer: {}/{} }}",
|
fmt.debug_struct("BufWriter")
|
||||||
self.inner.as_ref().unwrap(), self.buf.len(), self.buf.capacity())
|
.field("writer", &self.inner.as_ref().unwrap())
|
||||||
|
.field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity()))
|
||||||
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -337,9 +348,11 @@ impl<W: Write> Write for LineWriter<W> {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl<W: Write> fmt::Debug for LineWriter<W> where W: fmt::Debug {
|
impl<W: Write> fmt::Debug for LineWriter<W> where W: fmt::Debug {
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(fmt, "LineWriter {{ writer: {:?}, buffer: {}/{} }}",
|
fmt.debug_struct("LineWriter")
|
||||||
self.inner.inner, self.inner.buf.len(),
|
.field("writer", &self.inner.inner)
|
||||||
self.inner.buf.capacity())
|
.field("buffer",
|
||||||
|
&format_args!("{}/{}", self.inner.buf.len(), self.inner.buf.capacity()))
|
||||||
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,10 +428,10 @@ impl<S: Read + Write> BufStream<S> {
|
||||||
/// Any leftover data in the read buffer is lost.
|
/// Any leftover data in the read buffer is lost.
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn into_inner(self) -> Result<S, IntoInnerError<BufStream<S>>> {
|
pub fn into_inner(self) -> Result<S, IntoInnerError<BufStream<S>>> {
|
||||||
let BufReader { inner: InternalBufWriter(w), buf } = self.inner;
|
let BufReader { inner: InternalBufWriter(w), buf, pos, cap } = self.inner;
|
||||||
w.into_inner().map_err(|IntoInnerError(w, e)| {
|
w.into_inner().map_err(|IntoInnerError(w, e)| {
|
||||||
IntoInnerError(BufStream {
|
IntoInnerError(BufStream {
|
||||||
inner: BufReader { inner: InternalBufWriter(w), buf: buf },
|
inner: BufReader { inner: InternalBufWriter(w), buf: buf, pos: pos, cap: cap },
|
||||||
}, e)
|
}, e)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -452,10 +465,12 @@ impl<S: Write> fmt::Debug for BufStream<S> where S: fmt::Debug {
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let reader = &self.inner;
|
let reader = &self.inner;
|
||||||
let writer = &self.inner.inner.0;
|
let writer = &self.inner.inner.0;
|
||||||
write!(fmt, "BufStream {{ stream: {:?}, write_buffer: {}/{}, read_buffer: {}/{} }}",
|
fmt.debug_struct("BufStream")
|
||||||
writer.inner,
|
.field("stream", &writer.inner)
|
||||||
writer.buf.len(), writer.buf.capacity(),
|
.field("write_buffer", &format_args!("{}/{}", writer.buf.len(), writer.buf.capacity()))
|
||||||
reader.buf.position(), reader.buf.get_ref().len())
|
.field("read_buffer",
|
||||||
|
&format_args!("{}/{}", reader.cap - reader.pos, reader.buf.len()))
|
||||||
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,30 +48,6 @@ mod stdio;
|
||||||
|
|
||||||
const DEFAULT_BUF_SIZE: usize = 64 * 1024;
|
const DEFAULT_BUF_SIZE: usize = 64 * 1024;
|
||||||
|
|
||||||
// Acquires a slice of the vector `v` from its length to its capacity
|
|
||||||
// (after initializing the data), reads into it, and then updates the length.
|
|
||||||
//
|
|
||||||
// This function is leveraged to efficiently read some bytes into a destination
|
|
||||||
// vector without extra copying and taking advantage of the space that's already
|
|
||||||
// in `v`.
|
|
||||||
fn with_end_to_cap<F>(v: &mut Vec<u8>, f: F) -> Result<usize>
|
|
||||||
where F: FnOnce(&mut [u8]) -> Result<usize>
|
|
||||||
{
|
|
||||||
let len = v.len();
|
|
||||||
let new_area = v.capacity() - len;
|
|
||||||
v.extend(iter::repeat(0).take(new_area));
|
|
||||||
match f(&mut v[len..]) {
|
|
||||||
Ok(n) => {
|
|
||||||
v.truncate(len + n);
|
|
||||||
Ok(n)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
v.truncate(len);
|
|
||||||
Err(e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// A few methods below (read_to_string, read_line) will append data into a
|
// A few methods below (read_to_string, read_line) will append data into a
|
||||||
// `String` buffer, but we need to be pretty careful when doing this. The
|
// `String` buffer, but we need to be pretty careful when doing this. The
|
||||||
// implementation will just call `.as_mut_vec()` and then delegate to a
|
// implementation will just call `.as_mut_vec()` and then delegate to a
|
||||||
|
@ -116,21 +92,47 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This uses an adaptive system to extend the vector when it fills. We want to
|
||||||
|
// avoid paying to allocate and zero a huge chunk of memory if the reader only
|
||||||
|
// has 4 bytes while still making large reads if the reader does have a ton
|
||||||
|
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
|
||||||
|
// time is 4,500 times (!) slower than this if the reader has a very small
|
||||||
|
// amount of data to return.
|
||||||
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
|
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
|
||||||
let mut read = 0;
|
let start_len = buf.len();
|
||||||
|
let mut len = start_len;
|
||||||
|
let mut cap_bump = 16;
|
||||||
|
let ret;
|
||||||
loop {
|
loop {
|
||||||
|
if len == buf.len() {
|
||||||
if buf.capacity() == buf.len() {
|
if buf.capacity() == buf.len() {
|
||||||
buf.reserve(DEFAULT_BUF_SIZE);
|
if cap_bump < DEFAULT_BUF_SIZE {
|
||||||
|
cap_bump *= 2;
|
||||||
}
|
}
|
||||||
match with_end_to_cap(buf, |b| r.read(b)) {
|
buf.reserve(cap_bump);
|
||||||
Ok(0) => return Ok(read),
|
}
|
||||||
Ok(n) => read += n,
|
let new_area = buf.capacity() - buf.len();
|
||||||
|
buf.extend(iter::repeat(0).take(new_area));
|
||||||
|
}
|
||||||
|
|
||||||
|
match r.read(&mut buf[len..]) {
|
||||||
|
Ok(0) => {
|
||||||
|
ret = Ok(len - start_len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Ok(n) => len += n,
|
||||||
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
|
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
|
||||||
Err(e) => return Err(e),
|
Err(e) => {
|
||||||
|
ret = Err(e);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
buf.truncate(len);
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
/// A trait for objects which are byte-oriented sources.
|
/// A trait for objects which are byte-oriented sources.
|
||||||
///
|
///
|
||||||
/// Readers are defined by one method, `read`. Each call to `read` will attempt
|
/// Readers are defined by one method, `read`. Each call to `read` will attempt
|
||||||
|
|
|
@ -128,6 +128,7 @@
|
||||||
#![feature(into_cow)]
|
#![feature(into_cow)]
|
||||||
#![feature(slice_patterns)]
|
#![feature(slice_patterns)]
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
|
#![feature(debug_builders)]
|
||||||
#![cfg_attr(test, feature(test, rustc_private, std_misc))]
|
#![cfg_attr(test, feature(test, rustc_private, std_misc))]
|
||||||
|
|
||||||
// Don't link to std. We are std.
|
// Don't link to std. We are std.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue