Auto merge of #110655 - ChrisDenton:read-to-end, r=joshtriplett
Limit read size in `File::read_to_end` loop. Fixes #110650. Windows file reads have a performance overhead that is proportional to the buffer size. When we can reasonably expect to know the file size, we can set a reasonable upper bound on the size of the buffer used in a single read call.
This commit is contained in:
commit
9de7d9169c
4 changed files with 43 additions and 24 deletions
|
@ -249,9 +249,9 @@ pub struct DirBuilder {
|
||||||
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
|
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
|
||||||
fn inner(path: &Path) -> io::Result<Vec<u8>> {
|
fn inner(path: &Path) -> io::Result<Vec<u8>> {
|
||||||
let mut file = File::open(path)?;
|
let mut file = File::open(path)?;
|
||||||
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
|
let size = file.metadata().map(|m| m.len() as usize).ok();
|
||||||
let mut bytes = Vec::with_capacity(size as usize);
|
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
|
||||||
io::default_read_to_end(&mut file, &mut bytes)?;
|
io::default_read_to_end(&mut file, &mut bytes, size)?;
|
||||||
Ok(bytes)
|
Ok(bytes)
|
||||||
}
|
}
|
||||||
inner(path.as_ref())
|
inner(path.as_ref())
|
||||||
|
@ -289,9 +289,9 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
|
||||||
pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
|
pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
|
||||||
fn inner(path: &Path) -> io::Result<String> {
|
fn inner(path: &Path) -> io::Result<String> {
|
||||||
let mut file = File::open(path)?;
|
let mut file = File::open(path)?;
|
||||||
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
|
let size = file.metadata().map(|m| m.len() as usize).ok();
|
||||||
let mut string = String::with_capacity(size as usize);
|
let mut string = String::with_capacity(size.unwrap_or(0));
|
||||||
io::default_read_to_string(&mut file, &mut string)?;
|
io::default_read_to_string(&mut file, &mut string, size)?;
|
||||||
Ok(string)
|
Ok(string)
|
||||||
}
|
}
|
||||||
inner(path.as_ref())
|
inner(path.as_ref())
|
||||||
|
@ -732,12 +732,12 @@ impl fmt::Debug for File {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Indicates how much extra capacity is needed to read the rest of the file.
|
/// Indicates how much extra capacity is needed to read the rest of the file.
|
||||||
fn buffer_capacity_required(mut file: &File) -> usize {
|
fn buffer_capacity_required(mut file: &File) -> Option<usize> {
|
||||||
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
|
let size = file.metadata().map(|m| m.len()).ok()?;
|
||||||
let pos = file.stream_position().unwrap_or(0);
|
let pos = file.stream_position().ok()?;
|
||||||
// Don't worry about `usize` overflow because reading will fail regardless
|
// Don't worry about `usize` overflow because reading will fail regardless
|
||||||
// in that case.
|
// in that case.
|
||||||
size.saturating_sub(pos) as usize
|
Some(size.saturating_sub(pos) as usize)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -761,14 +761,16 @@ impl Read for File {
|
||||||
|
|
||||||
// Reserves space in the buffer based on the file size when available.
|
// Reserves space in the buffer based on the file size when available.
|
||||||
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
|
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
|
||||||
buf.reserve(buffer_capacity_required(self));
|
let size = buffer_capacity_required(self);
|
||||||
io::default_read_to_end(self, buf)
|
buf.reserve(size.unwrap_or(0));
|
||||||
|
io::default_read_to_end(self, buf, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reserves space in the buffer based on the file size when available.
|
// Reserves space in the buffer based on the file size when available.
|
||||||
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
|
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
|
||||||
buf.reserve(buffer_capacity_required(self));
|
let size = buffer_capacity_required(self);
|
||||||
io::default_read_to_string(self, buf)
|
buf.reserve(size.unwrap_or(0));
|
||||||
|
io::default_read_to_string(self, buf, size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -817,14 +819,16 @@ impl Read for &File {
|
||||||
|
|
||||||
// Reserves space in the buffer based on the file size when available.
|
// Reserves space in the buffer based on the file size when available.
|
||||||
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
|
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
|
||||||
buf.reserve(buffer_capacity_required(self));
|
let size = buffer_capacity_required(self);
|
||||||
io::default_read_to_end(self, buf)
|
buf.reserve(size.unwrap_or(0));
|
||||||
|
io::default_read_to_end(self, buf, size)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reserves space in the buffer based on the file size when available.
|
// Reserves space in the buffer based on the file size when available.
|
||||||
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
|
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
|
||||||
buf.reserve(buffer_capacity_required(self));
|
let size = buffer_capacity_required(self);
|
||||||
io::default_read_to_string(self, buf)
|
buf.reserve(size.unwrap_or(0));
|
||||||
|
io::default_read_to_string(self, buf, size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
|
|
@ -357,9 +357,17 @@ where
|
||||||
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
|
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
|
||||||
// time is 4,500 times (!) slower than a default reservation size of 32 if the
|
// time is 4,500 times (!) slower than a default reservation size of 32 if the
|
||||||
// reader has a very small amount of data to return.
|
// reader has a very small amount of data to return.
|
||||||
pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
|
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
|
||||||
|
r: &mut R,
|
||||||
|
buf: &mut Vec<u8>,
|
||||||
|
size_hint: Option<usize>,
|
||||||
|
) -> Result<usize> {
|
||||||
let start_len = buf.len();
|
let start_len = buf.len();
|
||||||
let start_cap = buf.capacity();
|
let start_cap = buf.capacity();
|
||||||
|
// Optionally limit the maximum bytes read on each iteration.
|
||||||
|
// This adds an arbitrary fiddle factor to allow for more data than we expect.
|
||||||
|
let max_read_size =
|
||||||
|
size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));
|
||||||
|
|
||||||
let mut initialized = 0; // Extra initialized bytes from previous loop iteration
|
let mut initialized = 0; // Extra initialized bytes from previous loop iteration
|
||||||
loop {
|
loop {
|
||||||
|
@ -367,7 +375,12 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
|
||||||
buf.reserve(32); // buf is full, need more space
|
buf.reserve(32); // buf is full, need more space
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();
|
let mut spare = buf.spare_capacity_mut();
|
||||||
|
if let Some(size) = max_read_size {
|
||||||
|
let len = cmp::min(spare.len(), size);
|
||||||
|
spare = &mut spare[..len]
|
||||||
|
}
|
||||||
|
let mut read_buf: BorrowedBuf<'_> = spare.into();
|
||||||
|
|
||||||
// SAFETY: These bytes were initialized but not filled in the previous loop
|
// SAFETY: These bytes were initialized but not filled in the previous loop
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -419,6 +432,7 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
|
||||||
pub(crate) fn default_read_to_string<R: Read + ?Sized>(
|
pub(crate) fn default_read_to_string<R: Read + ?Sized>(
|
||||||
r: &mut R,
|
r: &mut R,
|
||||||
buf: &mut String,
|
buf: &mut String,
|
||||||
|
size_hint: Option<usize>,
|
||||||
) -> Result<usize> {
|
) -> Result<usize> {
|
||||||
// Note that we do *not* call `r.read_to_end()` here. We are passing
|
// Note that we do *not* call `r.read_to_end()` here. We are passing
|
||||||
// `&mut Vec<u8>` (the raw contents of `buf`) into the `read_to_end`
|
// `&mut Vec<u8>` (the raw contents of `buf`) into the `read_to_end`
|
||||||
|
@ -429,7 +443,7 @@ pub(crate) fn default_read_to_string<R: Read + ?Sized>(
|
||||||
// To prevent extraneously checking the UTF-8-ness of the entire buffer
|
// To prevent extraneously checking the UTF-8-ness of the entire buffer
|
||||||
// we pass it to our hardcoded `default_read_to_end` implementation which
|
// we pass it to our hardcoded `default_read_to_end` implementation which
|
||||||
// we know is guaranteed to only read data into the end of the buffer.
|
// we know is guaranteed to only read data into the end of the buffer.
|
||||||
unsafe { append_to_string(buf, |b| default_read_to_end(r, b)) }
|
unsafe { append_to_string(buf, |b| default_read_to_end(r, b, size_hint)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn default_read_vectored<F>(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result<usize>
|
pub(crate) fn default_read_vectored<F>(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result<usize>
|
||||||
|
@ -709,7 +723,7 @@ pub trait Read {
|
||||||
/// [`std::fs::read`]: crate::fs::read
|
/// [`std::fs::read`]: crate::fs::read
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
|
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
|
||||||
default_read_to_end(self, buf)
|
default_read_to_end(self, buf, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read all bytes until EOF in this source, appending them to `buf`.
|
/// Read all bytes until EOF in this source, appending them to `buf`.
|
||||||
|
@ -752,7 +766,7 @@ pub trait Read {
|
||||||
/// [`std::fs::read_to_string`]: crate::fs::read_to_string
|
/// [`std::fs::read_to_string`]: crate::fs::read_to_string
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
|
fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
|
||||||
default_read_to_string(self, buf)
|
default_read_to_string(self, buf, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read the exact number of bytes required to fill `buf`.
|
/// Read the exact number of bytes required to fill `buf`.
|
||||||
|
|
|
@ -314,7 +314,7 @@ fn bench_read_to_end(b: &mut test::Bencher) {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut lr = repeat(1).take(10000000);
|
let mut lr = repeat(1).take(10000000);
|
||||||
let mut vec = Vec::with_capacity(1024);
|
let mut vec = Vec::with_capacity(1024);
|
||||||
super::default_read_to_end(&mut lr, &mut vec)
|
super::default_read_to_end(&mut lr, &mut vec, None)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -288,6 +288,7 @@
|
||||||
#![feature(float_next_up_down)]
|
#![feature(float_next_up_down)]
|
||||||
#![feature(hasher_prefixfree_extras)]
|
#![feature(hasher_prefixfree_extras)]
|
||||||
#![feature(hashmap_internals)]
|
#![feature(hashmap_internals)]
|
||||||
|
#![feature(int_roundings)]
|
||||||
#![feature(ip)]
|
#![feature(ip)]
|
||||||
#![feature(ip_in_core)]
|
#![feature(ip_in_core)]
|
||||||
#![feature(maybe_uninit_slice)]
|
#![feature(maybe_uninit_slice)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue