Rollup merge of #118222 - the8472:copy-use-vec-write, r=m-ou-se

unify read_to_end and io::copy impls for reading into a Vec This ports over the initial probe (to avoid allocation) and the dynamic read sizing from the io::copy specialization to the `default_read_to_end` implementation which already had its own optimizations for different cases. I think it should be a best-of-both now. suggested by `@a1phyr` in https://github.com/rust-lang/rust/pull/117576#issuecomment-1803408492
2023-11-28 16:09:54 +01:00 · 2023-11-28 16:09:54 +01:00 · 97ef5a3b53
commit 97ef5a3b53
parent b8e1194ab2 bc7dd5fa6d
3 changed files with 90 additions and 106 deletions
--- a/library/std/src/io/mod.rs
+++ b/library/std/src/io/mod.rs
@ -397,12 +397,16 @@ where
    }
 }

-// This uses an adaptive system to extend the vector when it fills. We want to
-// avoid paying to allocate and zero a huge chunk of memory if the reader only
-// has 4 bytes while still making large reads if the reader does have a ton
-// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
-// time is 4,500 times (!) slower than a default reservation size of 32 if the
-// reader has a very small amount of data to return.
+// Here we must serve many masters with conflicting goals:
+//
+// - avoid allocating unless necessary
+// - avoid overallocating if we know the exact size (#89165)
+// - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820)
+// - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
+// - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
+//   at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost
+//   proportional to buffer size (#110650)
+//
 pub(crate) fn default_read_to_end<R: Read + ?Sized>(
    r: &mut R,
    buf: &mut Vec<u8>,
@ -412,20 +416,58 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
    let start_cap = buf.capacity();
    // Optionally limit the maximum bytes read on each iteration.
    // This adds an arbitrary fiddle factor to allow for more data than we expect.
-    let max_read_size =
-        size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));
+    let mut max_read_size = size_hint
+        .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE))
+        .unwrap_or(DEFAULT_BUF_SIZE);

    let mut initialized = 0; // Extra initialized bytes from previous loop iteration
+
+    const PROBE_SIZE: usize = 32;
+
+    fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
+        let mut probe = [0u8; PROBE_SIZE];
+
+        loop {
+            match r.read(&mut probe) {
+                Ok(n) => {
+                    buf.extend_from_slice(&probe[..n]);
+                    return Ok(n);
+                }
+                Err(ref e) if e.is_interrupted() => continue,
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    // avoid inflating empty/small vecs before we have determined that there's anything to read
+    if (size_hint.is_none() || size_hint == Some(0)) && buf.capacity() - buf.len() < PROBE_SIZE {
+        let read = small_probe_read(r, buf)?;
+
+        if read == 0 {
+            return Ok(0);
+        }
+    }
+
    loop {
+        if buf.len() == buf.capacity() && buf.capacity() == start_cap {
+            // The buffer might be an exact fit. Let's read into a probe buffer
+            // and see if it returns `Ok(0)`. If so, we've avoided an
+            // unnecessary doubling of the capacity. But if not, append the
+            // probe buffer to the primary buffer and let its capacity grow.
+            let read = small_probe_read(r, buf)?;
+
+            if read == 0 {
+                return Ok(buf.len() - start_len);
+            }
+        }
+
        if buf.len() == buf.capacity() {
-            buf.reserve(32); // buf is full, need more space
+            buf.reserve(PROBE_SIZE); // buf is full, need more space
        }

        let mut spare = buf.spare_capacity_mut();
-        if let Some(size) = max_read_size {
-            let len = cmp::min(spare.len(), size);
-            spare = &mut spare[..len]
-        }
+        let buf_len = cmp::min(spare.len(), max_read_size);
+        spare = &mut spare[..buf_len];
        let mut read_buf: BorrowedBuf<'_> = spare.into();

        // SAFETY: These bytes were initialized but not filled in the previous loop
@ -434,42 +476,44 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
        }

        let mut cursor = read_buf.unfilled();
-        match r.read_buf(cursor.reborrow()) {
-            Ok(()) => {}
-            Err(e) if e.is_interrupted() => continue,
-            Err(e) => return Err(e),
+        loop {
+            match r.read_buf(cursor.reborrow()) {
+                Ok(()) => break,
+                Err(e) if e.is_interrupted() => continue,
+                Err(e) => return Err(e),
+            }
        }

-        if cursor.written() == 0 {
+        let unfilled_but_initialized = cursor.init_ref().len();
+        let bytes_read = cursor.written();
+        let was_fully_initialized = read_buf.init_len() == buf_len;
+
+        if bytes_read == 0 {
            return Ok(buf.len() - start_len);
        }

        // store how much was initialized but not filled
-        initialized = cursor.init_ref().len();
+        initialized = unfilled_but_initialized;

        // SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
        unsafe {
-            let new_len = read_buf.filled().len() + buf.len();
+            let new_len = bytes_read + buf.len();
            buf.set_len(new_len);
        }

-        if buf.len() == buf.capacity() && buf.capacity() == start_cap {
-            // The buffer might be an exact fit. Let's read into a probe buffer
-            // and see if it returns `Ok(0)`. If so, we've avoided an
-            // unnecessary doubling of the capacity. But if not, append the
-            // probe buffer to the primary buffer and let its capacity grow.
-            let mut probe = [0u8; 32];
+        // Use heuristics to determine the max read size if no initial size hint was provided
+        if size_hint.is_none() {
+            // The reader is returning short reads but it doesn't call ensure_init().
+            // In that case we no longer need to restrict read sizes to avoid
+            // initialization costs.
+            if !was_fully_initialized {
+                max_read_size = usize::MAX;
+            }

-            loop {
-                match r.read(&mut probe) {
-                    Ok(0) => return Ok(buf.len() - start_len),
-                    Ok(n) => {
-                        buf.extend_from_slice(&probe[..n]);
-                        break;
-                    }
-                    Err(ref e) if e.is_interrupted() => continue,
-                    Err(e) => return Err(e),
-                }
+            // we have passed a larger buffer than previously and the
+            // reader still hasn't returned a short read
+            if buf_len >= max_read_size && bytes_read == buf_len {
+                max_read_size = max_read_size.saturating_mul(2);
            }
        }
    }