From cd4245d318b04c8b44aed7e682c49b0507086d6c Mon Sep 17 00:00:00 2001
From: Maybe Waffle <waffle.lapkin@gmail.com>
Date: Thu, 27 Jan 2022 00:25:17 +0300
Subject: [PATCH] Make char::DecodeUtf16::size_hint more precise

The new implementation takes the contents of `self.buf` into account and
rounds the lower bound up instead of down.
---
 library/core/src/char/decode.rs | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/library/core/src/char/decode.rs b/library/core/src/char/decode.rs
index 5dd8c5ef789..f3fef85ef1d 100644
--- a/library/core/src/char/decode.rs
+++ b/library/core/src/char/decode.rs
@@ -120,9 +120,21 @@ impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
         let (low, high) = self.iter.size_hint();
-        // we could be entirely valid surrogates (2 elements per
-        // char), or entirely non-surrogates (1 element per char)
-        (low / 2, high)
+        // A buffered non-surrogate always yields one more item. A buffered
+        // surrogate may instead pair with the next unit of `self.iter` and add
+        // nothing to the lower bound, unless `self.iter` is known to be empty;
+        // either way it adds at most one item to the upper bound.
+        let (low_buf, high_buf) = match self.buf {
+            None => (0, 0),
+            Some(0xD800..=0xDFFF) if high != Some(0) => (0, 1),
+            Some(_) => (1, 1),
+        };
+
+        // `self.iter` could contain entirely valid surrogates (2 elements per
+        // char), or entirely non-surrogates (1 element per char). An odd
+        // count can never be fully paired up, so the lower bound rounds up.
+        let low = low.div_ceil(2) + low_buf;
+        (low, high.and_then(|h| h.checked_add(high_buf)))
     }
 }