Expose Utf8Lossy as Utf8Chunks

2022-08-20 12:49:20 -04:00 · 2022-08-20 12:49:20 -04:00 · e8ee0b7b2b
commit e8ee0b7b2b
parent be9cfb307e
10 changed files with 273 additions and 184 deletions
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@ -141,6 +141,7 @@
 #![feature(unchecked_math)]
 #![feature(unicode_internals)]
 #![feature(unsize)]
 #![feature(utf8_chunks)]
 #![feature(std_internals)]
 //
 // Language features:
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@ -71,6 +71,8 @@ pub use core::str::{RSplit, Split};
 pub use core::str::{RSplitN, SplitN};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::str::{RSplitTerminator, SplitTerminator};
 #[unstable(feature = "utf8_chunks", issue = "99543")]
 pub use core::str::{Utf8Chunk, Utf8Chunks};
 /// Note: `str` in `Concat<str>` is not meaningful here.
 /// This type parameter of the trait only exists to enable another impl.
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@ -58,9 +58,9 @@ use core::ops::Bound::{Excluded, Included, Unbounded};
 use core::ops::{self, Index, IndexMut, Range, RangeBounds};
 use core::ptr;
 use core::slice;
 #[cfg(not(no_global_oom_handling))]
 use core::str::lossy;
 use core::str::pattern::Pattern;
 #[cfg(not(no_global_oom_handling))]
 use core::str::Utf8Chunks;
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::{Cow, ToOwned};
@ -628,11 +628,11 @@ impl String {
    #[cfg(not(no_global_oom_handling))]
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> {
-        let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
+        let mut iter = Utf8Chunks::new(v);
        let first_valid = if let Some(chunk) = iter.next() {
-            let lossy::Utf8LossyChunk { valid, broken } = chunk;
+            let valid = chunk.valid();
-            if broken.is_empty() {
+            if chunk.invalid().is_empty() {
                debug_assert_eq!(valid.len(), v.len());
                return Cow::Borrowed(valid);
            }
@ -647,9 +647,9 @@ impl String {
        res.push_str(first_valid);
        res.push_str(REPLACEMENT);
-        for lossy::Utf8LossyChunk { valid, broken } in iter {
+        for chunk in iter {
-            res.push_str(valid);
+            res.push_str(chunk.valid());
-            if !broken.is_empty() {
+            if !chunk.invalid().is_empty() {
                res.push_str(REPLACEMENT);
            }
        }
--- a/library/core/src/str/lossy.rs
+++ b/library/core/src/str/lossy.rs
@ -1,51 +1,170 @@
-use crate::char;
+use crate::fmt;
-use crate::fmt::{self, Write};
+use crate::fmt::Formatter;
-use crate::mem;
+use crate::fmt::Write;
 use crate::iter::FusedIterator;
 use super::from_utf8_unchecked;
 use super::validations::utf8_char_width;
-/// Lossy UTF-8 string.
+/// An item returned by the [`Utf8Chunks`] iterator.
-#[unstable(feature = "str_internals", issue = "none")]
+///
-pub struct Utf8Lossy {
+/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
-    bytes: [u8],
+/// when decoding a UTF-8 string.
 ///
 /// # Examples
 ///
 /// ```
 /// #![feature(utf8_chunks)]
 ///
 /// use std::str::Utf8Chunks;
 ///
 /// // An invalid UTF-8 string
 /// let bytes = b"foo\xF1\x80bar";
 ///
 /// // Decode the first `Utf8Chunk`
 /// let chunk = Utf8Chunks::new(bytes).next().unwrap();
 ///
 /// // The first three characters are valid UTF-8
 /// assert_eq!("foo", chunk.valid());
 ///
 /// // The fourth character is broken
 /// assert_eq!(b"\xF1\x80", chunk.invalid());
 /// ```
 #[unstable(feature = "utf8_chunks", issue = "99543")]
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Utf8Chunk<'a> {
    valid: &'a str,
    invalid: &'a [u8],
 }
-impl Utf8Lossy {
+impl<'a> Utf8Chunk<'a> {
    /// Returns the next validated UTF-8 substring.
    ///
    /// This substring can be empty at the start of the string or between
    /// broken UTF-8 characters.
    #[must_use]
-    pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
+    #[unstable(feature = "utf8_chunks", issue = "99543")]
-        // SAFETY: Both use the same memory layout, and UTF-8 correctness isn't required.
+    pub fn valid(&self) -> &'a str {
-        unsafe { mem::transmute(bytes) }
+        self.valid
    }
-    pub fn chunks(&self) -> Utf8LossyChunksIter<'_> {
+    /// Returns the invalid sequence that caused a failure.
-        Utf8LossyChunksIter { source: &self.bytes }
+    ///
    /// The returned slice has a maximum length of 3 and starts immediately after
    /// the substring given by [`valid`]. Decoding resumes after this sequence.
    ///
    /// If empty, this is the last chunk in the string. If non-empty, an
    /// unexpected byte was encountered or the end of the input was reached
    /// unexpectedly.
    ///
    /// Lossy decoding would replace this sequence with [`U+FFFD REPLACEMENT
    /// CHARACTER`].
    ///
    /// [`valid`]: Self::valid
    /// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
    #[must_use]
    #[unstable(feature = "utf8_chunks", issue = "99543")]
    pub fn invalid(&self) -> &'a [u8] {
        self.invalid
    }
 }
-/// Iterator over lossy UTF-8 string
+#[must_use]
 #[must_use = "iterators are lazy and do nothing unless consumed"]
 #[unstable(feature = "str_internals", issue = "none")]
-#[allow(missing_debug_implementations)]
+pub struct Debug<'a>(&'a [u8]);
-pub struct Utf8LossyChunksIter<'a> {
+
 #[unstable(feature = "str_internals", issue = "none")]
 impl fmt::Debug for Debug<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_char('"')?;
        for chunk in Utf8Chunks::new(self.0) {
            // Valid part.
            // Here we partially parse UTF-8 again which is suboptimal.
            {
                let valid = chunk.valid();
                let mut from = 0;
                for (i, c) in valid.char_indices() {
                    let esc = c.escape_debug();
                    // If char needs escaping, flush backlog so far and write, else skip
                    if esc.len() != 1 {
                        f.write_str(&valid[from..i])?;
                        for c in esc {
                            f.write_char(c)?;
                        }
                        from = i + c.len_utf8();
                    }
                }
                f.write_str(&valid[from..])?;
            }
            // Broken parts of string as hex escape.
            for &b in chunk.invalid() {
                write!(f, "\\x{:02X}", b)?;
            }
        }
        f.write_char('"')
    }
 }
 /// An iterator used to decode a slice of mostly UTF-8 bytes to string slices
 /// ([`&str`]) and byte slices ([`&[u8]`][byteslice]).
 ///
 /// If you want a simple conversion from UTF-8 byte slices to string slices,
 /// [`from_utf8`] is easier to use.
 ///
 /// [byteslice]: slice
 /// [`from_utf8`]: super::from_utf8
 ///
 /// # Examples
 ///
 /// This can be used to create functionality similar to
 /// [`String::from_utf8_lossy`] without allocating heap memory:
 ///
 /// ```
 /// #![feature(utf8_chunks)]
 ///
 /// use std::str::Utf8Chunks;
 ///
 /// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
 ///     for chunk in Utf8Chunks::new(input) {
 ///         push(chunk.valid());
 ///
 ///         if !chunk.invalid().is_empty() {
 ///             push("\u{FFFD}");
 ///         }
 ///     }
 /// }
 /// ```
 ///
 /// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
 #[must_use = "iterators are lazy and do nothing unless consumed"]
 #[unstable(feature = "utf8_chunks", issue = "99543")]
 #[derive(Clone)]
 pub struct Utf8Chunks<'a> {
    source: &'a [u8],
 }
-#[unstable(feature = "str_internals", issue = "none")]
+impl<'a> Utf8Chunks<'a> {
-#[derive(PartialEq, Eq, Debug)]
+    /// Creates a new iterator to decode the bytes.
-pub struct Utf8LossyChunk<'a> {
+    #[unstable(feature = "utf8_chunks", issue = "99543")]
-    /// Sequence of valid chars.
+    pub fn new(bytes: &'a [u8]) -> Self {
-    /// Can be empty between broken UTF-8 chars.
+        Self { source: bytes }
-    pub valid: &'a str,
+    }
-    /// Single broken char, empty if none.
+
-    /// Empty iff iterator item is last.
+    #[doc(hidden)]
-    pub broken: &'a [u8],
+    #[unstable(feature = "str_internals", issue = "none")]
    pub fn debug(&self) -> Debug<'_> {
        Debug(self.source)
    }
 }
-impl<'a> Iterator for Utf8LossyChunksIter<'a> {
+#[unstable(feature = "utf8_chunks", issue = "99543")]
-    type Item = Utf8LossyChunk<'a>;
+impl<'a> Iterator for Utf8Chunks<'a> {
    type Item = Utf8Chunk<'a>;
-    fn next(&mut self) -> Option<Utf8LossyChunk<'a>> {
+    fn next(&mut self) -> Option<Utf8Chunk<'a>> {
        if self.source.is_empty() {
            return None;
        }
@ -130,71 +249,22 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
        // SAFETY: `valid_up_to <= i` because it is only ever assigned via
        // `valid_up_to = i` and `i` only increases.
-        let (valid, broken) = unsafe { inspected.split_at_unchecked(valid_up_to) };
+        let (valid, invalid) = unsafe { inspected.split_at_unchecked(valid_up_to) };
-        Some(Utf8LossyChunk {
+        Some(Utf8Chunk {
            // SAFETY: All bytes up to `valid_up_to` are valid UTF-8.
            valid: unsafe { from_utf8_unchecked(valid) },
-            broken,
+            invalid,
        })
    }
 }
-impl fmt::Display for Utf8Lossy {
+#[unstable(feature = "utf8_chunks", issue = "99543")]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl FusedIterator for Utf8Chunks<'_> {}
        // If we're the empty string then our iterator won't actually yield
        // anything, so perform the formatting manually
        if self.bytes.is_empty() {
            return "".fmt(f);
        }
-        for Utf8LossyChunk { valid, broken } in self.chunks() {
+#[unstable(feature = "utf8_chunks", issue = "99543")]
-            // If we successfully decoded the whole chunk as a valid string then
+impl fmt::Debug for Utf8Chunks<'_> {
-            // we can return a direct formatting of the string which will also
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-            // respect various formatting flags if possible.
+        f.debug_struct("Utf8Chunks").field("source", &self.debug()).finish()
            if valid.len() == self.bytes.len() {
                assert!(broken.is_empty());
                return valid.fmt(f);
            }
            f.write_str(valid)?;
            if !broken.is_empty() {
                f.write_char(char::REPLACEMENT_CHARACTER)?;
            }
        }
        Ok(())
    }
 }
 impl fmt::Debug for Utf8Lossy {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_char('"')?;
        for Utf8LossyChunk { valid, broken } in self.chunks() {
            // Valid part.
            // Here we partially parse UTF-8 again which is suboptimal.
            {
                let mut from = 0;
                for (i, c) in valid.char_indices() {
                    let esc = c.escape_debug();
                    // If char needs escaping, flush backlog so far and write, else skip
                    if esc.len() != 1 {
                        f.write_str(&valid[from..i])?;
                        for c in esc {
                            f.write_char(c)?;
                        }
                        from = i + c.len_utf8();
                    }
                }
                f.write_str(&valid[from..])?;
            }
            // Broken parts of string as hex escape.
            for &b in broken {
                write!(f, "\\x{:02x}", b)?;
            }
        }
        f.write_char('"')
    }
 }
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@ -22,9 +22,9 @@ use crate::slice::{self, SliceIndex};
 pub mod pattern;
-#[unstable(feature = "str_internals", issue = "none")]
+mod lossy;
-#[allow(missing_docs)]
+#[unstable(feature = "utf8_chunks", issue = "99543")]
-pub mod lossy;
+pub use lossy::{Utf8Chunk, Utf8Chunks};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use converts::{from_utf8, from_utf8_unchecked};
--- a/library/core/tests/lib.rs
+++ b/library/core/tests/lib.rs
@ -96,6 +96,7 @@
 #![feature(waker_getters)]
 #![feature(slice_flatten)]
 #![feature(provide_any)]
 #![feature(utf8_chunks)]
 #![deny(unsafe_op_in_unsafe_fn)]
 extern crate test;
--- a/library/core/tests/str_lossy.rs
+++ b/library/core/tests/str_lossy.rs
@ -1,85 +1,85 @@
-use core::str::lossy::*;
+use core::str::Utf8Chunks;
 #[test]
 fn chunks() {
-    let mut iter = Utf8Lossy::from_bytes(b"hello").chunks();
+    macro_rules! assert_chunks {
-    assert_eq!(Some(Utf8LossyChunk { valid: "hello", broken: b"" }), iter.next());
+        ( $string:expr, $(($valid:expr, $invalid:expr)),* $(,)? ) => {{
            let mut iter = Utf8Chunks::new($string);
            $(
                let chunk = iter.next().expect("missing chunk");
                assert_eq!($valid, chunk.valid());
                assert_eq!($invalid, chunk.invalid());
            )*
            assert_eq!(None, iter.next());
        }};
    }
-    let mut iter = Utf8Lossy::from_bytes("ศไทย中华Việt Nam".as_bytes()).chunks();
+    assert_chunks!(b"hello", ("hello", b""));
-    assert_eq!(Some(Utf8LossyChunk { valid: "ศไทย中华Việt Nam", broken: b"" }), iter.next());
+    assert_chunks!("ศไทย中华Việt Nam".as_bytes(), ("ศไทย中华Việt Nam", b""));
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"Hello\xC2 There\xFF Goodbye",
-    let mut iter = Utf8Lossy::from_bytes(b"Hello\xC2 There\xFF Goodbye").chunks();
+        ("Hello", b"\xC2"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "Hello", broken: b"\xC2" }), iter.next());
+        (" There", b"\xFF"),
-    assert_eq!(Some(Utf8LossyChunk { valid: " There", broken: b"\xFF" }), iter.next());
+        (" Goodbye", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: " Goodbye", broken: b"" }), iter.next());
+    );
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"Hello\xC0\x80 There\xE6\x83 Goodbye",
-    let mut iter = Utf8Lossy::from_bytes(b"Hello\xC0\x80 There\xE6\x83 Goodbye").chunks();
+        ("Hello", b"\xC0"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "Hello", broken: b"\xC0" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        (" There", b"\xE6\x83"),
-    assert_eq!(Some(Utf8LossyChunk { valid: " There", broken: b"\xE6\x83" }), iter.next());
+        (" Goodbye", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: " Goodbye", broken: b"" }), iter.next());
+    );
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"\xF5foo\xF5\x80bar",
-    let mut iter = Utf8Lossy::from_bytes(b"\xF5foo\xF5\x80bar").chunks();
+        ("", b"\xF5"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF5" }), iter.next());
+        ("foo", b"\xF5"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF5" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        ("bar", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"" }), iter.next());
+    );
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"\xF1foo\xF1\x80bar\xF1\x80\x80baz",
-    let mut iter = Utf8Lossy::from_bytes(b"\xF1foo\xF1\x80bar\xF1\x80\x80baz").chunks();
+        ("", b"\xF1"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF1" }), iter.next());
+        ("foo", b"\xF1\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF1\x80" }), iter.next());
+        ("bar", b"\xF1\x80\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"\xF1\x80\x80" }), iter.next());
+        ("baz", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: "baz", broken: b"" }), iter.next());
+    );
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"\xF4foo\xF4\x80bar\xF4\xBFbaz",
-    let mut iter = Utf8Lossy::from_bytes(b"\xF4foo\xF4\x80bar\xF4\xBFbaz").chunks();
+        ("", b"\xF4"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF4" }), iter.next());
+        ("foo", b"\xF4\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF4\x80" }), iter.next());
+        ("bar", b"\xF4"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"\xF4" }), iter.next());
+        ("", b"\xBF"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF" }), iter.next());
+        ("baz", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: "baz", broken: b"" }), iter.next());
+    );
-    assert_eq!(None, iter.next());
+    assert_chunks!(
-
+        b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar",
-    let mut iter = Utf8Lossy::from_bytes(b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar").chunks();
+        ("", b"\xF0"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF0" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        ("foo\u{10000}bar", b""),
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo\u{10000}bar", broken: b"" }), iter.next());
+    );
    assert_eq!(None, iter.next());
    // surrogates
-    let mut iter = Utf8Lossy::from_bytes(b"\xED\xA0\x80foo\xED\xBF\xBFbar").chunks();
+    assert_chunks!(
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xED" }), iter.next());
+        b"\xED\xA0\x80foo\xED\xBF\xBFbar",
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xA0" }), iter.next());
+        ("", b"\xED"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80" }), iter.next());
+        ("", b"\xA0"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xED" }), iter.next());
+        ("", b"\x80"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF" }), iter.next());
+        ("foo", b"\xED"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF" }), iter.next());
+        ("", b"\xBF"),
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"" }), iter.next());
+        ("", b"\xBF"),
-    assert_eq!(None, iter.next());
+        ("bar", b""),
 }
 #[test]
 fn display() {
    assert_eq!(
        "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
        &Utf8Lossy::from_bytes(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string()
    );
 }
 #[test]
 fn debug() {
    assert_eq!(
-        "\"Hello\\xc0\\x80 There\\xe6\\x83 Goodbye\\u{10d4ea}\"",
+        "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
        &format!(
            "{:?}",
-            Utf8Lossy::from_bytes(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa")
+            Utf8Chunks::new(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").debug(),
-        )
+        ),
    );
 }
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@ -258,6 +258,7 @@
 #![feature(staged_api)]
 #![feature(thread_local)]
 #![feature(try_blocks)]
 #![feature(utf8_chunks)]
 //
 // Library features (core):
 #![feature(array_error_internals)]
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@ -11,7 +11,7 @@ use crate::str;
 use crate::sync::Arc;
 use crate::sys_common::{AsInner, IntoInner};
-use core::str::lossy::{Utf8Lossy, Utf8LossyChunk};
+use core::str::Utf8Chunks;
 #[cfg(test)]
 #[path = "../unix/os_str/tests.rs"]
@ -29,26 +29,32 @@ pub struct Slice {
 }
 impl fmt::Debug for Slice {
-    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // Writes out a valid unicode string with the correct escape sequences
+        fmt::Debug::fmt(&Utf8Chunks::new(&self.inner).debug(), f)
        formatter.write_str("\"")?;
        for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(&self.inner).chunks() {
            for c in valid.chars().flat_map(|c| c.escape_debug()) {
                formatter.write_char(c)?
            }
            for b in broken {
                write!(formatter, "\\x{:02X}", b)?;
            }
        }
        formatter.write_str("\"")
    }
 }
 impl fmt::Display for Slice {
-    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
+        // If we're the empty string then our iterator won't actually yield
        // anything, so perform the formatting manually
        if self.inner.is_empty() {
            return "".fmt(f);
        }
        for chunk in Utf8Chunks::new(&self.inner) {
            let valid = chunk.valid();
            // If we successfully decoded the whole chunk as a valid string then
            // we can return a direct formatting of the string which will also
            // respect various formatting flags if possible.
            if chunk.invalid().is_empty() {
                return valid.fmt(f);
            }
            f.write_str(valid)?;
            f.write_char(char::REPLACEMENT_CHARACTER)?;
        }
        Ok(())
    }
 }
--- a/library/std/src/sys/unix/os_str/tests.rs
+++ b/library/std/src/sys/unix/os_str/tests.rs
@ -8,3 +8,11 @@ fn slice_debug_output() {
    assert_eq!(output, expected);
 }
 #[test]
 fn display() {
    assert_eq!(
        "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
        Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(),
    );
 }