From b5f16a10e9406fc1c19294fee1c33e507a17458e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 6 Mar 2017 22:06:30 +0100 Subject: [PATCH] Replace Utf8Error::resume_from with Utf8Error::error_len Their relationship is: * `resume_from = error_len.map(|l| l + valid_up_to)` * error_len is always one of None, Some(1), Some(2), or Some(3). When I started using resume_from I almost always ended up subtracting valid_up_to to obtain error_len. Therefore the latter is what should be provided in the first place. --- src/libcollectionstest/lib.rs | 2 +- src/libcollectionstest/str.rs | 30 +++++++++++++++--------------- src/libcore/str/mod.rs | 22 ++++++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs index a7018daf098..98d0b1c8e15 100644 --- a/src/libcollectionstest/lib.rs +++ b/src/libcollectionstest/lib.rs @@ -28,7 +28,7 @@ #![feature(test)] #![feature(unboxed_closures)] #![feature(unicode)] -#![feature(utf8_error_resume_from)] +#![feature(utf8_error_error_len)] extern crate collections; extern crate test; diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 5de74d68b9e..c9b7104fec4 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -543,31 +543,31 @@ fn from_utf8_mostly_ascii() { #[test] fn from_utf8_error() { macro_rules! test { - ($input: expr, $expected_valid_up_to: expr, $expected_resume_from: expr) => { + ($input: expr, $expected_valid_up_to: expr, $expected_error_len: expr) => { let error = from_utf8($input).unwrap_err(); assert_eq!(error.valid_up_to(), $expected_valid_up_to); - assert_eq!(error.resume_from(), $expected_resume_from); + assert_eq!(error.error_len(), $expected_error_len); } } - test!(b"A\xC3\xA9 \xFF ", 4, Some(5)); - test!(b"A\xC3\xA9 \x80 ", 4, Some(5)); - test!(b"A\xC3\xA9 \xC1 ", 4, Some(5)); - test!(b"A\xC3\xA9 \xC1", 4, Some(5)); + test!(b"A\xC3\xA9 \xFF ", 4, Some(1)); + test!(b"A\xC3\xA9 \x80 ", 4, Some(1)); + test!(b"A\xC3\xA9 \xC1 ", 4, Some(1)); + test!(b"A\xC3\xA9 \xC1", 4, Some(1)); test!(b"A\xC3\xA9 \xC2", 4, None); - test!(b"A\xC3\xA9 \xC2 ", 4, Some(5)); - test!(b"A\xC3\xA9 \xC2\xC0", 4, Some(5)); + test!(b"A\xC3\xA9 \xC2 ", 4, Some(1)); + test!(b"A\xC3\xA9 \xC2\xC0", 4, Some(1)); test!(b"A\xC3\xA9 \xE0", 4, None); - test!(b"A\xC3\xA9 \xE0\x9F", 4, Some(5)); + test!(b"A\xC3\xA9 \xE0\x9F", 4, Some(1)); test!(b"A\xC3\xA9 \xE0\xA0", 4, None); - test!(b"A\xC3\xA9 \xE0\xA0\xC0", 4, Some(6)); - test!(b"A\xC3\xA9 \xE0\xA0 ", 4, Some(6)); - test!(b"A\xC3\xA9 \xED\xA0\x80 ", 4, Some(5)); + test!(b"A\xC3\xA9 \xE0\xA0\xC0", 4, Some(2)); + test!(b"A\xC3\xA9 \xE0\xA0 ", 4, Some(2)); + test!(b"A\xC3\xA9 \xED\xA0\x80 ", 4, Some(1)); test!(b"A\xC3\xA9 \xF1", 4, None); test!(b"A\xC3\xA9 \xF1\x80", 4, None); test!(b"A\xC3\xA9 \xF1\x80\x80", 4, None); - test!(b"A\xC3\xA9 \xF1 ", 4, Some(5)); - test!(b"A\xC3\xA9 \xF1\x80 ", 4, Some(6)); - test!(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(7)); + test!(b"A\xC3\xA9 \xF1 ", 4, Some(1)); + test!(b"A\xC3\xA9 \xF1\x80 ", 4, Some(2)); + test!(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(3)); } #[test] diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index eb13d28e82d..63b12932c3d 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -125,7 +125,7 @@ Section: Creating a string #[stable(feature = "rust1", since = "1.0.0")] pub struct Utf8Error { valid_up_to: usize, - invalid_length: Option, + error_len: Option, } impl Utf8Error { @@ -161,12 +161,14 @@ impl Utf8Error { /// If a byte stream (such as a file or a network socket) is being decoded incrementally, /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. /// - /// * `Some(index)`: an unexpected byte was encountered. - /// The index provided is where decoding should resume + /// * `Some(len)`: an unexpected byte was encountered. + /// The length provided is that of the invalid byte sequence + /// that starts at the index given by `valid_up_to()`. + /// Decoding should resume after that sequence /// (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding. - #[unstable(feature = "utf8_error_resume_from", reason ="new", issue = "0")] - pub fn resume_from(&self) -> Option { - self.invalid_length.map(|l| self.valid_up_to + l as usize) + #[unstable(feature = "utf8_error_error_len", reason ="new", issue = "0")] + pub fn error_len(&self) -> Option { + self.error_len.map(|len| len as usize) } } @@ -316,9 +318,9 @@ pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { #[stable(feature = "rust1", since = "1.0.0")] impl fmt::Display for Utf8Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(invalid_length) = self.invalid_length { + if let Some(error_len) = self.error_len { write!(f, "invalid utf-8 sequence of {} bytes from index {}", - invalid_length, self.valid_up_to) + error_len, self.valid_up_to) } else { write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to) } @@ -1263,10 +1265,10 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { while index < len { let old_offset = index; macro_rules! err { - ($invalid_length: expr) => { + ($error_len: expr) => { return Err(Utf8Error { valid_up_to: old_offset, - invalid_length: $invalid_length, + error_len: $error_len, }) } }