Replace Utf8Error::resume_from with Utf8Error::error_len
Their relationship is:

* `resume_from = error_len.map(|l| l + valid_up_to)`
* `error_len` is always one of `None`, `Some(1)`, `Some(2)`, or `Some(3)`.

When I started using `resume_from` I almost always ended up subtracting `valid_up_to` to obtain `error_len`. Therefore the latter is what should be provided in the first place.
This commit is contained in:
parent
182044248c
commit
b5f16a10e9
3 changed files with 28 additions and 26 deletions
|
@ -28,7 +28,7 @@
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
#![feature(unboxed_closures)]
|
#![feature(unboxed_closures)]
|
||||||
#![feature(unicode)]
|
#![feature(unicode)]
|
||||||
#![feature(utf8_error_resume_from)]
|
#![feature(utf8_error_error_len)]
|
||||||
|
|
||||||
extern crate collections;
|
extern crate collections;
|
||||||
extern crate test;
|
extern crate test;
|
||||||
|
|
|
@ -543,31 +543,31 @@ fn from_utf8_mostly_ascii() {
|
||||||
#[test]
|
#[test]
|
||||||
fn from_utf8_error() {
|
fn from_utf8_error() {
|
||||||
macro_rules! test {
|
macro_rules! test {
|
||||||
($input: expr, $expected_valid_up_to: expr, $expected_resume_from: expr) => {
|
($input: expr, $expected_valid_up_to: expr, $expected_error_len: expr) => {
|
||||||
let error = from_utf8($input).unwrap_err();
|
let error = from_utf8($input).unwrap_err();
|
||||||
assert_eq!(error.valid_up_to(), $expected_valid_up_to);
|
assert_eq!(error.valid_up_to(), $expected_valid_up_to);
|
||||||
assert_eq!(error.resume_from(), $expected_resume_from);
|
assert_eq!(error.error_len(), $expected_error_len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
test!(b"A\xC3\xA9 \xFF ", 4, Some(5));
|
test!(b"A\xC3\xA9 \xFF ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \x80 ", 4, Some(5));
|
test!(b"A\xC3\xA9 \x80 ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xC1 ", 4, Some(5));
|
test!(b"A\xC3\xA9 \xC1 ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xC1", 4, Some(5));
|
test!(b"A\xC3\xA9 \xC1", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xC2", 4, None);
|
test!(b"A\xC3\xA9 \xC2", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xC2 ", 4, Some(5));
|
test!(b"A\xC3\xA9 \xC2 ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xC2\xC0", 4, Some(5));
|
test!(b"A\xC3\xA9 \xC2\xC0", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xE0", 4, None);
|
test!(b"A\xC3\xA9 \xE0", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xE0\x9F", 4, Some(5));
|
test!(b"A\xC3\xA9 \xE0\x9F", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xE0\xA0", 4, None);
|
test!(b"A\xC3\xA9 \xE0\xA0", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xE0\xA0\xC0", 4, Some(6));
|
test!(b"A\xC3\xA9 \xE0\xA0\xC0", 4, Some(2));
|
||||||
test!(b"A\xC3\xA9 \xE0\xA0 ", 4, Some(6));
|
test!(b"A\xC3\xA9 \xE0\xA0 ", 4, Some(2));
|
||||||
test!(b"A\xC3\xA9 \xED\xA0\x80 ", 4, Some(5));
|
test!(b"A\xC3\xA9 \xED\xA0\x80 ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xF1", 4, None);
|
test!(b"A\xC3\xA9 \xF1", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xF1\x80", 4, None);
|
test!(b"A\xC3\xA9 \xF1\x80", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xF1\x80\x80", 4, None);
|
test!(b"A\xC3\xA9 \xF1\x80\x80", 4, None);
|
||||||
test!(b"A\xC3\xA9 \xF1 ", 4, Some(5));
|
test!(b"A\xC3\xA9 \xF1 ", 4, Some(1));
|
||||||
test!(b"A\xC3\xA9 \xF1\x80 ", 4, Some(6));
|
test!(b"A\xC3\xA9 \xF1\x80 ", 4, Some(2));
|
||||||
test!(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(7));
|
test!(b"A\xC3\xA9 \xF1\x80\x80 ", 4, Some(3));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -125,7 +125,7 @@ Section: Creating a string
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub struct Utf8Error {
|
pub struct Utf8Error {
|
||||||
valid_up_to: usize,
|
valid_up_to: usize,
|
||||||
invalid_length: Option<u8>,
|
error_len: Option<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Utf8Error {
|
impl Utf8Error {
|
||||||
|
@ -161,12 +161,14 @@ impl Utf8Error {
|
||||||
/// If a byte stream (such as a file or a network socket) is being decoded incrementally,
|
/// If a byte stream (such as a file or a network socket) is being decoded incrementally,
|
||||||
/// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
|
/// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
|
||||||
///
|
///
|
||||||
/// * `Some(index)`: an unexpected byte was encountered.
|
/// * `Some(len)`: an unexpected byte was encountered.
|
||||||
/// The index provided is where decoding should resume
|
/// The length provided is that of the invalid byte sequence
|
||||||
|
/// that starts at the index given by `valid_up_to()`.
|
||||||
|
/// Decoding should resume after that sequence
|
||||||
/// (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding.
|
/// (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding.
|
||||||
#[unstable(feature = "utf8_error_resume_from", reason ="new", issue = "0")]
|
#[unstable(feature = "utf8_error_error_len", reason ="new", issue = "0")]
|
||||||
pub fn resume_from(&self) -> Option<usize> {
|
pub fn error_len(&self) -> Option<usize> {
|
||||||
self.invalid_length.map(|l| self.valid_up_to + l as usize)
|
self.error_len.map(|len| len as usize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,9 +318,9 @@ pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl fmt::Display for Utf8Error {
|
impl fmt::Display for Utf8Error {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
if let Some(invalid_length) = self.invalid_length {
|
if let Some(error_len) = self.error_len {
|
||||||
write!(f, "invalid utf-8 sequence of {} bytes from index {}",
|
write!(f, "invalid utf-8 sequence of {} bytes from index {}",
|
||||||
invalid_length, self.valid_up_to)
|
error_len, self.valid_up_to)
|
||||||
} else {
|
} else {
|
||||||
write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
|
write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
|
||||||
}
|
}
|
||||||
|
@ -1263,10 +1265,10 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||||
while index < len {
|
while index < len {
|
||||||
let old_offset = index;
|
let old_offset = index;
|
||||||
macro_rules! err {
|
macro_rules! err {
|
||||||
($invalid_length: expr) => {
|
($error_len: expr) => {
|
||||||
return Err(Utf8Error {
|
return Err(Utf8Error {
|
||||||
valid_up_to: old_offset,
|
valid_up_to: old_offset,
|
||||||
invalid_length: $invalid_length,
|
error_len: $error_len,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue