1
Fork 0

Refactor low-level UTF-16 decoding.

* Rename `utf16_items` to `decode_utf16`. "Items" is meaningless.
* Move it to `rustc_unicode::char`, exposed in `std::char`.
* Generalize it to any `u16` iterable, not just `&[u16]`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enables using the `FromIterator for Result` impl.
* Add a `REPLACEMENT_CHARACTER` constant.
* Document how `result.unwrap_or(REPLACEMENT_CHARACTER)` replaces `Utf16Item::to_char_lossy`.
This commit is contained in:
Simon Sapin 2015-08-13 18:39:46 +02:00
parent c408b78633
commit 6174b8d726
10 changed files with 164 additions and 61 deletions

View file

@ -209,8 +209,6 @@ use std::str::FromStr;
use std::string;
use std::{char, f64, fmt, str};
use std;
use rustc_unicode::str as unicode_str;
use rustc_unicode::str::Utf16Item;
use Encodable;
@ -1712,11 +1710,13 @@ impl<T: Iterator<Item=char>> Parser<T> {
_ => return self.error(UnexpectedEndOfHexEscape),
}
let buf = [n1, try!(self.decode_hex_escape())];
match unicode_str::utf16_items(&buf).next() {
Some(Utf16Item::ScalarValue(c)) => res.push(c),
_ => return self.error(LoneLeadingSurrogateInHexEscape),
let n2 = try!(self.decode_hex_escape());
if n2 < 0xDC00 || n2 > 0xDFFF {
return self.error(LoneLeadingSurrogateInHexEscape)
}
let c = (((n1 - 0xD800) as u32) << 10 |
(n2 - 0xDC00) as u32) + 0x1_0000;
res.push(char::from_u32(c).unwrap());
}
n => match char::from_u32(n as u32) {