Auto merge of #32204 - alexcrichton:redesign-char-encoding-types, r=aturon
std: Change `encode_utf{8,16}` to return iterators. Currently these have non-traditional APIs which take a buffer and report how much was filled in, but they're not necessarily ergonomic to use. Returning an iterator which *also* exposes an underlying slice shouldn't result in any performance loss as it's just a lazy version of the same implementation, and it's also much more ergonomic! cc #27784
This commit is contained in:
commit
0dcc413e42
10 changed files with 195 additions and 201 deletions
|
@ -61,7 +61,6 @@ use core::iter::FromIterator;
|
||||||
use core::mem;
|
use core::mem;
|
||||||
use core::ops::{self, Add, Index, IndexMut};
|
use core::ops::{self, Add, Index, IndexMut};
|
||||||
use core::ptr;
|
use core::ptr;
|
||||||
use core::slice;
|
|
||||||
use core::str::pattern::Pattern;
|
use core::str::pattern::Pattern;
|
||||||
use rustc_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
|
use rustc_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
|
||||||
use rustc_unicode::str as unicode_str;
|
use rustc_unicode::str as unicode_str;
|
||||||
|
@ -970,22 +969,7 @@ impl String {
|
||||||
pub fn push(&mut self, ch: char) {
|
pub fn push(&mut self, ch: char) {
|
||||||
match ch.len_utf8() {
|
match ch.len_utf8() {
|
||||||
1 => self.vec.push(ch as u8),
|
1 => self.vec.push(ch as u8),
|
||||||
ch_len => {
|
_ => self.vec.extend_from_slice(ch.encode_utf8().as_slice()),
|
||||||
let cur_len = self.len();
|
|
||||||
// This may use up to 4 bytes.
|
|
||||||
self.vec.reserve(ch_len);
|
|
||||||
|
|
||||||
unsafe {
|
|
||||||
// Attempt to not use an intermediate buffer by just pushing bytes
|
|
||||||
// directly onto this string.
|
|
||||||
let slice = slice::from_raw_parts_mut(self.vec
|
|
||||||
.as_mut_ptr()
|
|
||||||
.offset(cur_len as isize),
|
|
||||||
ch_len);
|
|
||||||
let used = ch.encode_utf8(slice).unwrap_or(0);
|
|
||||||
self.vec.set_len(cur_len + used);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1136,9 +1120,10 @@ impl String {
|
||||||
let len = self.len();
|
let len = self.len();
|
||||||
assert!(idx <= len);
|
assert!(idx <= len);
|
||||||
assert!(self.is_char_boundary(idx));
|
assert!(self.is_char_boundary(idx));
|
||||||
self.vec.reserve(4);
|
let bits = ch.encode_utf8();
|
||||||
let mut bits = [0; 4];
|
let bits = bits.as_slice();
|
||||||
let amt = ch.encode_utf8(&mut bits).unwrap();
|
let amt = bits.len();
|
||||||
|
self.vec.reserve(amt);
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
ptr::copy(self.vec.as_ptr().offset(idx as isize),
|
ptr::copy(self.vec.as_ptr().offset(idx as isize),
|
||||||
|
|
|
@ -794,10 +794,9 @@ fn test_rev_iterator() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chars_decoding() {
|
fn test_chars_decoding() {
|
||||||
let mut bytes = [0; 4];
|
|
||||||
for c in (0..0x110000).filter_map(::std::char::from_u32) {
|
for c in (0..0x110000).filter_map(::std::char::from_u32) {
|
||||||
let len = c.encode_utf8(&mut bytes).unwrap_or(0);
|
let bytes = c.encode_utf8();
|
||||||
let s = ::std::str::from_utf8(&bytes[..len]).unwrap();
|
let s = ::std::str::from_utf8(bytes.as_slice()).unwrap();
|
||||||
if Some(c) != s.chars().next() {
|
if Some(c) != s.chars().next() {
|
||||||
panic!("character {:x}={} does not decode correctly", c as u32, c);
|
panic!("character {:x}={} does not decode correctly", c as u32, c);
|
||||||
}
|
}
|
||||||
|
@ -806,10 +805,9 @@ fn test_chars_decoding() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chars_rev_decoding() {
|
fn test_chars_rev_decoding() {
|
||||||
let mut bytes = [0; 4];
|
|
||||||
for c in (0..0x110000).filter_map(::std::char::from_u32) {
|
for c in (0..0x110000).filter_map(::std::char::from_u32) {
|
||||||
let len = c.encode_utf8(&mut bytes).unwrap_or(0);
|
let bytes = c.encode_utf8();
|
||||||
let s = ::std::str::from_utf8(&bytes[..len]).unwrap();
|
let s = ::std::str::from_utf8(bytes.as_slice()).unwrap();
|
||||||
if Some(c) != s.chars().rev().next() {
|
if Some(c) != s.chars().rev().next() {
|
||||||
panic!("character {:x}={} does not decode correctly", c as u32, c);
|
panic!("character {:x}={} does not decode correctly", c as u32, c);
|
||||||
}
|
}
|
||||||
|
|
|
@ -269,10 +269,10 @@ pub trait CharExt {
|
||||||
fn len_utf8(self) -> usize;
|
fn len_utf8(self) -> usize;
|
||||||
#[stable(feature = "core", since = "1.6.0")]
|
#[stable(feature = "core", since = "1.6.0")]
|
||||||
fn len_utf16(self) -> usize;
|
fn len_utf16(self) -> usize;
|
||||||
#[stable(feature = "core", since = "1.6.0")]
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
|
fn encode_utf8(self) -> EncodeUtf8;
|
||||||
#[stable(feature = "core", since = "1.6.0")]
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
|
fn encode_utf16(self) -> EncodeUtf16;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "core", since = "1.6.0")]
|
#[stable(feature = "core", since = "1.6.0")]
|
||||||
|
@ -336,75 +336,47 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> {
|
fn encode_utf8(self) -> EncodeUtf8 {
|
||||||
encode_utf8_raw(self as u32, dst)
|
let code = self as u32;
|
||||||
|
let mut buf = [0; 4];
|
||||||
|
let pos = if code < MAX_ONE_B {
|
||||||
|
buf[3] = code as u8;
|
||||||
|
3
|
||||||
|
} else if code < MAX_TWO_B {
|
||||||
|
buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||||
|
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
2
|
||||||
|
} else if code < MAX_THREE_B {
|
||||||
|
buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||||
|
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||||
|
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||||
|
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
|
||||||
|
0
|
||||||
|
};
|
||||||
|
EncodeUtf8 { buf: buf, pos: pos }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> {
|
fn encode_utf16(self) -> EncodeUtf16 {
|
||||||
encode_utf16_raw(self as u32, dst)
|
let mut buf = [0; 2];
|
||||||
}
|
let mut code = self as u32;
|
||||||
}
|
let pos = if (code & 0xFFFF) == code {
|
||||||
|
|
||||||
/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
|
|
||||||
/// and then returns the number of bytes written.
|
|
||||||
///
|
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
|
||||||
/// and a `None` will be returned.
|
|
||||||
#[inline]
|
|
||||||
#[unstable(feature = "char_internals",
|
|
||||||
reason = "this function should not be exposed publicly",
|
|
||||||
issue = "0")]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
|
|
||||||
// Marked #[inline] to allow llvm optimizing it away
|
|
||||||
if code < MAX_ONE_B && !dst.is_empty() {
|
|
||||||
dst[0] = code as u8;
|
|
||||||
Some(1)
|
|
||||||
} else if code < MAX_TWO_B && dst.len() >= 2 {
|
|
||||||
dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
|
||||||
dst[1] = (code & 0x3F) as u8 | TAG_CONT;
|
|
||||||
Some(2)
|
|
||||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
|
||||||
dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
|
||||||
dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
|
||||||
dst[2] = (code & 0x3F) as u8 | TAG_CONT;
|
|
||||||
Some(3)
|
|
||||||
} else if dst.len() >= 4 {
|
|
||||||
dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
|
||||||
dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
|
||||||
dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
|
||||||
dst[3] = (code & 0x3F) as u8 | TAG_CONT;
|
|
||||||
Some(4)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
|
|
||||||
/// and then returns the number of `u16`s written.
|
|
||||||
///
|
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
|
||||||
/// and a `None` will be returned.
|
|
||||||
#[inline]
|
|
||||||
#[unstable(feature = "char_internals",
|
|
||||||
reason = "this function should not be exposed publicly",
|
|
||||||
issue = "0")]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
|
|
||||||
// Marked #[inline] to allow llvm optimizing it away
|
|
||||||
if (ch & 0xFFFF) == ch && !dst.is_empty() {
|
|
||||||
// The BMP falls through (assuming non-surrogate, as it should)
|
// The BMP falls through (assuming non-surrogate, as it should)
|
||||||
dst[0] = ch as u16;
|
buf[1] = code as u16;
|
||||||
Some(1)
|
1
|
||||||
} else if dst.len() >= 2 {
|
|
||||||
// Supplementary planes break into surrogates.
|
|
||||||
ch -= 0x1_0000;
|
|
||||||
dst[0] = 0xD800 | ((ch >> 10) as u16);
|
|
||||||
dst[1] = 0xDC00 | ((ch as u16) & 0x3FF);
|
|
||||||
Some(2)
|
|
||||||
} else {
|
} else {
|
||||||
None
|
// Supplementary planes break into surrogates.
|
||||||
|
code -= 0x1_0000;
|
||||||
|
buf[0] = 0xD800 | ((code >> 10) as u16);
|
||||||
|
buf[1] = 0xDC00 | ((code as u16) & 0x3FF);
|
||||||
|
0
|
||||||
|
};
|
||||||
|
EncodeUtf16 { buf: buf, pos: pos }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,3 +555,80 @@ impl Iterator for EscapeDefault {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An iterator over `u8` entries representing the UTF-8 encoding of a `char`
|
||||||
|
/// value.
|
||||||
|
///
|
||||||
|
/// Constructed via the `.encode_utf8()` method on `char`.
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct EncodeUtf8 {
|
||||||
|
buf: [u8; 4],
|
||||||
|
pos: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncodeUtf8 {
|
||||||
|
/// Returns the remaining bytes of this iterator as a slice.
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
pub fn as_slice(&self) -> &[u8] {
|
||||||
|
&self.buf[self.pos..]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
impl Iterator for EncodeUtf8 {
|
||||||
|
type Item = u8;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<u8> {
|
||||||
|
if self.pos == self.buf.len() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let ret = Some(self.buf[self.pos]);
|
||||||
|
self.pos += 1;
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
self.as_slice().iter().size_hint()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over `u16` entries representing the UTF-16 encoding of a `char`
|
||||||
|
/// value.
|
||||||
|
///
|
||||||
|
/// Constructed via the `.encode_utf16()` method on `char`.
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct EncodeUtf16 {
|
||||||
|
buf: [u16; 2],
|
||||||
|
pos: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncodeUtf16 {
|
||||||
|
/// Returns the remaining `u16` entries of this iterator as a slice.
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
pub fn as_slice(&self) -> &[u16] {
|
||||||
|
&self.buf[self.pos..]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
|
impl Iterator for EncodeUtf16 {
|
||||||
|
type Item = u16;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<u16> {
|
||||||
|
if self.pos == self.buf.len() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let ret = Some(self.buf[self.pos]);
|
||||||
|
self.pos += 1;
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
self.as_slice().iter().size_hint()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -99,9 +99,9 @@ pub trait Write {
|
||||||
/// This function will return an instance of `Error` on error.
|
/// This function will return an instance of `Error` on error.
|
||||||
#[stable(feature = "fmt_write_char", since = "1.1.0")]
|
#[stable(feature = "fmt_write_char", since = "1.1.0")]
|
||||||
fn write_char(&mut self, c: char) -> Result {
|
fn write_char(&mut self, c: char) -> Result {
|
||||||
let mut utf_8 = [0u8; 4];
|
self.write_str(unsafe {
|
||||||
let bytes_written = c.encode_utf8(&mut utf_8).unwrap_or(0);
|
str::from_utf8_unchecked(c.encode_utf8().as_slice())
|
||||||
self.write_str(unsafe { str::from_utf8_unchecked(&utf_8[..bytes_written]) })
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Glue for usage of the `write!` macro with implementors of this trait.
|
/// Glue for usage of the `write!` macro with implementors of this trait.
|
||||||
|
@ -897,10 +897,9 @@ impl<'a> Formatter<'a> {
|
||||||
// Writes the sign if it exists, and then the prefix if it was requested
|
// Writes the sign if it exists, and then the prefix if it was requested
|
||||||
let write_prefix = |f: &mut Formatter| {
|
let write_prefix = |f: &mut Formatter| {
|
||||||
if let Some(c) = sign {
|
if let Some(c) = sign {
|
||||||
let mut b = [0; 4];
|
try!(f.buf.write_str(unsafe {
|
||||||
let n = c.encode_utf8(&mut b).unwrap_or(0);
|
str::from_utf8_unchecked(c.encode_utf8().as_slice())
|
||||||
let b = unsafe { str::from_utf8_unchecked(&b[..n]) };
|
}));
|
||||||
try!(f.buf.write_str(b));
|
|
||||||
}
|
}
|
||||||
if prefixed { f.buf.write_str(prefix) }
|
if prefixed { f.buf.write_str(prefix) }
|
||||||
else { Ok(()) }
|
else { Ok(()) }
|
||||||
|
@ -1003,9 +1002,10 @@ impl<'a> Formatter<'a> {
|
||||||
rt::v1::Alignment::Center => (padding / 2, (padding + 1) / 2),
|
rt::v1::Alignment::Center => (padding / 2, (padding + 1) / 2),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut fill = [0; 4];
|
let fill = self.fill.encode_utf8();
|
||||||
let len = self.fill.encode_utf8(&mut fill).unwrap_or(0);
|
let fill = unsafe {
|
||||||
let fill = unsafe { str::from_utf8_unchecked(&fill[..len]) };
|
str::from_utf8_unchecked(fill.as_slice())
|
||||||
|
};
|
||||||
|
|
||||||
for _ in 0..pre_pad {
|
for _ in 0..pre_pad {
|
||||||
try!(self.buf.write_str(fill));
|
try!(self.buf.write_str(fill));
|
||||||
|
@ -1391,10 +1391,9 @@ impl Display for char {
|
||||||
if f.width.is_none() && f.precision.is_none() {
|
if f.width.is_none() && f.precision.is_none() {
|
||||||
f.write_char(*self)
|
f.write_char(*self)
|
||||||
} else {
|
} else {
|
||||||
let mut utf8 = [0; 4];
|
f.pad(unsafe {
|
||||||
let amt = self.encode_utf8(&mut utf8).unwrap_or(0);
|
str::from_utf8_unchecked(self.encode_utf8().as_slice())
|
||||||
let s: &str = unsafe { str::from_utf8_unchecked(&utf8[..amt]) };
|
})
|
||||||
f.pad(s)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -175,9 +175,10 @@ fn test_escape_unicode() {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_encode_utf8() {
|
fn test_encode_utf8() {
|
||||||
fn check(input: char, expect: &[u8]) {
|
fn check(input: char, expect: &[u8]) {
|
||||||
let mut buf = [0; 4];
|
assert_eq!(input.encode_utf8().as_slice(), expect);
|
||||||
let n = input.encode_utf8(&mut buf).unwrap_or(0);
|
for (a, b) in input.encode_utf8().zip(expect) {
|
||||||
assert_eq!(&buf[..n], expect);
|
assert_eq!(a, *b);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
check('x', &[0x78]);
|
check('x', &[0x78]);
|
||||||
|
@ -189,9 +190,10 @@ fn test_encode_utf8() {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_encode_utf16() {
|
fn test_encode_utf16() {
|
||||||
fn check(input: char, expect: &[u16]) {
|
fn check(input: char, expect: &[u16]) {
|
||||||
let mut buf = [0; 2];
|
assert_eq!(input.encode_utf16().as_slice(), expect);
|
||||||
let n = input.encode_utf16(&mut buf).unwrap_or(0);
|
for (a, b) in input.encode_utf16().zip(expect) {
|
||||||
assert_eq!(&buf[..n], expect);
|
assert_eq!(a, *b);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
check('x', &[0x0078]);
|
check('x', &[0x0078]);
|
||||||
|
|
|
@ -35,7 +35,9 @@ use tables::{derived_property, property, general_category, conversions};
|
||||||
|
|
||||||
// stable reexports
|
// stable reexports
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicode, EscapeDefault};
|
pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit};
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
pub use core::char::{EscapeUnicode, EscapeDefault, EncodeUtf8, EncodeUtf16};
|
||||||
|
|
||||||
// unstable reexports
|
// unstable reexports
|
||||||
#[unstable(feature = "unicode", issue = "27783")]
|
#[unstable(feature = "unicode", issue = "27783")]
|
||||||
|
@ -408,84 +410,50 @@ impl char {
|
||||||
C::len_utf16(self)
|
C::len_utf16(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encodes this character as UTF-8 into the provided byte buffer, and then
|
/// Returns an iterator over the bytes of this character as UTF-8.
|
||||||
/// returns the number of bytes written.
|
|
||||||
///
|
///
|
||||||
/// If the buffer is not large enough, nothing will be written into it and a
|
/// The returned iterator also has an `as_slice()` method to view the
|
||||||
/// `None` will be returned. A buffer of length four is large enough to
|
/// encoded bytes as a byte slice.
|
||||||
/// encode any `char`.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// In both of these examples, 'ß' takes two bytes to encode.
|
|
||||||
///
|
|
||||||
/// ```
|
/// ```
|
||||||
/// #![feature(unicode)]
|
/// #![feature(unicode)]
|
||||||
///
|
///
|
||||||
/// let mut b = [0; 2];
|
/// let iterator = 'ß'.encode_utf8();
|
||||||
|
/// assert_eq!(iterator.as_slice(), [0xc3, 0x9f]);
|
||||||
///
|
///
|
||||||
/// let result = 'ß'.encode_utf8(&mut b);
|
/// for (i, byte) in iterator.enumerate() {
|
||||||
///
|
/// println!("byte {}: {:x}", i, byte);
|
||||||
/// assert_eq!(result, Some(2));
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
/// A buffer that's too small:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// #![feature(unicode)]
|
|
||||||
///
|
|
||||||
/// let mut b = [0; 1];
|
|
||||||
///
|
|
||||||
/// let result = 'ß'.encode_utf8(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, None);
|
|
||||||
/// ```
|
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader",
|
|
||||||
issue = "27784")]
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> {
|
pub fn encode_utf8(self) -> EncodeUtf8 {
|
||||||
C::encode_utf8(self, dst)
|
C::encode_utf8(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encodes this character as UTF-16 into the provided `u16` buffer, and
|
/// Returns an iterator over the `u16` entries of this character as UTF-16.
|
||||||
/// then returns the number of `u16`s written.
|
|
||||||
///
|
///
|
||||||
/// If the buffer is not large enough, nothing will be written into it and a
|
/// The returned iterator also has an `as_slice()` method to view the
|
||||||
/// `None` will be returned. A buffer of length 2 is large enough to encode
|
/// encoded form as a slice.
|
||||||
/// any `char`.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// In both of these examples, '𝕊' takes two `u16`s to encode.
|
|
||||||
///
|
|
||||||
/// ```
|
/// ```
|
||||||
/// #![feature(unicode)]
|
/// #![feature(unicode)]
|
||||||
///
|
///
|
||||||
/// let mut b = [0; 2];
|
/// let iterator = '𝕊'.encode_utf16();
|
||||||
|
/// assert_eq!(iterator.as_slice(), [0xd835, 0xdd4a]);
|
||||||
///
|
///
|
||||||
/// let result = '𝕊'.encode_utf16(&mut b);
|
/// for (i, val) in iterator.enumerate() {
|
||||||
///
|
/// println!("entry {}: {:x}", i, val);
|
||||||
/// assert_eq!(result, Some(2));
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
#[unstable(feature = "unicode", issue = "27784")]
|
||||||
/// A buffer that's too small:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// #![feature(unicode)]
|
|
||||||
///
|
|
||||||
/// let mut b = [0; 1];
|
|
||||||
///
|
|
||||||
/// let result = '𝕊'.encode_utf16(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, None);
|
|
||||||
/// ```
|
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader",
|
|
||||||
issue = "27784")]
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> {
|
pub fn encode_utf16(self) -> EncodeUtf16 {
|
||||||
C::encode_utf16(self, dst)
|
C::encode_utf16(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if this `char` is an alphabetic code point, and false if not.
|
/// Returns true if this `char` is an alphabetic code point, and false if not.
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
#![feature(core_char_ext)]
|
#![feature(core_char_ext)]
|
||||||
#![feature(lang_items)]
|
#![feature(lang_items)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
|
#![feature(unicode)]
|
||||||
|
|
||||||
mod tables;
|
mod tables;
|
||||||
mod u_str;
|
mod u_str;
|
||||||
|
|
|
@ -155,13 +155,13 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
|
||||||
return Some(tmp);
|
return Some(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut buf = [0; 2];
|
|
||||||
self.chars.next().map(|ch| {
|
self.chars.next().map(|ch| {
|
||||||
let n = CharExt::encode_utf16(ch, &mut buf).unwrap_or(0);
|
let n = CharExt::encode_utf16(ch);
|
||||||
if n == 2 {
|
let n = n.as_slice();
|
||||||
self.extra = buf[1];
|
if n.len() == 2 {
|
||||||
|
self.extra = n[1];
|
||||||
}
|
}
|
||||||
buf[0]
|
n[0]
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -433,10 +433,9 @@ fn escape_str(wr: &mut fmt::Write, v: &str) -> EncodeResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn escape_char(writer: &mut fmt::Write, v: char) -> EncodeResult {
|
fn escape_char(writer: &mut fmt::Write, v: char) -> EncodeResult {
|
||||||
let mut buf = [0; 4];
|
escape_str(writer, unsafe {
|
||||||
let n = v.encode_utf8(&mut buf).unwrap();
|
str::from_utf8_unchecked(v.encode_utf8().as_slice())
|
||||||
let buf = unsafe { str::from_utf8_unchecked(&buf[..n]) };
|
})
|
||||||
escape_str(writer, buf)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn spaces(wr: &mut fmt::Write, mut n: usize) -> EncodeResult {
|
fn spaces(wr: &mut fmt::Write, mut n: usize) -> EncodeResult {
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
// unix (it's mostly used on windows), so don't worry about dead code here.
|
// unix (it's mostly used on windows), so don't worry about dead code here.
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use core::char::{encode_utf8_raw, encode_utf16_raw};
|
|
||||||
use core::str::next_code_point;
|
use core::str::next_code_point;
|
||||||
|
|
||||||
use ascii::*;
|
use ascii::*;
|
||||||
|
@ -206,19 +205,10 @@ impl Wtf8Buf {
|
||||||
/// Copied from String::push
|
/// Copied from String::push
|
||||||
/// This does **not** include the WTF-8 concatenation check.
|
/// This does **not** include the WTF-8 concatenation check.
|
||||||
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
|
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
|
||||||
let cur_len = self.len();
|
let bytes = unsafe {
|
||||||
// This may use up to 4 bytes.
|
char::from_u32_unchecked(code_point.value).encode_utf8()
|
||||||
self.reserve(4);
|
};
|
||||||
|
self.bytes.extend_from_slice(bytes.as_slice());
|
||||||
unsafe {
|
|
||||||
// Attempt to not use an intermediate buffer by just pushing bytes
|
|
||||||
// directly onto this string.
|
|
||||||
let slice = slice::from_raw_parts_mut(
|
|
||||||
self.bytes.as_mut_ptr().offset(cur_len as isize), 4
|
|
||||||
);
|
|
||||||
let used = encode_utf8_raw(code_point.value, slice).unwrap();
|
|
||||||
self.bytes.set_len(cur_len + used);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -747,12 +737,15 @@ impl<'a> Iterator for EncodeWide<'a> {
|
||||||
return Some(tmp);
|
return Some(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut buf = [0; 2];
|
|
||||||
self.code_points.next().map(|code_point| {
|
self.code_points.next().map(|code_point| {
|
||||||
let n = encode_utf16_raw(code_point.value, &mut buf)
|
let n = unsafe {
|
||||||
.unwrap_or(0);
|
char::from_u32_unchecked(code_point.value).encode_utf16()
|
||||||
if n == 2 { self.extra = buf[1]; }
|
};
|
||||||
buf[0]
|
let n = n.as_slice();
|
||||||
|
if n.len() == 2 {
|
||||||
|
self.extra = n[1];
|
||||||
|
}
|
||||||
|
n[0]
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue