1
Fork 0

auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw

Closes #14358.

~~The tests are not yet moved to `utf16_iter`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_iter`.

This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context.

[breaking-change]

cc @huonw
This commit is contained in:
bors 2014-06-30 19:26:35 +00:00
commit a345c54334
8 changed files with 85 additions and 28 deletions

View file

@ -16,6 +16,7 @@
use mem;
use char;
use char::Char;
use clone::Clone;
use cmp;
use cmp::{PartialEq, Eq};
@ -24,7 +25,7 @@ use default::Default;
use iter::{Filter, Map, Iterator};
use iter::{DoubleEndedIterator, ExactSize};
use iter::range;
use num::Saturating;
use num::{CheckedMul, Saturating};
use option::{None, Option, Some};
use raw::Repr;
use slice::ImmutableVector;
@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
}
}
/// External iterator for a string's UTF-16 code units.
/// Use with the `std::iter` module.
///
/// Each `char` of the underlying string is yielded as one or two `u16`
/// values.
#[deriving(Clone)]
pub struct Utf16CodeUnits<'a> {
// Source of the characters that still have to be encoded.
chars: Chars<'a>,
// Second code unit of a two-unit encoding, held back until the following
// call to `next()`; `0` means that nothing is pending.
extra: u16
}
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
#[inline]
fn next(&mut self) -> Option<u16> {
if self.extra != 0 {
let tmp = self.extra;
self.extra = 0;
return Some(tmp);
}
let mut buf = [0u16, ..2];
self.chars.next().map(|ch| {
let n = ch.encode_utf16(buf /* as mut slice! */);
if n == 2 { self.extra = buf[1]; }
buf[0]
})
}
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) {
let (low, high) = self.chars.size_hint();
// every char gets either one u16 or two u16,
// so this iterator is between 1 or 2 times as
// long as the underlying iterator.
(low, high.and_then(|n| n.checked_mul(&2)))
}
}
/*
Section: Comparing strings
*/
@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
/// and that it is not reallocated (e.g. by pushing to the
/// string).
fn as_ptr(&self) -> *const u8;
/// Return an iterator of `u16` over the string encoded as UTF-16.
///
/// The returned `Utf16CodeUnits` yields the code units lazily, one `u16`
/// per call to `next()`.
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
}
impl<'a> StrSlice<'a> for &'a str {
@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
fn as_ptr(&self) -> *const u8 {
// Hand out the `data` field of this slice's raw representation.
self.repr().data
}
#[inline]
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
    // Wrap the char iterator; `extra: 0` means no code unit is buffered yet.
    let chars = self.chars();
    Utf16CodeUnits { chars: chars, extra: 0 }
}
}
impl<'a> Default for &'a str {