Auto merge of #27808 - SimonSapin:utf16decoder, r=alexcrichton

* Rename `Utf16Items` to `Utf16Decoder`. "Items" is meaningless.
* Generalize it to any `u16` iterator, not just `[u16].iter()`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enables using the `FromIterator for Result` impl.
* Replace `Utf16Item::to_char_lossy` with a `Utf16Decoder::lossy` iterator adaptor.

This is a [breaking change], but only for users of the unstable `rustc_unicode` crate.

I’d like this functionality to be stabilized and re-exported in `std` eventually, as the "low-level equivalent" of `String::from_utf16` and `String::from_utf16_lossy`, like #27784 is the low-level equivalent of #27714.

CC @aturon, @alexcrichton
2015-08-27 00:41:13 +00:00 · 2015-08-27 00:41:13 +00:00 · fd302a95e1
commit fd302a95e1
parent 80b971a9b8 6174b8d726
16 changed files with 293 additions and 171 deletions
--- a/src/liballoc/arc.rs
+++ b/src/liballoc/arc.rs
@ -73,6 +73,7 @@ use boxed::Box;

 use core::sync::atomic;
 use core::sync::atomic::Ordering::{Relaxed, Release, Acquire, SeqCst};
+use core::borrow;
 use core::fmt;
 use core::cmp::Ordering;
 use core::mem::{align_of_val, size_of_val};
@ -1109,3 +1110,7 @@ mod tests {
        assert!(y.upgrade().is_none());
    }
 }
+
+impl<T: ?Sized> borrow::Borrow<T> for Arc<T> {
+    fn borrow(&self) -> &T { &**self }
+}
--- a/src/liballoc/boxed.rs
+++ b/src/liballoc/boxed.rs
@ -57,6 +57,7 @@ use heap;
 use raw_vec::RawVec;

 use core::any::Any;
+use core::borrow;
 use core::cmp::Ordering;
 use core::fmt;
 use core::hash::{self, Hash};
@ -562,3 +563,10 @@ impl<T: Clone> Clone for Box<[T]> {
    }
 }

+impl<T: ?Sized> borrow::Borrow<T> for Box<T> {
+    fn borrow(&self) -> &T { &**self }
+}
+
+impl<T: ?Sized> borrow::BorrowMut<T> for Box<T> {
+    fn borrow_mut(&mut self) -> &mut T { &mut **self }
+}
--- a/src/liballoc/rc.rs
+++ b/src/liballoc/rc.rs
@ -158,6 +158,7 @@ use boxed::Box;
 #[cfg(test)]
 use std::boxed::Box;

+use core::borrow;
 use core::cell::Cell;
 use core::cmp::Ordering;
 use core::fmt;
@ -1091,3 +1092,7 @@ mod tests {
        assert_eq!(foo, foo.clone());
    }
 }
+
+impl<T: ?Sized> borrow::Borrow<T> for Rc<T> {
+    fn borrow(&self) -> &T { &**self }
+}
--- a/src/libcollections/borrow.rs
+++ b/src/libcollections/borrow.rs
@ -21,119 +21,10 @@ use core::ops::Deref;
 use core::option::Option;

 use fmt;
-use alloc::{boxed, rc, arc};

 use self::Cow::*;

-/// A trait for borrowing data.
-///
-/// In general, there may be several ways to "borrow" a piece of data.  The
-/// typical ways of borrowing a type `T` are `&T` (a shared borrow) and `&mut T`
-/// (a mutable borrow). But types like `Vec<T>` provide additional kinds of
-/// borrows: the borrowed slices `&[T]` and `&mut [T]`.
-///
-/// When writing generic code, it is often desirable to abstract over all ways
-/// of borrowing data from a given type. That is the role of the `Borrow`
-/// trait: if `T: Borrow<U>`, then `&U` can be borrowed from `&T`.  A given
-/// type can be borrowed as multiple different types. In particular, `Vec<T>:
-/// Borrow<Vec<T>>` and `Vec<T>: Borrow<[T]>`.
-///
-/// If you are implementing `Borrow` and both `Self` and `Borrowed` implement
-/// `Hash`, `Eq`, and/or `Ord`, they must produce the same result.
-///
-/// `Borrow` is very similar to, but different than, `AsRef`. See
-/// [the book][book] for more.
-///
-/// [book]: ../../book/borrow-and-asref.html
-#[stable(feature = "rust1", since = "1.0.0")]
-pub trait Borrow<Borrowed: ?Sized> {
-    /// Immutably borrows from an owned value.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use std::borrow::Borrow;
-    ///
-    /// fn check<T: Borrow<str>>(s: T) {
-    ///     assert_eq!("Hello", s.borrow());
-    /// }
-    ///
-    /// let s = "Hello".to_string();
-    ///
-    /// check(s);
-    ///
-    /// let s = "Hello";
-    ///
-    /// check(s);
-    /// ```
-    #[stable(feature = "rust1", since = "1.0.0")]
-    fn borrow(&self) -> &Borrowed;
-}
-
-/// A trait for mutably borrowing data.
-///
-/// Similar to `Borrow`, but for mutable borrows.
-#[stable(feature = "rust1", since = "1.0.0")]
-pub trait BorrowMut<Borrowed: ?Sized> : Borrow<Borrowed> {
-    /// Mutably borrows from an owned value.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use std::borrow::BorrowMut;
-    ///
-    /// fn check<T: BorrowMut<[i32]>>(mut v: T) {
-    ///     assert_eq!(&mut [1, 2, 3], v.borrow_mut());
-    /// }
-    ///
-    /// let v = vec![1, 2, 3];
-    ///
-    /// check(v);
-    /// ```
-    #[stable(feature = "rust1", since = "1.0.0")]
-    fn borrow_mut(&mut self) -> &mut Borrowed;
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T: ?Sized> Borrow<T> for T {
-    fn borrow(&self) -> &T { self }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T: ?Sized> BorrowMut<T> for T {
-    fn borrow_mut(&mut self) -> &mut T { self }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, T: ?Sized> Borrow<T> for &'a T {
-    fn borrow(&self) -> &T { &**self }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, T: ?Sized> Borrow<T> for &'a mut T {
-    fn borrow(&self) -> &T { &**self }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, T: ?Sized> BorrowMut<T> for &'a mut T {
-    fn borrow_mut(&mut self) -> &mut T { &mut **self }
-}
-
-impl<T: ?Sized> Borrow<T> for boxed::Box<T> {
-    fn borrow(&self) -> &T { &**self }
-}
-
-impl<T: ?Sized> BorrowMut<T> for boxed::Box<T> {
-    fn borrow_mut(&mut self) -> &mut T { &mut **self }
-}
-
-impl<T: ?Sized> Borrow<T> for rc::Rc<T> {
-    fn borrow(&self) -> &T { &**self }
-}
-
-impl<T: ?Sized> Borrow<T> for arc::Arc<T> {
-    fn borrow(&self) -> &T { &**self }
-}
+pub use core::borrow::{Borrow, BorrowMut};

 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B> where B: ToOwned, <B as ToOwned>::Owned: 'a {
--- a/src/libcollections/lib.rs
+++ b/src/libcollections/lib.rs
@ -56,6 +56,7 @@
 #![feature(unicode)]
 #![feature(unique)]
 #![feature(unsafe_no_drop_flag, filling_drop)]
+#![feature(decode_utf16)]
 #![feature(utf8_error)]
 #![cfg_attr(test, feature(rand, test))]

--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@ -20,8 +20,8 @@ use core::ops::{self, Deref, Add, Index};
 use core::ptr;
 use core::slice;
 use core::str::pattern::Pattern;
+use rustc_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
 use rustc_unicode::str as unicode_str;
-use rustc_unicode::str::Utf16Item;

 use borrow::{Cow, IntoCow};
 use range::RangeArgument;
@ -267,14 +267,7 @@ impl String {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
-        let mut s = String::with_capacity(v.len());
-        for c in unicode_str::utf16_items(v) {
-            match c {
-                Utf16Item::ScalarValue(c) => s.push(c),
-                Utf16Item::LoneSurrogate(_) => return Err(FromUtf16Error(())),
-            }
-        }
-        Ok(s)
+        decode_utf16(v.iter().cloned()).collect::<Result<_, _>>().map_err(|_| FromUtf16Error(()))
    }

    /// Decode a UTF-16 encoded vector `v` into a string, replacing
@ -294,7 +287,7 @@ impl String {
    #[inline]
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn from_utf16_lossy(v: &[u16]) -> String {
-        unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
+        decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect()
    }

    /// Creates a new `String` from a length, capacity, and pointer.
--- a/src/libcore/borrow.rs
+++ b/src/libcore/borrow.rs
@ -0,0 +1,109 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! A module for working with borrowed data.
+
+#![stable(feature = "rust1", since = "1.0.0")]
+
+use marker::Sized;
+
+/// A trait for borrowing data.
+///
+/// In general, there may be several ways to "borrow" a piece of data.  The
+/// typical ways of borrowing a type `T` are `&T` (a shared borrow) and `&mut T`
+/// (a mutable borrow). But types like `Vec<T>` provide additional kinds of
+/// borrows: the borrowed slices `&[T]` and `&mut [T]`.
+///
+/// When writing generic code, it is often desirable to abstract over all ways
+/// of borrowing data from a given type. That is the role of the `Borrow`
+/// trait: if `T: Borrow<U>`, then `&U` can be borrowed from `&T`.  A given
+/// type can be borrowed as multiple different types. In particular, `Vec<T>:
+/// Borrow<Vec<T>>` and `Vec<T>: Borrow<[T]>`.
+///
+/// If you are implementing `Borrow` and both `Self` and `Borrowed` implement
+/// `Hash`, `Eq`, and/or `Ord`, they must produce the same result.
+///
+/// `Borrow` is very similar to, but different than, `AsRef`. See
+/// [the book][book] for more.
+///
+/// [book]: ../../book/borrow-and-asref.html
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait Borrow<Borrowed: ?Sized> {
+    /// Immutably borrows from an owned value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::borrow::Borrow;
+    ///
+    /// fn check<T: Borrow<str>>(s: T) {
+    ///     assert_eq!("Hello", s.borrow());
+    /// }
+    ///
+    /// let s = "Hello".to_string();
+    ///
+    /// check(s);
+    ///
+    /// let s = "Hello";
+    ///
+    /// check(s);
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn borrow(&self) -> &Borrowed;
+}
+
+/// A trait for mutably borrowing data.
+///
+/// Similar to `Borrow`, but for mutable borrows.
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait BorrowMut<Borrowed: ?Sized> : Borrow<Borrowed> {
+    /// Mutably borrows from an owned value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::borrow::BorrowMut;
+    ///
+    /// fn check<T: BorrowMut<[i32]>>(mut v: T) {
+    ///     assert_eq!(&mut [1, 2, 3], v.borrow_mut());
+    /// }
+    ///
+    /// let v = vec![1, 2, 3];
+    ///
+    /// check(v);
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn borrow_mut(&mut self) -> &mut Borrowed;
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T: ?Sized> Borrow<T> for T {
+    fn borrow(&self) -> &T { self }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T: ?Sized> BorrowMut<T> for T {
+    fn borrow_mut(&mut self) -> &mut T { self }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, T: ?Sized> Borrow<T> for &'a T {
+    fn borrow(&self) -> &T { &**self }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, T: ?Sized> Borrow<T> for &'a mut T {
+    fn borrow(&self) -> &T { &**self }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, T: ?Sized> BorrowMut<T> for &'a mut T {
+    fn borrow_mut(&mut self) -> &mut T { &mut **self }
+}
--- a/src/libcore/lib.rs
+++ b/src/libcore/lib.rs
@ -139,6 +139,7 @@ pub mod cmp;
 pub mod clone;
 pub mod default;
 pub mod convert;
+pub mod borrow;

 /* Core types and methods on primitives */

--- a/src/libcoretest/char.rs
+++ b/src/libcoretest/char.rs
@ -207,3 +207,12 @@ fn test_len_utf16() {
    assert!('\u{a66e}'.len_utf16() == 1);
    assert!('\u{1f4a9}'.len_utf16() == 2);
 }
+
+#[test]
+fn test_decode_utf16() {
+    fn check(s: &[u16], expected: &[Result<char, u16>]) {
+        assert_eq!(::std::char::decode_utf16(s.iter().cloned()).collect::<Vec<_>>(), expected);
+    }
+    check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
+    check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
+}
--- a/src/libcoretest/lib.rs
+++ b/src/libcoretest/lib.rs
@ -19,6 +19,7 @@
 #![feature(float_from_str_radix)]
 #![feature(flt2dec)]
 #![feature(dec2flt)]
+#![feature(decode_utf16)]
 #![feature(fmt_radix)]
 #![feature(iter_arith)]
 #![feature(iter_arith)]
--- a/src/librustc_unicode/char.rs
+++ b/src/librustc_unicode/char.rs
@ -503,3 +503,116 @@ impl char {
        ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
    }
 }
+
+/// An iterator that decodes UTF-16 encoded codepoints from an iterator of `u16`s.
+#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")]
+#[derive(Clone)]
+pub struct DecodeUtf16<I> where I: Iterator<Item=u16> {
+    iter: I,
+    buf: Option<u16>,
+}
+
+/// Create an iterator over the UTF-16 encoded codepoints in `iterable`,
+/// returning unpaired surrogates as `Err`s.
+///
+/// # Examples
+///
+/// ```
+/// #![feature(decode_utf16)]
+///
+/// use std::char::decode_utf16;
+///
+/// fn main() {
+///     // 𝄞mus<invalid>ic<invalid>
+///     let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
+///              0x0073, 0xDD1E, 0x0069, 0x0063,
+///              0xD834];
+///
+///     assert_eq!(decode_utf16(v.iter().cloned()).collect::<Vec<_>>(),
+///                vec![Ok('𝄞'),
+///                     Ok('m'), Ok('u'), Ok('s'),
+///                     Err(0xDD1E),
+///                     Ok('i'), Ok('c'),
+///                     Err(0xD834)]);
+/// }
+/// ```
+///
+/// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
+///
+/// ```
+/// #![feature(decode_utf16)]
+///
+/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
+///
+/// fn main() {
+///     // 𝄞mus<invalid>ic<invalid>
+///     let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
+///              0x0073, 0xDD1E, 0x0069, 0x0063,
+///              0xD834];
+///
+///     assert_eq!(decode_utf16(v.iter().cloned())
+///                    .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+///                    .collect::<String>(),
+///                "𝄞mus\u{FFFD}ic\u{FFFD}");
+/// }
+/// ```
+#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")]
+#[inline]
+pub fn decode_utf16<I: IntoIterator<Item=u16>>(iterable: I) -> DecodeUtf16<I::IntoIter> {
+    DecodeUtf16 {
+        iter: iterable.into_iter(),
+        buf: None,
+    }
+}
+
+#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")]
+impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> {
+    type Item = Result<char, u16>;
+
+    fn next(&mut self) -> Option<Result<char, u16>> {
+        let u = match self.buf.take() {
+            Some(buf) => buf,
+            None => match self.iter.next() {
+                Some(u) => u,
+                None => return None
+            }
+        };
+
+        if u < 0xD800 || 0xDFFF < u {
+            // not a surrogate
+            Some(Ok(unsafe { from_u32_unchecked(u as u32) }))
+        } else if u >= 0xDC00 {
+            // a trailing surrogate
+            Some(Err(u))
+        } else {
+            let u2 = match self.iter.next() {
+                Some(u2) => u2,
+                // eof
+                None => return Some(Err(u))
+            };
+            if u2 < 0xDC00 || u2 > 0xDFFF {
+                // not a trailing surrogate so we're not a valid
+                // surrogate pair, so rewind to redecode u2 next time.
+                self.buf = Some(u2);
+                return Some(Err(u))
+            }
+
+            // all ok, so lets decode it.
+            let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
+            Some(Ok(unsafe { from_u32_unchecked(c) }))
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (low, high) = self.iter.size_hint();
+        // we could be entirely valid surrogates (2 elements per
+        // char), or entirely non-surrogates (1 element per char)
+        (low / 2, high)
+    }
+}
+
+/// U+FFFD REPLACEMENT CHARACTER (`'\u{FFFD}'`) is used in Unicode to represent a decoding error.
+/// It can occur, for example, when giving ill-formed UTF-8 bytes to `String::from_utf8_lossy`.
+#[unstable(feature = "decode_utf16", reason = "recently added", issue = "27830")]
+pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
--- a/src/librustc_unicode/lib.rs
+++ b/src/librustc_unicode/lib.rs
@ -46,6 +46,7 @@ mod tables;
 mod u_str;
 pub mod char;

+#[allow(deprecated)]
 pub mod str {
    pub use u_str::{UnicodeStr, SplitWhitespace};
    pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
--- a/src/librustc_unicode/u_str.rs
+++ b/src/librustc_unicode/u_str.rs
@ -13,8 +13,9 @@
 //! This module provides functionality to `str` that requires the Unicode methods provided by the
 //! unicode parts of the CharExt trait.

+use char::{DecodeUtf16, decode_utf16};
 use core::char;
-use core::iter::Filter;
+use core::iter::{Cloned, Filter};
 use core::slice;
 use core::str::Split;

@ -119,11 +120,18 @@ pub fn is_utf16(v: &[u16]) -> bool {

 /// An iterator that decodes UTF-16 encoded codepoints from a vector
 /// of `u16`s.
+#[deprecated(since = "1.4.0", reason = "renamed to `char::DecodeUtf16`")]
+#[unstable(feature = "decode_utf16", reason = "not exposed in std", issue = "27830")]
+#[allow(deprecated)]
 #[derive(Clone)]
 pub struct Utf16Items<'a> {
-    iter: slice::Iter<'a, u16>
+    decoder: DecodeUtf16<Cloned<slice::Iter<'a, u16>>>
 }
+
 /// The possibilities for values decoded from a `u16` stream.
+#[deprecated(since = "1.4.0", reason = "`char::DecodeUtf16` uses `Result<char, u16>` instead")]
+#[unstable(feature = "decode_utf16", reason = "not exposed in std", issue = "27830")]
+#[allow(deprecated)]
 #[derive(Copy, PartialEq, Eq, Clone, Debug)]
 pub enum Utf16Item {
    /// A valid codepoint.
@ -132,6 +140,7 @@ pub enum Utf16Item {
    LoneSurrogate(u16)
 }

+#[allow(deprecated)]
 impl Utf16Item {
    /// Convert `self` to a `char`, taking `LoneSurrogate`s to the
    /// replacement character (U+FFFD).
@ -144,49 +153,22 @@ impl Utf16Item {
    }
 }

+#[deprecated(since = "1.4.0", reason = "use `char::DecodeUtf16` instead")]
+#[unstable(feature = "decode_utf16", reason = "not exposed in std", issue = "27830")]
+#[allow(deprecated)]
 impl<'a> Iterator for Utf16Items<'a> {
    type Item = Utf16Item;

    fn next(&mut self) -> Option<Utf16Item> {
-        let u = match self.iter.next() {
-            Some(u) => *u,
-            None => return None
-        };
-
-        if u < 0xD800 || 0xDFFF < u {
-            // not a surrogate
-            Some(Utf16Item::ScalarValue(unsafe { char::from_u32_unchecked(u as u32) }))
-        } else if u >= 0xDC00 {
-            // a trailing surrogate
-            Some(Utf16Item::LoneSurrogate(u))
-        } else {
-            // preserve state for rewinding.
-            let old = self.iter.clone();
-
-            let u2 = match self.iter.next() {
-                Some(u2) => *u2,
-                // eof
-                None => return Some(Utf16Item::LoneSurrogate(u))
-            };
-            if u2 < 0xDC00 || u2 > 0xDFFF {
-                // not a trailing surrogate so we're not a valid
-                // surrogate pair, so rewind to redecode u2 next time.
-                self.iter = old.clone();
-                return Some(Utf16Item::LoneSurrogate(u))
-            }
-
-            // all ok, so lets decode it.
-            let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
-            Some(Utf16Item::ScalarValue(unsafe { char::from_u32_unchecked(c) }))
-        }
+        self.decoder.next().map(|result| match result {
+            Ok(c) => Utf16Item::ScalarValue(c),
+            Err(s) => Utf16Item::LoneSurrogate(s),
+        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
-        let (low, high) = self.iter.size_hint();
-        // we could be entirely valid surrogates (2 elements per
-        // char), or entirely non-surrogates (1 element per char)
-        (low / 2, high)
+        self.decoder.size_hint()
    }
 }

@ -196,7 +178,7 @@ impl<'a> Iterator for Utf16Items<'a> {
 /// # Examples
 ///
 /// ```
-/// #![feature(unicode)]
+/// #![feature(unicode, decode_utf16)]
 ///
 /// extern crate rustc_unicode;
 ///
@ -216,8 +198,11 @@ impl<'a> Iterator for Utf16Items<'a> {
 ///                     LoneSurrogate(0xD834)]);
 /// }
 /// ```
+#[deprecated(since = "1.4.0", reason = "renamed to `char::decode_utf16`")]
+#[unstable(feature = "decode_utf16", reason = "not exposed in std", issue = "27830")]
+#[allow(deprecated)]
 pub fn utf16_items<'a>(v: &'a [u16]) -> Utf16Items<'a> {
-    Utf16Items { iter : v.iter() }
+    Utf16Items { decoder: decode_utf16(v.iter().cloned()) }
 }

 /// Iterator adaptor for encoding `char`s to UTF-16.
--- a/src/libserialize/json.rs
+++ b/src/libserialize/json.rs
@ -209,8 +209,6 @@ use std::str::FromStr;
 use std::string;
 use std::{char, f64, fmt, str};
 use std;
-use rustc_unicode::str as unicode_str;
-use rustc_unicode::str::Utf16Item;

 use Encodable;

@ -1712,11 +1710,13 @@ impl<T: Iterator<Item=char>> Parser<T> {
                                _ => return self.error(UnexpectedEndOfHexEscape),
                            }

-                            let buf = [n1, try!(self.decode_hex_escape())];
-                            match unicode_str::utf16_items(&buf).next() {
-                                Some(Utf16Item::ScalarValue(c)) => res.push(c),
-                                _ => return self.error(LoneLeadingSurrogateInHexEscape),
+                            let n2 = try!(self.decode_hex_escape());
+                            if n2 < 0xDC00 || n2 > 0xDFFF {
+                                return self.error(LoneLeadingSurrogateInHexEscape)
                            }
+                            let c = (((n1 - 0xD800) as u32) << 10 |
+                                     (n2 - 0xDC00) as u32) + 0x1_0000;
+                            res.push(char::from_u32(c).unwrap());
                        }

                        n => match char::from_u32(n as u32) {
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@ -242,6 +242,7 @@
 #![feature(unicode)]
 #![feature(unique)]
 #![feature(unsafe_no_drop_flag, filling_drop)]
+#![feature(decode_utf16)]
 #![feature(vec_push_all)]
 #![feature(vec_resize)]
 #![feature(wrapping)]
--- a/src/libstd/sys/common/wtf8.rs
+++ b/src/libstd/sys/common/wtf8.rs
@ -37,7 +37,6 @@ use hash::{Hash, Hasher};
 use iter::FromIterator;
 use mem;
 use ops;
-use rustc_unicode::str::{Utf16Item, utf16_items};
 use slice;
 use str;
 use string::String;
@ -186,14 +185,14 @@ impl Wtf8Buf {
    /// will always return the original code units.
    pub fn from_wide(v: &[u16]) -> Wtf8Buf {
        let mut string = Wtf8Buf::with_capacity(v.len());
-        for item in utf16_items(v) {
+        for item in char::decode_utf16(v.iter().cloned()) {
            match item {
-                Utf16Item::ScalarValue(c) => string.push_char(c),
-                Utf16Item::LoneSurrogate(s) => {
+                Ok(ch) => string.push_char(ch),
+                Err(surrogate) => {
                    // Surrogates are known to be in the code point range.
-                    let code_point = unsafe { CodePoint::from_u32_unchecked(s as u32) };
+                    let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
                    // Skip the WTF-8 concatenation check,
-                    // surrogate pairs are already decoded by utf16_items
+                    // surrogate pairs are already decoded by decode_utf16
                    string.push_code_point_unchecked(code_point)
                }
            }