auto merge of #15283 : kwantam/rust/master, r=alexcrichton

Add libunicode; move unicode functions from core - created new crate, libunicode, below libstd - split `Char` trait into `Char` (libcore) and `UnicodeChar` (libunicode) - Unicode-aware functions now live in libunicode - `is_alphabetic`, `is_XID_start`, `is_XID_continue`, `is_lowercase`, `is_uppercase`, `is_whitespace`, `is_alphanumeric`, `is_control`, `is_digit`, `to_uppercase`, `to_lowercase` - added `width` method in UnicodeChar trait - determines printed width of character in columns, or None if it is a non-NULL control character - takes a boolean argument indicating whether the present context is CJK or not (characters with 'A'mbiguous widths are double-wide in CJK contexts, single-wide otherwise) - split `StrSlice` into `StrSlice` (libcore) and `UnicodeStrSlice` (libunicode) - functionality formerly in `StrSlice` that relied upon Unicode functionality from `Char` is now in `UnicodeStrSlice` - `words`, `is_whitespace`, `is_alphanumeric`, `trim`, `trim_left`, `trim_right` - also moved `Words` type alias into libunicode because `words` method is in `UnicodeStrSlice` - unified Unicode tables from libcollections, libcore, and libregex into libunicode - updated `unicode.py` in `src/etc` to generate aforementioned tables - generated new tables based on latest Unicode data - added `UnicodeChar` and `UnicodeStrSlice` traits to prelude - libunicode is now the collection point for the `std::char` module, combining the libunicode functionality with the `Char` functionality from libcore - thus, moved doc comment for `char` from `core::char` to `unicode::char` - libcollections remains the collection point for `std::str` The Unicode-aware functions that previously lived in the `Char` and `StrSlice` traits are no longer available to programs that only use libcore. To regain use of these methods, include the libunicode crate and `use` the `UnicodeChar` and/or `UnicodeStrSlice` traits: extern crate unicode; use unicode::UnicodeChar; use unicode::UnicodeStrSlice; use unicode::Words; // if you want to use the words() method NOTE: this does *not* impact programs that use libstd, since UnicodeChar and UnicodeStrSlice have been added to the prelude. closes #15224 [breaking-change]
2014-07-09 18:36:30 +00:00 · 2014-07-09 18:36:30 +00:00 · fa7cbb5a46
commit fa7cbb5a46
parent f9d3b9e488 85e2bee4a2
27 changed files with 7445 additions and 11597 deletions
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@ -22,7 +22,7 @@ use cmp;
 use cmp::{PartialEq, Eq};
 use collections::Collection;
 use default::Default;
-use iter::{Filter, Map, Iterator};
+use iter::{Map, Iterator};
 use iter::{DoubleEndedIterator, ExactSize};
 use iter::range;
 use num::{CheckedMul, Saturating};
@ -204,10 +204,6 @@ pub struct CharSplitsN<'a, Sep> {
    invert: bool,
 }

-/// An iterator over the words of a string, separated by a sequence of whitespace
-pub type Words<'a> =
-    Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
-
 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
 pub type AnyLines<'a> =
    Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
@ -1209,48 +1205,6 @@ pub trait StrSlice<'a> {
    /// ```
    fn lines_any(&self) -> AnyLines<'a>;

-    /// An iterator over the words of a string (subsequences separated
-    /// by any sequence of whitespace). Sequences of whitespace are
-    /// collapsed, so empty "words" are not included.
-    ///
-    /// # Example
-    ///
-    /// ```rust
-    /// let some_words = " Mary   had\ta little  \n\t lamb";
-    /// let v: Vec<&str> = some_words.words().collect();
-    /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
-    /// ```
-    fn words(&self) -> Words<'a>;
-
-    /// Returns true if the string contains only whitespace.
-    ///
-    /// Whitespace characters are determined by `char::is_whitespace`.
-    ///
-    /// # Example
-    ///
-    /// ```rust
-    /// assert!(" \t\n".is_whitespace());
-    /// assert!("".is_whitespace());
-    ///
-    /// assert!( !"abc".is_whitespace());
-    /// ```
-    fn is_whitespace(&self) -> bool;
-
-    /// Returns true if the string contains only alphanumeric code
-    /// points.
-    ///
-    /// Alphanumeric characters are determined by `char::is_alphanumeric`.
-    ///
-    /// # Example
-    ///
-    /// ```rust
-    /// assert!("Löwe老虎Léopard123".is_alphanumeric());
-    /// assert!("".is_alphanumeric());
-    ///
-    /// assert!( !" &*~".is_alphanumeric());
-    /// ```
-    fn is_alphanumeric(&self) -> bool;
-
    /// Returns the number of Unicode code points (`char`) that a
    /// string holds.
    ///
@ -1368,15 +1322,6 @@ pub trait StrSlice<'a> {
    /// Returns true if `needle` is a suffix of the string.
    fn ends_with(&self, needle: &str) -> bool;

-    /// Returns a string with leading and trailing whitespace removed.
-    fn trim(&self) -> &'a str;
-
-    /// Returns a string with leading whitespace removed.
-    fn trim_left(&self) -> &'a str;
-
-    /// Returns a string with trailing whitespace removed.
-    fn trim_right(&self) -> &'a str;
-
    /// Returns a string with characters that match `to_trim` removed.
    ///
    /// # Arguments
@ -1748,17 +1693,6 @@ impl<'a> StrSlice<'a> for &'a str {
        })
    }

-    #[inline]
-    fn words(&self) -> Words<'a> {
-        self.split(char::is_whitespace).filter(|s| !s.is_empty())
-    }
-
-    #[inline]
-    fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
-
-    #[inline]
-    fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
-
    #[inline]
    fn char_len(&self) -> uint { self.chars().count() }

@ -1817,21 +1751,6 @@ impl<'a> StrSlice<'a> for &'a str {
        m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
    }

-    #[inline]
-    fn trim(&self) -> &'a str {
-        self.trim_left().trim_right()
-    }
-
-    #[inline]
-    fn trim_left(&self) -> &'a str {
-        self.trim_left_chars(char::is_whitespace)
-    }
-
-    #[inline]
-    fn trim_right(&self) -> &'a str {
-        self.trim_right_chars(char::is_whitespace)
-    }
-
    #[inline]
    fn trim_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
        let cur = match self.find(|c: char| !to_trim.matches(c)) {