auto merge of #15283 : kwantam/rust/master, r=alexcrichton
Add libunicode; move unicode functions from core - created new crate, libunicode, below libstd - split `Char` trait into `Char` (libcore) and `UnicodeChar` (libunicode) - Unicode-aware functions now live in libunicode - `is_alphabetic`, `is_XID_start`, `is_XID_continue`, `is_lowercase`, `is_uppercase`, `is_whitespace`, `is_alphanumeric`, `is_control`, `is_digit`, `to_uppercase`, `to_lowercase` - added `width` method in UnicodeChar trait - determines printed width of character in columns, or None if it is a non-NULL control character - takes a boolean argument indicating whether the present context is CJK or not (characters with 'A'mbiguous widths are double-wide in CJK contexts, single-wide otherwise) - split `StrSlice` into `StrSlice` (libcore) and `UnicodeStrSlice` (libunicode) - functionality formerly in `StrSlice` that relied upon Unicode functionality from `Char` is now in `UnicodeStrSlice` - `words`, `is_whitespace`, `is_alphanumeric`, `trim`, `trim_left`, `trim_right` - also moved `Words` type alias into libunicode because `words` method is in `UnicodeStrSlice` - unified Unicode tables from libcollections, libcore, and libregex into libunicode - updated `unicode.py` in `src/etc` to generate aforementioned tables - generated new tables based on latest Unicode data - added `UnicodeChar` and `UnicodeStrSlice` traits to prelude - libunicode is now the collection point for the `std::char` module, combining the libunicode functionality with the `Char` functionality from libcore - thus, moved doc comment for `char` from `core::char` to `unicode::char` - libcollections remains the collection point for `std::str` The Unicode-aware functions that previously lived in the `Char` and `StrSlice` traits are no longer available to programs that only use libcore. To regain use of these methods, include the libunicode crate and `use` the `UnicodeChar` and/or `UnicodeStrSlice` traits: extern crate unicode; use unicode::UnicodeChar; use unicode::UnicodeStrSlice; use unicode::Words; // if you want to use the words() method NOTE: this does *not* impact programs that use libstd, since UnicodeChar and UnicodeStrSlice have been added to the prelude. closes #15224 [breaking-change]
This commit is contained in:
commit
fa7cbb5a46
27 changed files with 7445 additions and 11597 deletions
|
@ -22,7 +22,7 @@ use cmp;
|
|||
use cmp::{PartialEq, Eq};
|
||||
use collections::Collection;
|
||||
use default::Default;
|
||||
use iter::{Filter, Map, Iterator};
|
||||
use iter::{Map, Iterator};
|
||||
use iter::{DoubleEndedIterator, ExactSize};
|
||||
use iter::range;
|
||||
use num::{CheckedMul, Saturating};
|
||||
|
@ -204,10 +204,6 @@ pub struct CharSplitsN<'a, Sep> {
|
|||
invert: bool,
|
||||
}
|
||||
|
||||
/// An iterator over the words of a string, separated by a sequence of whitespace
|
||||
pub type Words<'a> =
|
||||
Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
|
||||
|
||||
/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
|
||||
pub type AnyLines<'a> =
|
||||
Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
|
||||
|
@ -1209,48 +1205,6 @@ pub trait StrSlice<'a> {
|
|||
/// ```
|
||||
fn lines_any(&self) -> AnyLines<'a>;
|
||||
|
||||
/// An iterator over the words of a string (subsequences separated
|
||||
/// by any sequence of whitespace). Sequences of whitespace are
|
||||
/// collapsed, so empty "words" are not included.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// let some_words = " Mary had\ta little \n\t lamb";
|
||||
/// let v: Vec<&str> = some_words.words().collect();
|
||||
/// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
|
||||
/// ```
|
||||
fn words(&self) -> Words<'a>;
|
||||
|
||||
/// Returns true if the string contains only whitespace.
|
||||
///
|
||||
/// Whitespace characters are determined by `char::is_whitespace`.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// assert!(" \t\n".is_whitespace());
|
||||
/// assert!("".is_whitespace());
|
||||
///
|
||||
/// assert!( !"abc".is_whitespace());
|
||||
/// ```
|
||||
fn is_whitespace(&self) -> bool;
|
||||
|
||||
/// Returns true if the string contains only alphanumeric code
|
||||
/// points.
|
||||
///
|
||||
/// Alphanumeric characters are determined by `char::is_alphanumeric`.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// assert!("Löwe老虎Léopard123".is_alphanumeric());
|
||||
/// assert!("".is_alphanumeric());
|
||||
///
|
||||
/// assert!( !" &*~".is_alphanumeric());
|
||||
/// ```
|
||||
fn is_alphanumeric(&self) -> bool;
|
||||
|
||||
/// Returns the number of Unicode code points (`char`) that a
|
||||
/// string holds.
|
||||
///
|
||||
|
@ -1368,15 +1322,6 @@ pub trait StrSlice<'a> {
|
|||
/// Returns true if `needle` is a suffix of the string.
|
||||
fn ends_with(&self, needle: &str) -> bool;
|
||||
|
||||
/// Returns a string with leading and trailing whitespace removed.
|
||||
fn trim(&self) -> &'a str;
|
||||
|
||||
/// Returns a string with leading whitespace removed.
|
||||
fn trim_left(&self) -> &'a str;
|
||||
|
||||
/// Returns a string with trailing whitespace removed.
|
||||
fn trim_right(&self) -> &'a str;
|
||||
|
||||
/// Returns a string with characters that match `to_trim` removed.
|
||||
///
|
||||
/// # Arguments
|
||||
|
@ -1748,17 +1693,6 @@ impl<'a> StrSlice<'a> for &'a str {
|
|||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn words(&self) -> Words<'a> {
|
||||
self.split(char::is_whitespace).filter(|s| !s.is_empty())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
|
||||
|
||||
#[inline]
|
||||
fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
|
||||
|
||||
#[inline]
|
||||
fn char_len(&self) -> uint { self.chars().count() }
|
||||
|
||||
|
@ -1817,21 +1751,6 @@ impl<'a> StrSlice<'a> for &'a str {
|
|||
m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim(&self) -> &'a str {
|
||||
self.trim_left().trim_right()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_left(&self) -> &'a str {
|
||||
self.trim_left_chars(char::is_whitespace)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_right(&self) -> &'a str {
|
||||
self.trim_right_chars(char::is_whitespace)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
|
||||
let cur = match self.find(|c: char| !to_trim.matches(c)) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue