std: Stabilize more of the char module
This commit performs another pass over the `std::char` module for stabilization. Some minor cleanup is performed, such as migrating documentation from libcore to libunicode (where the `std`-facing trait resides), as well as a slight reorganization in libunicode itself. Otherwise, the stability modifications made are:

* `char::from_digit` is now stable
* `CharExt::is_digit` is now stable
* `CharExt::to_digit` is now stable
* `CharExt::to_{lower,upper}case` are now stable after being modified to return an iterator over characters. While the implementation today has not changed, this should allow us to implement the full set of case conversions in Unicode, where some characters can map to multiple characters when doing an upper or lower case mapping.
* `StrExt::to_{lower,upper}case` were added as unstable, as a convenience so that callers do not have to worry about characters expanding to more characters when they just want the whole string converted to upper or lower case.

This is a breaking change due to the change in the signatures of the `CharExt::to_{upper,lower}case` methods. Code can be updated to use functions like `flat_map` or `collect` to handle the difference.

[breaking-change]
This commit is contained in:
parent
2574009af0
commit
0f6a0b58f9
14 changed files with 332 additions and 388 deletions
|
@ -19,7 +19,6 @@
|
||||||
#![feature(unboxed_closures)]
|
#![feature(unboxed_closures)]
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
#![feature(unicode)]
|
|
||||||
#![feature(core)]
|
#![feature(core)]
|
||||||
#![feature(path)]
|
#![feature(path)]
|
||||||
#![feature(io)]
|
#![feature(io)]
|
||||||
|
|
|
@ -71,7 +71,7 @@ fn parse_expected(last_nonfollow_error: Option<uint>,
|
||||||
let letters = line[kind_start..].chars();
|
let letters = line[kind_start..].chars();
|
||||||
let kind = letters.skip_while(|c| c.is_whitespace())
|
let kind = letters.skip_while(|c| c.is_whitespace())
|
||||||
.take_while(|c| !c.is_whitespace())
|
.take_while(|c| !c.is_whitespace())
|
||||||
.map(|c| c.to_lowercase())
|
.flat_map(|c| c.to_lowercase())
|
||||||
.collect::<String>();
|
.collect::<String>();
|
||||||
let letters = line[kind_start..].chars();
|
let letters = line[kind_start..].chars();
|
||||||
let msg = letters.skip_while(|c| c.is_whitespace())
|
let msg = letters.skip_while(|c| c.is_whitespace())
|
||||||
|
|
|
@ -12,13 +12,14 @@
|
||||||
|
|
||||||
//! Unicode string manipulation (the [`str`](../primitive.str.html) type).
|
//! Unicode string manipulation (the [`str`](../primitive.str.html) type).
|
||||||
//!
|
//!
|
||||||
//! Rust's [`str`](../primitive.str.html) type is one of the core primitive types of the
|
//! Rust's [`str`](../primitive.str.html) type is one of the core primitive
|
||||||
//! language. `&str` is the borrowed string type. This type of string can only be created
|
//! types of the language. `&str` is the borrowed string type. This type of
|
||||||
//! from other strings, unless it is a `&'static str` (see below). It is not possible to
|
//! string can only be created from other strings, unless it is a `&'static str`
|
||||||
//! move out of borrowed strings because they are owned elsewhere.
|
//! (see below). It is not possible to move out of borrowed strings because they
|
||||||
|
//! are owned elsewhere.
|
||||||
//!
|
//!
|
||||||
//! Basic operations are implemented directly by the compiler, but more advanced operations are
|
//! Basic operations are implemented directly by the compiler, but more advanced
|
||||||
//! defined on the [`StrExt`](trait.StrExt.html) trait.
|
//! operations are defined on the [`StrExt`](trait.StrExt.html) trait.
|
||||||
//!
|
//!
|
||||||
//! # Examples
|
//! # Examples
|
||||||
//!
|
//!
|
||||||
|
@ -28,8 +29,9 @@
|
||||||
//! let s = "Hello, world.";
|
//! let s = "Hello, world.";
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! This `&str` is a `&'static str`, which is the type of string literals. They're `'static`
|
//! This `&str` is a `&'static str`, which is the type of string literals.
|
||||||
//! because literals are available for the entire lifetime of the program.
|
//! They're `'static` because literals are available for the entire lifetime of
|
||||||
|
//! the program.
|
||||||
//!
|
//!
|
||||||
//! You can get a non-`'static` `&str` by taking a slice of a `String`:
|
//! You can get a non-`'static` `&str` by taking a slice of a `String`:
|
||||||
//!
|
//!
|
||||||
|
@ -40,12 +42,13 @@
|
||||||
//!
|
//!
|
||||||
//! # Representation
|
//! # Representation
|
||||||
//!
|
//!
|
||||||
//! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a stream of UTF-8
|
//! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as
|
||||||
//! bytes. All [strings](../../reference.html#literals) are guaranteed to be validly encoded UTF-8
|
//! a stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
|
||||||
//! sequences. Additionally, strings are not null-terminated and can thus contain null bytes.
|
//! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
|
||||||
|
//! not null-terminated and can thus contain null bytes.
|
||||||
//!
|
//!
|
||||||
//! The actual representation of `str`s has a direct mapping to slices: `&str` is the same as
|
//! The actual representation of `str`s has a direct mapping to slices: `&str`
|
||||||
//! `&[u8]`.
|
//! is the same as `&[u8]`.
|
||||||
|
|
||||||
#![doc(primitive = "str")]
|
#![doc(primitive = "str")]
|
||||||
#![stable(feature = "rust1", since = "1.0.0")]
|
#![stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -53,16 +56,16 @@
|
||||||
use self::RecompositionState::*;
|
use self::RecompositionState::*;
|
||||||
use self::DecompositionType::*;
|
use self::DecompositionType::*;
|
||||||
|
|
||||||
use core::char::CharExt;
|
|
||||||
use core::clone::Clone;
|
use core::clone::Clone;
|
||||||
use core::iter::AdditiveIterator;
|
use core::iter::AdditiveIterator;
|
||||||
use core::iter::{Iterator, IteratorExt};
|
use core::iter::{Iterator, IteratorExt, Extend};
|
||||||
use core::ops::Index;
|
use core::ops::Index;
|
||||||
use core::ops::RangeFull;
|
use core::ops::RangeFull;
|
||||||
use core::option::Option::{self, Some, None};
|
use core::option::Option::{self, Some, None};
|
||||||
use core::result::Result;
|
use core::result::Result;
|
||||||
use core::slice::AsSlice;
|
use core::slice::AsSlice;
|
||||||
use core::str as core_str;
|
use core::str as core_str;
|
||||||
|
use unicode::char::CharExt;
|
||||||
use unicode::str::{UnicodeStr, Utf16Encoder};
|
use unicode::str::{UnicodeStr, Utf16Encoder};
|
||||||
|
|
||||||
use vec_deque::VecDeque;
|
use vec_deque::VecDeque;
|
||||||
|
@ -836,17 +839,19 @@ pub trait StrExt: Index<RangeFull, Output = str> {
|
||||||
|
|
||||||
/// Returns a slice of the string from the character range [`begin`..`end`).
|
/// Returns a slice of the string from the character range [`begin`..`end`).
|
||||||
///
|
///
|
||||||
/// That is, start at the `begin`-th code point of the string and continue to the `end`-th code
|
/// That is, start at the `begin`-th code point of the string and continue
|
||||||
/// point. This does not detect or handle edge cases such as leaving a combining character as
|
/// to the `end`-th code point. This does not detect or handle edge cases
|
||||||
/// the first code point of the string.
|
/// such as leaving a combining character as the first code point of the
|
||||||
|
/// string.
|
||||||
///
|
///
|
||||||
/// Due to the design of UTF-8, this operation is `O(end)`. See `slice`, `slice_to` and
|
/// Due to the design of UTF-8, this operation is `O(end)`. See `slice`,
|
||||||
/// `slice_from` for `O(1)` variants that use byte indices rather than code point indices.
|
/// `slice_to` and `slice_from` for `O(1)` variants that use byte indices
|
||||||
|
/// rather than code point indices.
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if `begin` > `end` or if either `begin` or `end` is beyond the last character of
|
/// Panics if `begin` > `end` or if either `begin` or `end` is beyond the
|
||||||
/// the string.
|
/// last character of the string.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
|
@ -868,8 +873,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
|
||||||
///
|
///
|
||||||
/// # Unsafety
|
/// # Unsafety
|
||||||
///
|
///
|
||||||
/// Caller must check both UTF-8 character boundaries and the boundaries of the entire slice as
|
/// Caller must check both UTF-8 character boundaries and the boundaries of
|
||||||
/// well.
|
/// the entire slice as well.
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
|
@ -1506,6 +1511,32 @@ pub trait StrExt: Index<RangeFull, Output = str> {
|
||||||
fn trim_right(&self) -> &str {
|
fn trim_right(&self) -> &str {
|
||||||
UnicodeStr::trim_right(&self[..])
|
UnicodeStr::trim_right(&self[..])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the lowercase equivalent of this string.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// let s = "HELLO";
|
||||||
|
/// assert_eq!(s.to_lowercase(), "hello");
|
||||||
|
#[unstable(feature = "collections")]
|
||||||
|
fn to_lowercase(&self) -> String {
|
||||||
|
let mut s = String::with_capacity(self.len());
|
||||||
|
s.extend(self[..].chars().flat_map(|c| c.to_lowercase()));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the uppercase equivalent of this string.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// let s = "hello";
|
||||||
|
/// assert_eq!(s.to_uppercase(), "HELLO");
|
||||||
|
#[unstable(feature = "collections")]
|
||||||
|
fn to_uppercase(&self) -> String {
|
||||||
|
let mut s = String::with_capacity(self.len());
|
||||||
|
s.extend(self[..].chars().flat_map(|c| c.to_uppercase()));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
|
|
@ -118,7 +118,7 @@ pub fn from_u32(i: u32) -> Option<char> {
|
||||||
/// assert_eq!(c, Some('4'));
|
/// assert_eq!(c, Some('4'));
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[unstable(feature = "core", reason = "pending integer conventions")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
|
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
|
||||||
if radix > 36 {
|
if radix > 36 {
|
||||||
panic!("from_digit: radix is too high (maximum 36)");
|
panic!("from_digit: radix is too high (maximum 36)");
|
||||||
|
@ -136,230 +136,25 @@ pub fn from_digit(num: u32, radix: u32) -> Option<char> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Basic `char` manipulations.
|
// NB: the stabilization and documentation for this trait are in
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
// unicode/char.rs, not here
|
||||||
|
#[allow(missing_docs)] // docs in libunicode/char.rs
|
||||||
pub trait CharExt {
|
pub trait CharExt {
|
||||||
/// Checks if a `char` parses as a numeric digit in the given radix.
|
|
||||||
///
|
|
||||||
/// Compared to `is_numeric()`, this function only recognizes the characters
|
|
||||||
/// `0-9`, `a-z` and `A-Z`.
|
|
||||||
///
|
|
||||||
/// # Return value
|
|
||||||
///
|
|
||||||
/// Returns `true` if `c` is a valid digit under `radix`, and `false`
|
|
||||||
/// otherwise.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// Panics if given a radix > 36.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let c = '1';
|
|
||||||
///
|
|
||||||
/// assert!(c.is_digit(10));
|
|
||||||
///
|
|
||||||
/// assert!('f'.is_digit(16));
|
|
||||||
/// ```
|
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn is_digit(self, radix: u32) -> bool;
|
fn is_digit(self, radix: u32) -> bool;
|
||||||
|
|
||||||
/// Converts a character to the corresponding digit.
|
|
||||||
///
|
|
||||||
/// # Return value
|
|
||||||
///
|
|
||||||
/// If `c` is between '0' and '9', the corresponding value between 0 and
|
|
||||||
/// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns
|
|
||||||
/// none if the character does not refer to a digit in the given radix.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// Panics if given a radix outside the range [0..36].
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let c = '1';
|
|
||||||
///
|
|
||||||
/// assert_eq!(c.to_digit(10), Some(1));
|
|
||||||
///
|
|
||||||
/// assert_eq!('f'.to_digit(16), Some(15));
|
|
||||||
/// ```
|
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn to_digit(self, radix: u32) -> Option<u32>;
|
fn to_digit(self, radix: u32) -> Option<u32>;
|
||||||
|
|
||||||
/// Returns an iterator that yields the hexadecimal Unicode escape of a character, as `char`s.
|
|
||||||
///
|
|
||||||
/// All characters are escaped with Rust syntax of the form `\\u{NNNN}` where `NNNN` is the
|
|
||||||
/// shortest hexadecimal representation of the code point.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// for i in '❤'.escape_unicode() {
|
|
||||||
/// println!("{}", i);
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// This prints:
|
|
||||||
///
|
|
||||||
/// ```text
|
|
||||||
/// \
|
|
||||||
/// u
|
|
||||||
/// {
|
|
||||||
/// 2
|
|
||||||
/// 7
|
|
||||||
/// 6
|
|
||||||
/// 4
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// Collecting into a `String`:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let heart: String = '❤'.escape_unicode().collect();
|
|
||||||
///
|
|
||||||
/// assert_eq!(heart, r"\u{2764}");
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn escape_unicode(self) -> EscapeUnicode;
|
fn escape_unicode(self) -> EscapeUnicode;
|
||||||
|
|
||||||
/// Returns an iterator that yields the 'default' ASCII and
|
|
||||||
/// C++11-like literal escape of a character, as `char`s.
|
|
||||||
///
|
|
||||||
/// The default is chosen with a bias toward producing literals that are
|
|
||||||
/// legal in a variety of languages, including C++11 and similar C-family
|
|
||||||
/// languages. The exact rules are:
|
|
||||||
///
|
|
||||||
/// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
|
|
||||||
/// * Single-quote, double-quote and backslash chars are backslash-
|
|
||||||
/// escaped.
|
|
||||||
/// * Any other chars in the range [0x20,0x7e] are not escaped.
|
|
||||||
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// for i in '"'.escape_default() {
|
|
||||||
/// println!("{}", i);
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// This prints:
|
|
||||||
///
|
|
||||||
/// ```text
|
|
||||||
/// \
|
|
||||||
/// "
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// Collecting into a `String`:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let quote: String = '"'.escape_default().collect();
|
|
||||||
///
|
|
||||||
/// assert_eq!(quote, "\\\"");
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn escape_default(self) -> EscapeDefault;
|
fn escape_default(self) -> EscapeDefault;
|
||||||
|
|
||||||
/// Returns the number of bytes this character would need if encoded in UTF-8.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let n = 'ß'.len_utf8();
|
|
||||||
///
|
|
||||||
/// assert_eq!(n, 2);
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf8(self) -> usize;
|
fn len_utf8(self) -> usize;
|
||||||
|
|
||||||
/// Returns the number of 16-bit code units this character would need if encoded in UTF-16.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let n = 'ß'.len_utf16();
|
|
||||||
///
|
|
||||||
/// assert_eq!(n, 1);
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf16(self) -> usize;
|
fn len_utf16(self) -> usize;
|
||||||
|
|
||||||
/// Encodes this character as UTF-8 into the provided byte buffer, and then returns the number
|
|
||||||
/// of bytes written.
|
|
||||||
///
|
|
||||||
/// If the buffer is not large enough, nothing will be written into it and a `None` will be
|
|
||||||
/// returned.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// In both of these examples, 'ß' takes two bytes to encode.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let mut b = [0; 2];
|
|
||||||
///
|
|
||||||
/// let result = 'ß'.encode_utf8(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, Some(2));
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// A buffer that's too small:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let mut b = [0; 1];
|
|
||||||
///
|
|
||||||
/// let result = 'ß'.encode_utf8(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, None);
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
|
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
|
||||||
|
|
||||||
/// Encodes this character as UTF-16 into the provided `u16` buffer, and then returns the
|
|
||||||
/// number of `u16`s written.
|
|
||||||
///
|
|
||||||
/// If the buffer is not large enough, nothing will be written into it and a `None` will be
|
|
||||||
/// returned.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// In both of these examples, 'ß' takes one `u16` to encode.
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let mut b = [0; 1];
|
|
||||||
///
|
|
||||||
/// let result = 'ß'.encode_utf16(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, Some(1));
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// A buffer that's too small:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// let mut b = [0; 0];
|
|
||||||
///
|
|
||||||
/// let result = 'ß'.encode_utf16(&mut b);
|
|
||||||
///
|
|
||||||
/// assert_eq!(result, None);
|
|
||||||
/// ```
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
|
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
impl CharExt for char {
|
impl CharExt for char {
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn is_digit(self, radix: u32) -> bool {
|
fn is_digit(self, radix: u32) -> bool {
|
||||||
self.to_digit(radix).is_some()
|
self.to_digit(radix).is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn to_digit(self, radix: u32) -> Option<u32> {
|
fn to_digit(self, radix: u32) -> Option<u32> {
|
||||||
if radix > 36 {
|
if radix > 36 {
|
||||||
panic!("to_digit: radix is too high (maximum 36)");
|
panic!("to_digit: radix is too high (maximum 36)");
|
||||||
|
@ -374,12 +169,10 @@ impl CharExt for char {
|
||||||
else { None }
|
else { None }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn escape_unicode(self) -> EscapeUnicode {
|
fn escape_unicode(self) -> EscapeUnicode {
|
||||||
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
|
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn escape_default(self) -> EscapeDefault {
|
fn escape_default(self) -> EscapeDefault {
|
||||||
let init_state = match self {
|
let init_state = match self {
|
||||||
'\t' => EscapeDefaultState::Backslash('t'),
|
'\t' => EscapeDefaultState::Backslash('t'),
|
||||||
|
@ -395,7 +188,6 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf8(self) -> usize {
|
fn len_utf8(self) -> usize {
|
||||||
let code = self as u32;
|
let code = self as u32;
|
||||||
if code < MAX_ONE_B {
|
if code < MAX_ONE_B {
|
||||||
|
@ -410,22 +202,17 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf16(self) -> usize {
|
fn len_utf16(self) -> usize {
|
||||||
let ch = self as u32;
|
let ch = self as u32;
|
||||||
if (ch & 0xFFFF) == ch { 1 } else { 2 }
|
if (ch & 0xFFFF) == ch { 1 } else { 2 }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> {
|
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> {
|
||||||
encode_utf8_raw(self as u32, dst)
|
encode_utf8_raw(self as u32, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
#[unstable(feature = "core",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> {
|
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> {
|
||||||
encode_utf16_raw(self as u32, dst)
|
encode_utf16_raw(self as u32, dst)
|
||||||
}
|
}
|
||||||
|
@ -437,7 +224,6 @@ impl CharExt for char {
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
/// If the buffer is not large enough, nothing will be written into it
|
||||||
/// and a `None` will be returned.
|
/// and a `None` will be returned.
|
||||||
#[inline]
|
#[inline]
|
||||||
#[unstable(feature = "core")]
|
|
||||||
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
|
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
|
||||||
// Marked #[inline] to allow llvm optimizing it away
|
// Marked #[inline] to allow llvm optimizing it away
|
||||||
if code < MAX_ONE_B && dst.len() >= 1 {
|
if code < MAX_ONE_B && dst.len() >= 1 {
|
||||||
|
@ -469,7 +255,6 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
/// If the buffer is not large enough, nothing will be written into it
|
||||||
/// and a `None` will be returned.
|
/// and a `None` will be returned.
|
||||||
#[inline]
|
#[inline]
|
||||||
#[unstable(feature = "core")]
|
|
||||||
pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
|
pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
|
||||||
// Marked #[inline] to allow llvm optimizing it away
|
// Marked #[inline] to allow llvm optimizing it away
|
||||||
if (ch & 0xFFFF) == ch && dst.len() >= 1 {
|
if (ch & 0xFFFF) == ch && dst.len() >= 1 {
|
||||||
|
@ -497,7 +282,6 @@ pub struct EscapeUnicode {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
#[unstable(feature = "core")]
|
|
||||||
enum EscapeUnicodeState {
|
enum EscapeUnicodeState {
|
||||||
Backslash,
|
Backslash,
|
||||||
Type,
|
Type,
|
||||||
|
@ -559,7 +343,6 @@ pub struct EscapeDefault {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
#[unstable(feature = "core")]
|
|
||||||
enum EscapeDefaultState {
|
enum EscapeDefaultState {
|
||||||
Backslash(char),
|
Backslash(char),
|
||||||
Char(char),
|
Char(char),
|
||||||
|
|
|
@ -57,35 +57,47 @@ fn test_to_digit() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_lowercase() {
|
fn test_to_lowercase() {
|
||||||
assert_eq!('A'.to_lowercase(), 'a');
|
fn lower(c: char) -> char {
|
||||||
assert_eq!('Ö'.to_lowercase(), 'ö');
|
let mut it = c.to_lowercase();
|
||||||
assert_eq!('ß'.to_lowercase(), 'ß');
|
let c = it.next().unwrap();
|
||||||
assert_eq!('Ü'.to_lowercase(), 'ü');
|
assert!(it.next().is_none());
|
||||||
assert_eq!('💩'.to_lowercase(), '💩');
|
c
|
||||||
assert_eq!('Σ'.to_lowercase(), 'σ');
|
}
|
||||||
assert_eq!('Τ'.to_lowercase(), 'τ');
|
assert_eq!(lower('A'), 'a');
|
||||||
assert_eq!('Ι'.to_lowercase(), 'ι');
|
assert_eq!(lower('Ö'), 'ö');
|
||||||
assert_eq!('Γ'.to_lowercase(), 'γ');
|
assert_eq!(lower('ß'), 'ß');
|
||||||
assert_eq!('Μ'.to_lowercase(), 'μ');
|
assert_eq!(lower('Ü'), 'ü');
|
||||||
assert_eq!('Α'.to_lowercase(), 'α');
|
assert_eq!(lower('💩'), '💩');
|
||||||
assert_eq!('Σ'.to_lowercase(), 'σ');
|
assert_eq!(lower('Σ'), 'σ');
|
||||||
|
assert_eq!(lower('Τ'), 'τ');
|
||||||
|
assert_eq!(lower('Ι'), 'ι');
|
||||||
|
assert_eq!(lower('Γ'), 'γ');
|
||||||
|
assert_eq!(lower('Μ'), 'μ');
|
||||||
|
assert_eq!(lower('Α'), 'α');
|
||||||
|
assert_eq!(lower('Σ'), 'σ');
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_uppercase() {
|
fn test_to_uppercase() {
|
||||||
assert_eq!('a'.to_uppercase(), 'A');
|
fn upper(c: char) -> char {
|
||||||
assert_eq!('ö'.to_uppercase(), 'Ö');
|
let mut it = c.to_uppercase();
|
||||||
assert_eq!('ß'.to_uppercase(), 'ß'); // not ẞ: Latin capital letter sharp s
|
let c = it.next().unwrap();
|
||||||
assert_eq!('ü'.to_uppercase(), 'Ü');
|
assert!(it.next().is_none());
|
||||||
assert_eq!('💩'.to_uppercase(), '💩');
|
c
|
||||||
|
}
|
||||||
|
assert_eq!(upper('a'), 'A');
|
||||||
|
assert_eq!(upper('ö'), 'Ö');
|
||||||
|
assert_eq!(upper('ß'), 'ß'); // not ẞ: Latin capital letter sharp s
|
||||||
|
assert_eq!(upper('ü'), 'Ü');
|
||||||
|
assert_eq!(upper('💩'), '💩');
|
||||||
|
|
||||||
assert_eq!('σ'.to_uppercase(), 'Σ');
|
assert_eq!(upper('σ'), 'Σ');
|
||||||
assert_eq!('τ'.to_uppercase(), 'Τ');
|
assert_eq!(upper('τ'), 'Τ');
|
||||||
assert_eq!('ι'.to_uppercase(), 'Ι');
|
assert_eq!(upper('ι'), 'Ι');
|
||||||
assert_eq!('γ'.to_uppercase(), 'Γ');
|
assert_eq!(upper('γ'), 'Γ');
|
||||||
assert_eq!('μ'.to_uppercase(), 'Μ');
|
assert_eq!(upper('μ'), 'Μ');
|
||||||
assert_eq!('α'.to_uppercase(), 'Α');
|
assert_eq!(upper('α'), 'Α');
|
||||||
assert_eq!('ς'.to_uppercase(), 'Σ');
|
assert_eq!(upper('ς'), 'Σ');
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -36,7 +36,6 @@
|
||||||
#![feature(rustc_private)]
|
#![feature(rustc_private)]
|
||||||
#![feature(unsafe_destructor)]
|
#![feature(unsafe_destructor)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
#![feature(unicode)]
|
|
||||||
#![feature(exit_status)]
|
#![feature(exit_status)]
|
||||||
#![feature(path)]
|
#![feature(path)]
|
||||||
#![feature(io)]
|
#![feature(io)]
|
||||||
|
@ -618,8 +617,7 @@ Available lint options:
|
||||||
|
|
||||||
let print_lint_groups = |lints: Vec<(&'static str, Vec<lint::LintId>)>| {
|
let print_lint_groups = |lints: Vec<(&'static str, Vec<lint::LintId>)>| {
|
||||||
for (name, to) in lints {
|
for (name, to) in lints {
|
||||||
let name = name.chars().map(|x| x.to_lowercase())
|
let name = name.to_lowercase().replace("_", "-");
|
||||||
.collect::<String>().replace("_", "-");
|
|
||||||
let desc = to.into_iter().map(|x| x.as_str().replace("_", "-"))
|
let desc = to.into_iter().map(|x| x.as_str().replace("_", "-"))
|
||||||
.collect::<Vec<String>>().connect(", ");
|
.collect::<Vec<String>>().connect(", ");
|
||||||
println!(" {} {}",
|
println!(" {} {}",
|
||||||
|
|
|
@ -810,11 +810,11 @@ impl NonCamelCaseTypes {
|
||||||
fn to_camel_case(s: &str) -> String {
|
fn to_camel_case(s: &str) -> String {
|
||||||
s.split('_').flat_map(|word| word.chars().enumerate().map(|(i, c)|
|
s.split('_').flat_map(|word| word.chars().enumerate().map(|(i, c)|
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
c.to_uppercase()
|
c.to_uppercase().collect::<String>()
|
||||||
} else {
|
} else {
|
||||||
c
|
c.to_string()
|
||||||
}
|
}
|
||||||
)).collect()
|
)).collect::<Vec<_>>().concat()
|
||||||
}
|
}
|
||||||
|
|
||||||
let s = token::get_ident(ident);
|
let s = token::get_ident(ident);
|
||||||
|
@ -947,7 +947,7 @@ impl NonSnakeCase {
|
||||||
buf = String::new();
|
buf = String::new();
|
||||||
}
|
}
|
||||||
last_upper = ch.is_uppercase();
|
last_upper = ch.is_uppercase();
|
||||||
buf.push(ch.to_lowercase());
|
buf.extend(ch.to_lowercase());
|
||||||
}
|
}
|
||||||
words.push(buf);
|
words.push(buf);
|
||||||
}
|
}
|
||||||
|
@ -1064,8 +1064,7 @@ impl NonUpperCaseGlobals {
|
||||||
let s = token::get_ident(ident);
|
let s = token::get_ident(ident);
|
||||||
|
|
||||||
if s.chars().any(|c| c.is_lowercase()) {
|
if s.chars().any(|c| c.is_lowercase()) {
|
||||||
let uc: String = NonSnakeCase::to_snake_case(&s).chars()
|
let uc = NonSnakeCase::to_snake_case(&s).to_uppercase();
|
||||||
.map(|c| c.to_uppercase()).collect();
|
|
||||||
if uc != &s[..] {
|
if uc != &s[..] {
|
||||||
cx.span_lint(NON_UPPER_CASE_GLOBALS, span,
|
cx.span_lint(NON_UPPER_CASE_GLOBALS, span,
|
||||||
&format!("{} `{}` should have an upper case name such as `{}`",
|
&format!("{} `{}` should have an upper case name such as `{}`",
|
||||||
|
|
|
@ -41,7 +41,6 @@
|
||||||
#![feature(unsafe_destructor)]
|
#![feature(unsafe_destructor)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
#![feature(unicode)]
|
|
||||||
#![cfg_attr(test, feature(test))]
|
#![cfg_attr(test, feature(test))]
|
||||||
|
|
||||||
extern crate syntax;
|
extern crate syntax;
|
||||||
|
|
|
@ -343,8 +343,7 @@ impl IoError {
|
||||||
pub fn from_errno(errno: i32, detail: bool) -> IoError {
|
pub fn from_errno(errno: i32, detail: bool) -> IoError {
|
||||||
let mut err = sys::decode_error(errno as i32);
|
let mut err = sys::decode_error(errno as i32);
|
||||||
if detail && err.kind == OtherIoError {
|
if detail && err.kind == OtherIoError {
|
||||||
err.detail = Some(os::error_string(errno).chars()
|
err.detail = Some(os::error_string(errno).to_lowercase());
|
||||||
.map(|c| c.to_lowercase()).collect())
|
|
||||||
}
|
}
|
||||||
err
|
err
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,10 +110,11 @@ struct EnvKey(CString);
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
impl hash::Hash for EnvKey {
|
impl hash::Hash for EnvKey {
|
||||||
fn hash<H: hash::Hasher>(&self, state: &mut H) {
|
fn hash<H: hash::Hasher>(&self, state: &mut H) {
|
||||||
|
use ascii::AsciiExt;
|
||||||
let &EnvKey(ref x) = self;
|
let &EnvKey(ref x) = self;
|
||||||
match str::from_utf8(x.as_bytes()) {
|
match str::from_utf8(x.as_bytes()) {
|
||||||
Ok(s) => for ch in s.chars() {
|
Ok(s) => for ch in s.chars() {
|
||||||
(ch as u8 as char).to_lowercase().hash(state);
|
ch.to_ascii_lowercase().hash(state);
|
||||||
},
|
},
|
||||||
Err(..) => x.hash(state)
|
Err(..) => x.hash(state)
|
||||||
}
|
}
|
||||||
|
@ -123,6 +124,7 @@ impl hash::Hash for EnvKey {
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
impl PartialEq for EnvKey {
|
impl PartialEq for EnvKey {
|
||||||
fn eq(&self, other: &EnvKey) -> bool {
|
fn eq(&self, other: &EnvKey) -> bool {
|
||||||
|
use ascii::AsciiExt;
|
||||||
let &EnvKey(ref x) = self;
|
let &EnvKey(ref x) = self;
|
||||||
let &EnvKey(ref y) = other;
|
let &EnvKey(ref y) = other;
|
||||||
match (str::from_utf8(x.as_bytes()), str::from_utf8(y.as_bytes())) {
|
match (str::from_utf8(x.as_bytes()), str::from_utf8(y.as_bytes())) {
|
||||||
|
@ -131,7 +133,7 @@ impl PartialEq for EnvKey {
|
||||||
return false
|
return false
|
||||||
} else {
|
} else {
|
||||||
for (xch, ych) in xs.chars().zip(ys.chars()) {
|
for (xch, ych) in xs.chars().zip(ys.chars()) {
|
||||||
if xch.to_lowercase() != ych.to_lowercase() {
|
if xch.to_ascii_lowercase() != ych.to_ascii_lowercase() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,6 @@
|
||||||
#![feature(rustc_private)]
|
#![feature(rustc_private)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
#![feature(unicode)]
|
|
||||||
#![feature(path_ext)]
|
#![feature(path_ext)]
|
||||||
#![cfg_attr(windows, feature(libc))]
|
#![cfg_attr(windows, feature(libc))]
|
||||||
|
|
||||||
|
|
|
@ -8,16 +8,39 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
//! Unicode-intensive `char` methods along with the `core` methods.
|
//! Character manipulation (`char` type, Unicode Scalar Value)
|
||||||
//!
|
//!
|
||||||
//! These methods implement functionality for `char` that requires knowledge of
|
//! This module provides the `CharExt` trait, as well as its
|
||||||
//! Unicode definitions, including normalization, categorization, and display information.
|
//! implementation for the primitive `char` type, in order to allow
|
||||||
|
//! basic character manipulation.
|
||||||
|
//!
|
||||||
|
//! A `char` actually represents a
|
||||||
|
//! *[Unicode Scalar
|
||||||
|
//! Value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can
|
||||||
|
//! contain any Unicode code point except high-surrogate and low-surrogate code
|
||||||
|
//! points.
|
||||||
|
//!
|
||||||
|
//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
|
||||||
|
//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
|
||||||
|
//! however the converse is not always true due to the above range limits
|
||||||
|
//! and, as such, should be performed via the `from_u32` function.
|
||||||
|
|
||||||
|
#![stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
#![doc(primitive = "char")]
|
||||||
|
|
||||||
use core::char;
|
|
||||||
use core::char::CharExt as C;
|
use core::char::CharExt as C;
|
||||||
use core::option::Option;
|
use core::option::Option::{self, Some};
|
||||||
|
use core::iter::Iterator;
|
||||||
use tables::{derived_property, property, general_category, conversions, charwidth};
|
use tables::{derived_property, property, general_category, conversions, charwidth};
|
||||||
|
|
||||||
|
// stable reexports
|
||||||
|
pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
|
||||||
|
|
||||||
|
// unstable reexports
|
||||||
|
pub use normalize::{decompose_canonical, decompose_compatible, compose};
|
||||||
|
pub use tables::normalization::canonical_combining_class;
|
||||||
|
pub use tables::UNICODE_VERSION;
|
||||||
|
|
||||||
/// Functionality for manipulating `char`.
|
/// Functionality for manipulating `char`.
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub trait CharExt {
|
pub trait CharExt {
|
||||||
|
@ -34,8 +57,17 @@ pub trait CharExt {
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if given a radix > 36.
|
/// Panics if given a radix > 36.
|
||||||
#[unstable(feature = "unicode",
|
///
|
||||||
reason = "pending integer conventions")]
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let c = '1';
|
||||||
|
///
|
||||||
|
/// assert!(c.is_digit(10));
|
||||||
|
///
|
||||||
|
/// assert!('f'.is_digit(16));
|
||||||
|
/// ```
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn is_digit(self, radix: u32) -> bool;
|
fn is_digit(self, radix: u32) -> bool;
|
||||||
|
|
||||||
/// Converts a character to the corresponding digit.
|
/// Converts a character to the corresponding digit.
|
||||||
|
@ -49,18 +81,56 @@ pub trait CharExt {
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Panics if given a radix outside the range [0..36].
|
/// Panics if given a radix outside the range [0..36].
|
||||||
#[unstable(feature = "unicode",
|
///
|
||||||
reason = "pending integer conventions")]
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let c = '1';
|
||||||
|
///
|
||||||
|
/// assert_eq!(c.to_digit(10), Some(1));
|
||||||
|
///
|
||||||
|
/// assert_eq!('f'.to_digit(16), Some(15));
|
||||||
|
/// ```
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn to_digit(self, radix: u32) -> Option<u32>;
|
fn to_digit(self, radix: u32) -> Option<u32>;
|
||||||
|
|
||||||
/// Returns an iterator that yields the hexadecimal Unicode escape
|
/// Returns an iterator that yields the hexadecimal Unicode escape of a
|
||||||
/// of a character, as `char`s.
|
/// character, as `char`s.
|
||||||
///
|
///
|
||||||
/// All characters are escaped with Rust syntax of the form `\\u{NNNN}`
|
/// All characters are escaped with Rust syntax of the form `\\u{NNNN}`
|
||||||
/// where `NNNN` is the shortest hexadecimal representation of the code
|
/// where `NNNN` is the shortest hexadecimal representation of the code
|
||||||
/// point.
|
/// point.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// for i in '❤'.escape_unicode() {
|
||||||
|
/// println!("{}", i);
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This prints:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// \
|
||||||
|
/// u
|
||||||
|
/// {
|
||||||
|
/// 2
|
||||||
|
/// 7
|
||||||
|
/// 6
|
||||||
|
/// 4
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Collecting into a `String`:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let heart: String = '❤'.escape_unicode().collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(heart, r"\u{2764}");
|
||||||
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn escape_unicode(self) -> char::EscapeUnicode;
|
fn escape_unicode(self) -> EscapeUnicode;
|
||||||
|
|
||||||
/// Returns an iterator that yields the 'default' ASCII and
|
/// Returns an iterator that yields the 'default' ASCII and
|
||||||
/// C++11-like literal escape of a character, as `char`s.
|
/// C++11-like literal escape of a character, as `char`s.
|
||||||
|
@ -74,33 +144,118 @@ pub trait CharExt {
|
||||||
/// escaped.
|
/// escaped.
|
||||||
/// * Any other chars in the range [0x20,0x7e] are not escaped.
|
/// * Any other chars in the range [0x20,0x7e] are not escaped.
|
||||||
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
|
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// for i in '"'.escape_default() {
|
||||||
|
/// println!("{}", i);
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This prints:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// \
|
||||||
|
/// "
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Collecting into a `String`:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let quote: String = '"'.escape_default().collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(quote, "\\\"");
|
||||||
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn escape_default(self) -> char::EscapeDefault;
|
fn escape_default(self) -> EscapeDefault;
|
||||||
|
|
||||||
/// Returns the amount of bytes this character would need if encoded in
|
/// Returns the number of bytes this character would need if encoded in
|
||||||
/// UTF-8.
|
/// UTF-8.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let n = 'ß'.len_utf8();
|
||||||
|
///
|
||||||
|
/// assert_eq!(n, 2);
|
||||||
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn len_utf8(self) -> usize;
|
fn len_utf8(self) -> usize;
|
||||||
|
|
||||||
/// Returns the amount of bytes this character would need if encoded in
|
/// Returns the number of 16-bit code units this character would need if
|
||||||
/// UTF-16.
|
/// encoded in UTF-16.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let n = 'ß'.len_utf16();
|
||||||
|
///
|
||||||
|
/// assert_eq!(n, 1);
|
||||||
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn len_utf16(self) -> usize;
|
fn len_utf16(self) -> usize;
|
||||||
|
|
||||||
/// Encodes this character as UTF-8 into the provided byte buffer,
|
/// Encodes this character as UTF-8 into the provided byte buffer, and then
|
||||||
/// and then returns the number of bytes written.
|
/// returns the number of bytes written.
|
||||||
///
|
///
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
/// If the buffer is not large enough, nothing will be written into it and a
|
||||||
/// and a `None` will be returned.
|
/// `None` will be returned. A buffer of length four is large enough to
|
||||||
|
/// encode any `char`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// In both of these examples, 'ß' takes two bytes to encode.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut b = [0; 2];
|
||||||
|
///
|
||||||
|
/// let result = 'ß'.encode_utf8(&mut b);
|
||||||
|
///
|
||||||
|
/// assert_eq!(result, Some(2));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// A buffer that's too small:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut b = [0; 1];
|
||||||
|
///
|
||||||
|
/// let result = 'ß'.encode_utf8(&mut b);
|
||||||
|
///
|
||||||
|
/// assert_eq!(result, None);
|
||||||
|
/// ```
|
||||||
#[unstable(feature = "unicode",
|
#[unstable(feature = "unicode",
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
reason = "pending decision about Iterator/Writer/Reader")]
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
|
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
|
||||||
|
|
||||||
/// Encodes this character as UTF-16 into the provided `u16` buffer,
|
/// Encodes this character as UTF-16 into the provided `u16` buffer, and
|
||||||
/// and then returns the number of `u16`s written.
|
/// then returns the number of `u16`s written.
|
||||||
///
|
///
|
||||||
/// If the buffer is not large enough, nothing will be written into it
|
/// If the buffer is not large enough, nothing will be written into it and a
|
||||||
/// and a `None` will be returned.
|
/// `None` will be returned. A buffer of length 2 is large enough to encode
|
||||||
|
/// any `char`.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// In both of these examples, 'ß' takes one `u16` to encode.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut b = [0; 1];
|
||||||
|
///
|
||||||
|
/// let result = 'ß'.encode_utf16(&mut b);
|
||||||
|
///
|
||||||
|
/// assert_eq!(result, Some(1));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// A buffer that's too small:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let mut b = [0; 0];
|
||||||
|
///
|
||||||
|
/// let result = 'ß'.encode_utf16(&mut b);
|
||||||
|
///
|
||||||
|
/// assert_eq!(result, None);
|
||||||
|
/// ```
|
||||||
#[unstable(feature = "unicode",
|
#[unstable(feature = "unicode",
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
reason = "pending decision about Iterator/Writer/Reader")]
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
|
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
|
||||||
|
@ -175,35 +330,35 @@ pub trait CharExt {
|
||||||
///
|
///
|
||||||
/// # Return value
|
/// # Return value
|
||||||
///
|
///
|
||||||
/// Returns the lowercase equivalent of the character, or the character
|
/// Returns an iterator which yields the characters corresponding to the
|
||||||
/// itself if no conversion is possible.
|
/// lowercase equivalent of the character. If no conversion is possible then
|
||||||
#[unstable(feature = "unicode",
|
/// the input character is returned.
|
||||||
reason = "pending case transformation decisions")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
fn to_lowercase(self) -> char;
|
fn to_lowercase(self) -> ToLowercase;
|
||||||
|
|
||||||
/// Converts a character to its uppercase equivalent.
|
/// Converts a character to its uppercase equivalent.
|
||||||
///
|
///
|
||||||
/// The case-folding performed is the common or simple mapping: it maps
|
/// The case-folding performed is the common or simple mapping: it maps
|
||||||
/// one Unicode codepoint (one character in Rust) to its uppercase
|
/// one Unicode codepoint to its uppercase equivalent according to the
|
||||||
/// equivalent according to the Unicode database [1]. The additional
|
/// Unicode database [1]. The additional [`SpecialCasing.txt`] is not yet
|
||||||
/// [`SpecialCasing.txt`] is not considered here, as it expands to multiple
|
/// considered here, but the iterator returned will soon support this form
|
||||||
/// codepoints in some cases.
|
/// of case folding.
|
||||||
///
|
///
|
||||||
/// A full reference can be found here [2].
|
/// A full reference can be found here [2].
|
||||||
///
|
///
|
||||||
/// # Return value
|
/// # Return value
|
||||||
///
|
///
|
||||||
/// Returns the uppercase equivalent of the character, or the character
|
/// Returns an iterator which yields the characters corresponding to the
|
||||||
/// itself if no conversion was made.
|
/// uppercase equivalent of the character. If no conversion is possible then
|
||||||
|
/// the input character is returned.
|
||||||
///
|
///
|
||||||
/// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
/// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||||
///
|
///
|
||||||
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
|
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
|
||||||
///
|
///
|
||||||
/// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
|
/// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
|
||||||
#[unstable(feature = "unicode",
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
reason = "pending case transformation decisions")]
|
fn to_uppercase(self) -> ToUppercase;
|
||||||
fn to_uppercase(self) -> char;
|
|
||||||
|
|
||||||
/// Returns this character's displayed width in columns, or `None` if it is a
|
/// Returns this character's displayed width in columns, or `None` if it is a
|
||||||
/// control character other than `'\x00'`.
|
/// control character other than `'\x00'`.
|
||||||
|
@ -221,28 +376,15 @@ pub trait CharExt {
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl CharExt for char {
|
impl CharExt for char {
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn is_digit(self, radix: u32) -> bool { C::is_digit(self, radix) }
|
fn is_digit(self, radix: u32) -> bool { C::is_digit(self, radix) }
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending integer conventions")]
|
|
||||||
fn to_digit(self, radix: u32) -> Option<u32> { C::to_digit(self, radix) }
|
fn to_digit(self, radix: u32) -> Option<u32> { C::to_digit(self, radix) }
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
fn escape_unicode(self) -> EscapeUnicode { C::escape_unicode(self) }
|
||||||
fn escape_unicode(self) -> char::EscapeUnicode { C::escape_unicode(self) }
|
fn escape_default(self) -> EscapeDefault { C::escape_default(self) }
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn escape_default(self) -> char::EscapeDefault { C::escape_default(self) }
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf8(self) -> usize { C::len_utf8(self) }
|
fn len_utf8(self) -> usize { C::len_utf8(self) }
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn len_utf16(self) -> usize { C::len_utf16(self) }
|
fn len_utf16(self) -> usize { C::len_utf16(self) }
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
|
||||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> { C::encode_utf8(self, dst) }
|
fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> { C::encode_utf8(self, dst) }
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "pending decision about Iterator/Writer/Reader")]
|
|
||||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> { C::encode_utf16(self, dst) }
|
fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> { C::encode_utf16(self, dst) }
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_alphabetic(self) -> bool {
|
fn is_alphabetic(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
'a' ... 'z' | 'A' ... 'Z' => true,
|
'a' ... 'z' | 'A' ... 'Z' => true,
|
||||||
|
@ -251,15 +393,10 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "mainly needed for compiler internals")]
|
|
||||||
fn is_xid_start(self) -> bool { derived_property::XID_Start(self) }
|
fn is_xid_start(self) -> bool { derived_property::XID_Start(self) }
|
||||||
|
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "mainly needed for compiler internals")]
|
|
||||||
fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) }
|
fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) }
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_lowercase(self) -> bool {
|
fn is_lowercase(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
'a' ... 'z' => true,
|
'a' ... 'z' => true,
|
||||||
|
@ -268,7 +405,6 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_uppercase(self) -> bool {
|
fn is_uppercase(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
'A' ... 'Z' => true,
|
'A' ... 'Z' => true,
|
||||||
|
@ -277,7 +413,6 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_whitespace(self) -> bool {
|
fn is_whitespace(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
' ' | '\x09' ... '\x0d' => true,
|
' ' | '\x09' ... '\x0d' => true,
|
||||||
|
@ -286,15 +421,12 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_alphanumeric(self) -> bool {
|
fn is_alphanumeric(self) -> bool {
|
||||||
self.is_alphabetic() || self.is_numeric()
|
self.is_alphabetic() || self.is_numeric()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_control(self) -> bool { general_category::Cc(self) }
|
fn is_control(self) -> bool { general_category::Cc(self) }
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
fn is_numeric(self) -> bool {
|
fn is_numeric(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
'0' ... '9' => true,
|
'0' ... '9' => true,
|
||||||
|
@ -303,15 +435,35 @@ impl CharExt for char {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unstable(feature = "unicode",
|
fn to_lowercase(self) -> ToLowercase {
|
||||||
reason = "pending case transformation decisions")]
|
ToLowercase(Some(conversions::to_lower(self)))
|
||||||
fn to_lowercase(self) -> char { conversions::to_lower(self) }
|
}
|
||||||
|
|
||||||
#[unstable(feature = "unicode",
|
fn to_uppercase(self) -> ToUppercase {
|
||||||
reason = "pending case transformation decisions")]
|
ToUppercase(Some(conversions::to_upper(self)))
|
||||||
fn to_uppercase(self) -> char { conversions::to_upper(self) }
|
}
|
||||||
|
|
||||||
#[unstable(feature = "unicode",
|
|
||||||
reason = "needs expert opinion. is_cjk flag stands out as ugly")]
|
|
||||||
fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }
|
fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An iterator over the lowercase mapping of a given character, returned from
|
||||||
|
/// the `to_lowercase` method on characters.
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
pub struct ToLowercase(Option<char>);
|
||||||
|
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
impl Iterator for ToLowercase {
|
||||||
|
type Item = char;
|
||||||
|
fn next(&mut self) -> Option<char> { self.0.take() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the uppercase mapping of a given character, returned from
|
||||||
|
/// the `to_uppercase` method on characters.
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
pub struct ToUppercase(Option<char>);
|
||||||
|
|
||||||
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
impl Iterator for ToUppercase {
|
||||||
|
type Item = char;
|
||||||
|
fn next(&mut self) -> Option<char> { self.0.take() }
|
||||||
|
}
|
|
@ -42,37 +42,8 @@ pub use tables::regex;
|
||||||
|
|
||||||
mod normalize;
|
mod normalize;
|
||||||
mod tables;
|
mod tables;
|
||||||
mod u_char;
|
|
||||||
mod u_str;
|
mod u_str;
|
||||||
|
pub mod char;
|
||||||
// re-export char so that std et al see it correctly
|
|
||||||
/// Character manipulation (`char` type, Unicode Scalar Value)
|
|
||||||
///
|
|
||||||
/// This module provides the `CharExt` trait, as well as its
|
|
||||||
/// implementation for the primitive `char` type, in order to allow
|
|
||||||
/// basic character manipulation.
|
|
||||||
///
|
|
||||||
/// A `char` actually represents a
|
|
||||||
/// *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*,
|
|
||||||
/// as it can contain any Unicode code point except high-surrogate and
|
|
||||||
/// low-surrogate code points.
|
|
||||||
///
|
|
||||||
/// As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
|
|
||||||
/// (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
|
|
||||||
/// however the converse is not always true due to the above range limits
|
|
||||||
/// and, as such, should be performed via the `from_u32` function.
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
|
||||||
#[doc(primitive = "char")]
|
|
||||||
pub mod char {
|
|
||||||
pub use core::char::{MAX, from_u32, from_digit};
|
|
||||||
|
|
||||||
pub use normalize::{decompose_canonical, decompose_compatible, compose};
|
|
||||||
|
|
||||||
pub use tables::normalization::canonical_combining_class;
|
|
||||||
pub use tables::UNICODE_VERSION;
|
|
||||||
|
|
||||||
pub use u_char::CharExt;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub mod str {
|
pub mod str {
|
||||||
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
|
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
|
||||||
|
|
|
@ -26,7 +26,7 @@ use core::num::Int;
|
||||||
use core::slice;
|
use core::slice;
|
||||||
use core::str::Split;
|
use core::str::Split;
|
||||||
|
|
||||||
use u_char::CharExt as UCharExt; // conflicts with core::prelude::CharExt
|
use char::CharExt as UCharExt; // conflicts with core::prelude::CharExt
|
||||||
use tables::grapheme::GraphemeCat;
|
use tables::grapheme::GraphemeCat;
|
||||||
|
|
||||||
/// An iterator over the words of a string, separated by a sequence of whitespace
|
/// An iterator over the words of a string, separated by a sequence of whitespace
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue