1
Fork 0

Improve docs on some char boolean methods

This commit is contained in:
BO41 2019-10-06 15:59:49 +02:00
parent 0358617e3f
commit d8c2956906

View file

@ -116,9 +116,9 @@ impl char {
// the code is split up here to improve execution speed for cases where // the code is split up here to improve execution speed for cases where
// the `radix` is constant and 10 or smaller // the `radix` is constant and 10 or smaller
let val = if radix <= 10 { let val = if radix <= 10 {
match self { match self {
'0' ..= '9' => self as u32 - '0' as u32, '0'..='9' => self as u32 - '0' as u32,
_ => return None, _ => return None,
} }
} else { } else {
@ -130,8 +130,11 @@ impl char {
} }
}; };
if val < radix { Some(val) } if val < radix {
else { None } Some(val)
} else {
None
}
} }
/// Returns an iterator that yields the hexadecimal Unicode escape of a /// Returns an iterator that yields the hexadecimal Unicode escape of a
@ -303,8 +306,8 @@ impl char {
'\r' => EscapeDefaultState::Backslash('r'), '\r' => EscapeDefaultState::Backslash('r'),
'\n' => EscapeDefaultState::Backslash('n'), '\n' => EscapeDefaultState::Backslash('n'),
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
'\x20' ..= '\x7e' => EscapeDefaultState::Char(self), '\x20'..='\x7e' => EscapeDefaultState::Char(self),
_ => EscapeDefaultState::Unicode(self.escape_unicode()) _ => EscapeDefaultState::Unicode(self.escape_unicode()),
}; };
EscapeDefault { state: init_state } EscapeDefault { state: init_state }
} }
@ -436,30 +439,31 @@ impl char {
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str { pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
let code = self as u32; let code = self as u32;
unsafe { unsafe {
let len = let len = if code < MAX_ONE_B && !dst.is_empty() {
if code < MAX_ONE_B && !dst.is_empty() {
*dst.get_unchecked_mut(0) = code as u8; *dst.get_unchecked_mut(0) = code as u8;
1 1
} else if code < MAX_TWO_B && dst.len() >= 2 { } else if code < MAX_TWO_B && dst.len() >= 2 {
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
2 2
} else if code < MAX_THREE_B && dst.len() >= 3 { } else if code < MAX_THREE_B && dst.len() >= 3 {
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
3 3
} else if dst.len() >= 4 { } else if dst.len() >= 4 {
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
4 4
} else { } else {
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf8(), from_u32_unchecked(code).len_utf8(),
code, code,
dst.len()) dst.len(),
)
}; };
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
} }
@ -515,15 +519,24 @@ impl char {
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
} else { } else {
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", panic!(
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf16(), from_u32_unchecked(code).len_utf16(),
code, code,
dst.len()) dst.len(),
)
} }
} }
} }
/// Returns `true` if this `char` is an alphabetic code point, and false if not. /// Returns `true` if this `char` has the `Alphabetic` property.
///
/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -547,10 +560,14 @@ impl char {
} }
} }
/// Returns `true` if this `char` is lowercase. /// Returns `true` if this `char` has the `Lowercase` property.
/// ///
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// Property `Lowercase`. /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -575,10 +592,14 @@ impl char {
} }
} }
/// Returns `true` if this `char` is uppercase. /// Returns `true` if this `char` has the `Uppercase` property.
/// ///
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// Property `Uppercase`. /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -603,10 +624,12 @@ impl char {
} }
} }
/// Returns `true` if this `char` is whitespace. /// Returns `true` if this `char` has the `White_Space` property.
/// ///
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
/// Property `White_Space`. ///
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -630,10 +653,10 @@ impl char {
} }
} }
/// Returns `true` if this `char` is alphanumeric. /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
/// ///
/// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories /// [`is_alphabetic()`]: #method.is_alphabetic
/// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`. /// [`is_numeric()`]: #method.is_numeric
/// ///
/// # Examples /// # Examples
/// ///
@ -655,10 +678,15 @@ impl char {
self.is_alphabetic() || self.is_numeric() self.is_alphabetic() || self.is_numeric()
} }
/// Returns `true` if this `char` is a control code point. /// Returns `true` if this `char` has the general category for control codes.
/// ///
/// 'Control code point' is defined in terms of the Unicode General /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
/// Category `Cc`. /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -675,19 +703,29 @@ impl char {
general_category::Cc(self) general_category::Cc(self)
} }
/// Returns `true` if this `char` is an extended grapheme character. /// Returns `true` if this `char` has the `Grapheme_Extend` property.
/// ///
/// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
/// Category `Grapheme_Extend`. /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
/// [`DerivedCoreProperties.txt`].
///
/// [uax29]: https://www.unicode.org/reports/tr29/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
#[inline] #[inline]
pub(crate) fn is_grapheme_extended(self) -> bool { pub(crate) fn is_grapheme_extended(self) -> bool {
derived_property::Grapheme_Extend(self) derived_property::Grapheme_Extend(self)
} }
/// Returns `true` if this `char` is numeric. /// Returns `true` if this `char` has one of the general categories for numbers.
/// ///
/// 'Numeric'-ness is defined in terms of the Unicode General Categories /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
/// `Nd`, `Nl`, `No`. /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
/// ///
/// # Examples /// # Examples
/// ///
@ -713,25 +751,29 @@ impl char {
} }
} }
/// Returns an iterator that yields the lowercase equivalent of a `char` /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
/// as one or more `char`s. /// `char`s.
/// ///
/// If a character does not have a lowercase equivalent, the same character /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
/// will be returned back by the iterator.
/// ///
/// This performs complex unconditional mappings with no tailoring: it maps /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
/// one Unicode character to its lowercase equivalent according to the /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
/// [Unicode database] and the additional complex mappings
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
/// language) are not considered here.
/// ///
/// For a full reference, see [here][reference]. /// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
/// ///
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
/// the `char`(s) given by [`SpecialCasing.txt`].
/// ///
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
/// ///
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
/// is independent of context and language.
///
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// ///
/// # Examples /// # Examples
/// ///
@ -774,25 +816,29 @@ impl char {
ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
} }
/// Returns an iterator that yields the uppercase equivalent of a `char` /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
/// as one or more `char`s. /// `char`s.
/// ///
/// If a character does not have an uppercase equivalent, the same character /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
/// will be returned back by the iterator.
/// ///
/// This performs complex unconditional mappings with no tailoring: it maps /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
/// one Unicode character to its uppercase equivalent according to the /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
/// [Unicode database] and the additional complex mappings
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
/// language) are not considered here.
/// ///
/// For a full reference, see [here][reference]. /// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
/// ///
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
/// the `char`(s) given by [`SpecialCasing.txt`].
/// ///
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
/// ///
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
/// is independent of context and language.
///
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// ///
/// # Examples /// # Examples
/// ///