Implement RFC 1054: add split_whitespace() fn, deprecate words()
For now, words() is left in (but deprecated), and Words is a type alias for the struct SplitWhitespace. Also cleaned up references to s.words() throughout the codebase. Closes #15628.
This commit is contained in:
parent
f43c86cda4
commit
c361e13d71
11 changed files with 53 additions and 28 deletions
|
@ -76,7 +76,7 @@ pub use core::str::{Matches, RMatches};
|
||||||
pub use core::str::{MatchIndices, RMatchIndices};
|
pub use core::str::{MatchIndices, RMatchIndices};
|
||||||
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
|
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
|
||||||
pub use core::str::{from_utf8_unchecked, ParseBoolError};
|
pub use core::str::{from_utf8_unchecked, ParseBoolError};
|
||||||
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
|
pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
|
||||||
pub use core::str::pattern;
|
pub use core::str::pattern;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1737,27 +1737,44 @@ impl str {
|
||||||
UnicodeStr::grapheme_indices(&self[..], is_extended)
|
UnicodeStr::grapheme_indices(&self[..], is_extended)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An iterator over the non-empty words of `self`.
|
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
|
||||||
///
|
/// and which are separated by any amount of whitespace.
|
||||||
/// A 'word' is a subsequence separated by any sequence of whitespace.
|
|
||||||
/// Sequences of whitespace
|
|
||||||
/// are collapsed, so empty "words" are not included.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// # #![feature(str_words)]
|
/// # #![feature(str_words)]
|
||||||
|
/// # #![allow(deprecated)]
|
||||||
/// let some_words = " Mary had\ta little \n\t lamb";
|
/// let some_words = " Mary had\ta little \n\t lamb";
|
||||||
/// let v: Vec<&str> = some_words.words().collect();
|
/// let v: Vec<&str> = some_words.words().collect();
|
||||||
///
|
///
|
||||||
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
|
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
|
||||||
/// ```
|
/// ```
|
||||||
|
#[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
|
||||||
|
since = "1.1.0")]
|
||||||
#[unstable(feature = "str_words",
|
#[unstable(feature = "str_words",
|
||||||
reason = "the precise algorithm to use is unclear")]
|
reason = "the precise algorithm to use is unclear")]
|
||||||
|
#[allow(deprecated)]
|
||||||
pub fn words(&self) -> Words {
|
pub fn words(&self) -> Words {
|
||||||
UnicodeStr::words(&self[..])
|
UnicodeStr::words(&self[..])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
|
||||||
|
/// and which are separated by any amount of whitespace.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// let some_words = " Mary had\ta little \n\t lamb";
|
||||||
|
/// let v: Vec<&str> = some_words.split_whitespace().collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
|
||||||
|
/// ```
|
||||||
|
#[stable(feature = "split_whitespace", since = "1.1.0")]
|
||||||
|
pub fn split_whitespace(&self) -> SplitWhitespace {
|
||||||
|
UnicodeStr::split_whitespace(&self[..])
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns a string's displayed width in columns.
|
/// Returns a string's displayed width in columns.
|
||||||
///
|
///
|
||||||
/// Control characters have zero width.
|
/// Control characters have zero width.
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
#![feature(hash)]
|
#![feature(hash)]
|
||||||
#![feature(rand)]
|
#![feature(rand)]
|
||||||
#![feature(rustc_private)]
|
#![feature(rustc_private)]
|
||||||
#![feature(str_words)]
|
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
#![feature(unboxed_closures)]
|
#![feature(unboxed_closures)]
|
||||||
#![feature(unicode)]
|
#![feature(unicode)]
|
||||||
|
|
|
@ -939,9 +939,9 @@ fn test_rsplitn() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_words() {
|
fn test_split_whitespace() {
|
||||||
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
|
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
|
||||||
let words: Vec<&str> = data.words().collect();
|
let words: Vec<&str> = data.split_whitespace().collect();
|
||||||
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
|
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,6 @@
|
||||||
|
|
||||||
#![deny(missing_docs)]
|
#![deny(missing_docs)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
#![feature(str_words)]
|
|
||||||
#![feature(str_char)]
|
#![feature(str_char)]
|
||||||
#![cfg_attr(test, feature(rustc_private))]
|
#![cfg_attr(test, feature(rustc_private))]
|
||||||
|
|
||||||
|
@ -771,7 +770,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String {
|
||||||
|
|
||||||
// Normalize desc to contain words separated by one space character
|
// Normalize desc to contain words separated by one space character
|
||||||
let mut desc_normalized_whitespace = String::new();
|
let mut desc_normalized_whitespace = String::new();
|
||||||
for word in desc.words() {
|
for word in desc.split_whitespace() {
|
||||||
desc_normalized_whitespace.push_str(word);
|
desc_normalized_whitespace.push_str(word);
|
||||||
desc_normalized_whitespace.push(' ');
|
desc_normalized_whitespace.push(' ');
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,6 @@
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
#![feature(path_ext)]
|
#![feature(path_ext)]
|
||||||
#![feature(str_words)]
|
|
||||||
#![feature(str_char)]
|
#![feature(str_char)]
|
||||||
#![feature(into_cow)]
|
#![feature(into_cow)]
|
||||||
#![feature(slice_patterns)]
|
#![feature(slice_patterns)]
|
||||||
|
|
|
@ -418,7 +418,7 @@ macro_rules! options {
|
||||||
-> bool {
|
-> bool {
|
||||||
match v {
|
match v {
|
||||||
Some(s) => {
|
Some(s) => {
|
||||||
for s in s.words() {
|
for s in s.split_whitespace() {
|
||||||
slot.push(s.to_string());
|
slot.push(s.to_string());
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
|
@ -431,7 +431,7 @@ macro_rules! options {
|
||||||
-> bool {
|
-> bool {
|
||||||
match v {
|
match v {
|
||||||
Some(s) => {
|
Some(s) => {
|
||||||
let v = s.words().map(|s| s.to_string()).collect();
|
let v = s.split_whitespace().map(|s| s.to_string()).collect();
|
||||||
*slot = Some(v);
|
*slot = Some(v);
|
||||||
true
|
true
|
||||||
},
|
},
|
||||||
|
|
|
@ -45,7 +45,7 @@ mod u_str;
|
||||||
pub mod char;
|
pub mod char;
|
||||||
|
|
||||||
pub mod str {
|
pub mod str {
|
||||||
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
|
pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};
|
||||||
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
|
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
|
||||||
pub use u_str::{utf16_items, Utf16Encoder};
|
pub use u_str::{utf16_items, Utf16Encoder};
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,10 +25,16 @@ use core::str::Split;
|
||||||
|
|
||||||
use tables::grapheme::GraphemeCat;
|
use tables::grapheme::GraphemeCat;
|
||||||
|
|
||||||
/// An iterator over the words of a string, separated by a sequence of whitespace
|
#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace",
|
||||||
|
since = "1.1.0")]
|
||||||
#[unstable(feature = "str_words",
|
#[unstable(feature = "str_words",
|
||||||
reason = "words() will be replaced by split_whitespace() in 1.1.0")]
|
reason = "words() will be replaced by split_whitespace() in 1.1.0")]
|
||||||
pub struct Words<'a> {
|
pub type Words<'a> = SplitWhitespace<'a>;
|
||||||
|
|
||||||
|
/// An iterator over the non-whitespace substrings of a string,
|
||||||
|
/// separated by any amount of whitespace.
|
||||||
|
#[stable(feature = "split_whitespace", since = "1.1.0")]
|
||||||
|
pub struct SplitWhitespace<'a> {
|
||||||
inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
|
inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +43,9 @@ pub struct Words<'a> {
|
||||||
pub trait UnicodeStr {
|
pub trait UnicodeStr {
|
||||||
fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
|
fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
|
||||||
fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
|
fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
|
||||||
|
#[allow(deprecated)]
|
||||||
fn words<'a>(&'a self) -> Words<'a>;
|
fn words<'a>(&'a self) -> Words<'a>;
|
||||||
|
fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
|
||||||
fn is_whitespace(&self) -> bool;
|
fn is_whitespace(&self) -> bool;
|
||||||
fn is_alphanumeric(&self) -> bool;
|
fn is_alphanumeric(&self) -> bool;
|
||||||
fn width(&self, is_cjk: bool) -> usize;
|
fn width(&self, is_cjk: bool) -> usize;
|
||||||
|
@ -57,15 +65,21 @@ impl UnicodeStr for str {
|
||||||
GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
|
GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(deprecated)]
|
||||||
#[inline]
|
#[inline]
|
||||||
fn words(&self) -> Words {
|
fn words(&self) -> Words {
|
||||||
|
self.split_whitespace()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn split_whitespace(&self) -> SplitWhitespace {
|
||||||
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
|
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
|
||||||
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
|
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
|
||||||
|
|
||||||
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
|
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
|
||||||
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
|
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
|
||||||
|
|
||||||
Words { inner: self.split(is_whitespace).filter(is_not_empty) }
|
SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -546,11 +560,11 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator for Words<'a> {
|
impl<'a> Iterator for SplitWhitespace<'a> {
|
||||||
type Item = &'a str;
|
type Item = &'a str;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
|
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
|
||||||
}
|
}
|
||||||
impl<'a> DoubleEndedIterator for Words<'a> {
|
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
|
||||||
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
|
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
|
||||||
}
|
}
|
||||||
|
|
|
@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Transform the contents of the header into a hyphenated string
|
// Transform the contents of the header into a hyphenated string
|
||||||
let id = s.words().map(|s| s.to_ascii_lowercase())
|
let id = s.split_whitespace().map(|s| s.to_ascii_lowercase())
|
||||||
.collect::<Vec<String>>().connect("-");
|
.collect::<Vec<String>>().connect("-");
|
||||||
|
|
||||||
// This is a terrible hack working around how hoedown gives us rendered
|
// This is a terrible hack working around how hoedown gives us rendered
|
||||||
|
|
|
@ -31,7 +31,6 @@
|
||||||
#![feature(std_misc)]
|
#![feature(std_misc)]
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
#![feature(unicode)]
|
#![feature(unicode)]
|
||||||
#![feature(str_words)]
|
|
||||||
#![feature(path_ext)]
|
#![feature(path_ext)]
|
||||||
#![feature(path_relative_from)]
|
#![feature(path_relative_from)]
|
||||||
#![feature(slice_patterns)]
|
#![feature(slice_patterns)]
|
||||||
|
@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {
|
||||||
|
|
||||||
let test_args = matches.opt_strs("test-args");
|
let test_args = matches.opt_strs("test-args");
|
||||||
let test_args: Vec<String> = test_args.iter()
|
let test_args: Vec<String> = test_args.iter()
|
||||||
.flat_map(|s| s.words())
|
.flat_map(|s| s.split_whitespace())
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche
|
||||||
}
|
}
|
||||||
clean::NameValue(ref x, ref value)
|
clean::NameValue(ref x, ref value)
|
||||||
if "passes" == *x => {
|
if "passes" == *x => {
|
||||||
for pass in value.words() {
|
for pass in value.split_whitespace() {
|
||||||
passes.push(pass.to_string());
|
passes.push(pass.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clean::NameValue(ref x, ref value)
|
clean::NameValue(ref x, ref value)
|
||||||
if "plugins" == *x => {
|
if "plugins" == *x => {
|
||||||
for p in value.words() {
|
for p in value.split_whitespace() {
|
||||||
plugins.push(p.to_string());
|
plugins.push(p.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,11 +9,9 @@
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
|
|
||||||
#![feature(str_words)]
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let foo = "hello".to_string();
|
let foo = "hello".to_string();
|
||||||
let foo: Vec<&str> = foo.words().collect();
|
let foo: Vec<&str> = foo.split_whitespace().collect();
|
||||||
let invalid_string = &foo[0];
|
let invalid_string = &foo[0];
|
||||||
assert_eq!(*invalid_string, "hello");
|
assert_eq!(*invalid_string, "hello");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue