Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon

This is not a complete implementation of the RFC:

- only existing methods got updated, no new ones added
- doc comments are not extensive enough yet
- optimizations got lost and need to be reimplemented

See https://github.com/rust-lang/rfcs/pull/528

Technically a

[breaking-change]
This commit is contained in:
bors 2015-02-22 22:45:46 +00:00
commit 67eb38ee4c
20 changed files with 1076 additions and 350 deletions

View file

@ -58,7 +58,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
fn parse_expected(last_nonfollow_error: Option<uint>, fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint, line_num: uint,
line: &str) -> Option<(WhichLine, ExpectedError)> { line: &str) -> Option<(WhichLine, ExpectedError)> {
let start = match line.find_str("//~") { Some(i) => i, None => return None }; let start = match line.find("//~") { Some(i) => i, None => return None };
let (follow, adjusts) = if line.char_at(start + 3) == '|' { let (follow, adjusts) = if line.char_at(start + 3) == '|' {
(true, 0) (true, 0)
} else { } else {

View file

@ -330,7 +330,7 @@ fn parse_name_directive(line: &str, directive: &str) -> bool {
pub fn parse_name_value_directive(line: &str, directive: &str) pub fn parse_name_value_directive(line: &str, directive: &str)
-> Option<String> { -> Option<String> {
let keycolon = format!("{}:", directive); let keycolon = format!("{}:", directive);
match line.find_str(&keycolon) { match line.find(&keycolon) {
Some(colon) => { Some(colon) => {
let value = line[(colon + keycolon.len()) .. line.len()].to_string(); let value = line[(colon + keycolon.len()) .. line.len()].to_string();
debug!("{}: {}", directive, value); debug!("{}: {}", directive, value);

View file

@ -847,7 +847,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
check_lines.iter().map(|s| { check_lines.iter().map(|s| {
s s
.trim() .trim()
.split_str("[...]") .split("[...]")
.map(|x| x.to_string()) .map(|x| x.to_string())
.collect() .collect()
}).collect(); }).collect();
@ -866,7 +866,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
None None
} }
} else { } else {
rest.find_str(frag) rest.find(frag)
}; };
match found { match found {
None => { None => {

View file

@ -82,6 +82,8 @@ pub use core::str::{SplitN, RSplitN};
pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError}; pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices}; pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::Pattern;
pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
/* /*
Section: Creating a string Section: Creating a string
@ -530,7 +532,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("bananas".contains("nana")); /// assert!("bananas".contains("nana"));
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn contains(&self, pat: &str) -> bool { fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains(&self[..], pat) core_str::StrExt::contains(&self[..], pat)
} }
@ -545,9 +547,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ```rust /// ```rust
/// assert!("hello".contains_char('e')); /// assert!("hello".contains_char('e'));
/// ``` /// ```
#[unstable(feature = "collections", #[unstable(feature = "collections")]
reason = "might get removed in favour of a more generic contains()")] #[deprecated(since = "1.0.0", reason = "use `contains()` with a char")]
fn contains_char<P: CharEq>(&self, pat: P) -> bool { fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains_char(&self[..], pat) core_str::StrExt::contains_char(&self[..], pat)
} }
@ -603,7 +605,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]); /// assert_eq!(v, vec![""]);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn split<P: CharEq>(&self, pat: P) -> Split<P> { fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
core_str::StrExt::split(&self[..], pat) core_str::StrExt::split(&self[..], pat)
} }
@ -630,7 +632,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]); /// assert_eq!(v, vec![""]);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> { fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
core_str::StrExt::splitn(&self[..], count, pat) core_str::StrExt::splitn(&self[..], count, pat)
} }
@ -658,8 +660,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect(); /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
/// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]); /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
/// ``` /// ```
#[unstable(feature = "collections", reason = "might get removed")] #[stable(feature = "rust1", since = "1.0.0")]
fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> { fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
core_str::StrExt::split_terminator(&self[..], pat) core_str::StrExt::split_terminator(&self[..], pat)
} }
@ -680,7 +682,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec!["leopard", "tiger", "lionX"]); /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> { fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
core_str::StrExt::rsplitn(&self[..], count, pat) core_str::StrExt::rsplitn(&self[..], count, pat)
} }
@ -706,7 +708,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ``` /// ```
#[unstable(feature = "collections", #[unstable(feature = "collections",
reason = "might have its iterator type changed")] reason = "might have its iterator type changed")]
fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> { // NB: Right now MatchIndices yields `(usize, usize)`,
// but it would be more consistent and useful to return `(usize, &str)`
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
core_str::StrExt::match_indices(&self[..], pat) core_str::StrExt::match_indices(&self[..], pat)
} }
@ -721,9 +725,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "1abcabc2".split_str("abc").collect(); /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
/// assert_eq!(v, vec!["1", "", "2"]); /// assert_eq!(v, vec!["1", "", "2"]);
/// ``` /// ```
#[unstable(feature = "collections", #[unstable(feature = "collections")]
reason = "might get removed in the future in favor of a more generic split()")] #[deprecated(since = "1.0.0", reason = "use `split()` with a `&str`")]
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> { fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
core_str::StrExt::split_str(&self[..], pat) core_str::StrExt::split_str(&self[..], pat)
} }
@ -825,7 +829,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".starts_with("ba")); /// assert!("banana".starts_with("ba"));
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn starts_with(&self, pat: &str) -> bool { fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::starts_with(&self[..], pat) core_str::StrExt::starts_with(&self[..], pat)
} }
@ -837,7 +841,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".ends_with("nana")); /// assert!("banana".ends_with("nana"));
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn ends_with(&self, pat: &str) -> bool { fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::ends_with(&self[..], pat) core_str::StrExt::ends_with(&self[..], pat)
} }
@ -857,7 +863,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar"); /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn trim_matches<P: CharEq>(&self, pat: P) -> &str { fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: DoubleEndedSearcher<'a>
{
core_str::StrExt::trim_matches(&self[..], pat) core_str::StrExt::trim_matches(&self[..], pat)
} }
@ -877,7 +885,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123"); /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str { fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
core_str::StrExt::trim_left_matches(&self[..], pat) core_str::StrExt::trim_left_matches(&self[..], pat)
} }
@ -897,7 +905,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar"); /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str { fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::trim_right_matches(&self[..], pat) core_str::StrExt::trim_right_matches(&self[..], pat)
} }
@ -1074,7 +1084,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find(x), None); /// assert_eq!(s.find(x), None);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn find<P: CharEq>(&self, pat: P) -> Option<usize> { fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
core_str::StrExt::find(&self[..], pat) core_str::StrExt::find(&self[..], pat)
} }
@ -1102,7 +1112,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.rfind(x), None); /// assert_eq!(s.rfind(x), None);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> { fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rfind(&self[..], pat) core_str::StrExt::rfind(&self[..], pat)
} }
@ -1125,9 +1137,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find_str("老虎 L"), Some(6)); /// assert_eq!(s.find_str("老虎 L"), Some(6));
/// assert_eq!(s.find_str("muffin man"), None); /// assert_eq!(s.find_str("muffin man"), None);
/// ``` /// ```
#[unstable(feature = "collections", #[unstable(feature = "collections")]
reason = "might get removed in favor of a more generic find in the future")] #[deprecated(since = "1.0.0", reason = "use `find()` with a `&str`")]
fn find_str(&self, needle: &str) -> Option<usize> { fn find_str<'a, P: Pattern<'a>>(&'a self, needle: P) -> Option<usize> {
core_str::StrExt::find_str(&self[..], needle) core_str::StrExt::find_str(&self[..], needle)
} }
@ -2887,22 +2899,6 @@ mod bench {
b.iter(|| assert_eq!(s.split('V').count(), 3)); b.iter(|| assert_eq!(s.split('V').count(), 3));
} }
#[bench]
fn split_unicode_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
}
#[bench] #[bench]
fn split_ascii(b: &mut Bencher) { fn split_ascii(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb."; let s = "Mary had a little lamb, Little lamb, little-lamb.";
@ -2911,23 +2907,6 @@ mod bench {
b.iter(|| assert_eq!(s.split(' ').count(), len)); b.iter(|| assert_eq!(s.split(' ').count(), len));
} }
#[bench]
fn split_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
#[inline]
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "Mary had a little lamb, Little lamb, little-lamb.";
let len = s.split(' ').count();
b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
}
#[bench] #[bench]
fn split_extern_fn(b: &mut Bencher) { fn split_extern_fn(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb."; let s = "Mary had a little lamb, Little lamb, little-lamb.";

View file

@ -22,13 +22,13 @@ use option::Option;
use slice::SliceExt; use slice::SliceExt;
// UTF-8 ranges and tags for encoding characters // UTF-8 ranges and tags for encoding characters
static TAG_CONT: u8 = 0b1000_0000u8; const TAG_CONT: u8 = 0b1000_0000u8;
static TAG_TWO_B: u8 = 0b1100_0000u8; const TAG_TWO_B: u8 = 0b1100_0000u8;
static TAG_THREE_B: u8 = 0b1110_0000u8; const TAG_THREE_B: u8 = 0b1110_0000u8;
static TAG_FOUR_B: u8 = 0b1111_0000u8; const TAG_FOUR_B: u8 = 0b1111_0000u8;
static MAX_ONE_B: u32 = 0x80u32; const MAX_ONE_B: u32 = 0x80u32;
static MAX_TWO_B: u32 = 0x800u32; const MAX_TWO_B: u32 = 0x800u32;
static MAX_THREE_B: u32 = 0x10000u32; const MAX_THREE_B: u32 = 0x10000u32;
/* /*
Lu Uppercase_Letter an uppercase letter Lu Uppercase_Letter an uppercase letter
@ -398,11 +398,14 @@ impl CharExt for char {
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
fn len_utf8(self) -> usize { fn len_utf8(self) -> usize {
let code = self as u32; let code = self as u32;
match () { if code < MAX_ONE_B {
_ if code < MAX_ONE_B => 1, 1
_ if code < MAX_TWO_B => 2, } else if code < MAX_TWO_B {
_ if code < MAX_THREE_B => 3, 2
_ => 4, } else if code < MAX_THREE_B {
3
} else {
4
} }
} }

View file

@ -657,6 +657,8 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> { fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks // could be implemented with slices, but this avoids bounds checks
unsafe { unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
if self.ptr == self.end { if self.ptr == self.end {
None None
} else { } else {
@ -693,6 +695,8 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> { fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks // could be implemented with slices, but this avoids bounds checks
unsafe { unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
if self.end == self.ptr { if self.end == self.ptr {
None None
} else { } else {

View file

@ -16,7 +16,7 @@
#![doc(primitive = "str")] #![doc(primitive = "str")]
use self::Searcher::{Naive, TwoWay, TwoWayLong}; use self::OldSearcher::{TwoWay, TwoWayLong};
use clone::Clone; use clone::Clone;
use cmp::{self, Eq}; use cmp::{self, Eq};
@ -36,6 +36,11 @@ use result::Result::{self, Ok, Err};
use slice::{self, SliceExt}; use slice::{self, SliceExt};
use usize; use usize;
pub use self::pattern::Pattern;
pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
mod pattern;
macro_rules! delegate_iter { macro_rules! delegate_iter {
(exact $te:ty : $ti:ty) => { (exact $te:ty : $ti:ty) => {
delegate_iter!{$te : $ti} delegate_iter!{$te : $ti}
@ -70,7 +75,7 @@ macro_rules! delegate_iter {
}; };
(pattern $te:ty : $ti:ty) => { (pattern $te:ty : $ti:ty) => {
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: CharEq> Iterator for $ti { impl<'a, P: Pattern<'a>> Iterator for $ti {
type Item = $te; type Item = $te;
#[inline] #[inline]
@ -83,7 +88,8 @@ macro_rules! delegate_iter {
} }
} }
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: CharEq> DoubleEndedIterator for $ti { impl<'a, P: Pattern<'a>> DoubleEndedIterator for $ti
where P::Searcher: DoubleEndedSearcher<'a> {
#[inline] #[inline]
fn next_back(&mut self) -> Option<$te> { fn next_back(&mut self) -> Option<$te> {
self.0.next_back() self.0.next_back()
@ -92,7 +98,8 @@ macro_rules! delegate_iter {
}; };
(pattern forward $te:ty : $ti:ty) => { (pattern forward $te:ty : $ti:ty) => {
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: CharEq> Iterator for $ti { impl<'a, P: Pattern<'a>> Iterator for $ti
where P::Searcher: DoubleEndedSearcher<'a> {
type Item = $te; type Item = $te;
#[inline] #[inline]
@ -235,8 +242,10 @@ pub unsafe fn from_c_str(s: *const i8) -> &'static str {
} }
/// Something that can be used to compare against a character /// Something that can be used to compare against a character
#[unstable(feature = "core", #[unstable(feature = "core")]
reason = "definition may change as pattern-related methods are stabilized")] #[deprecated(since = "1.0.0",
reason = "use `Pattern` instead")]
// NB: Rather than removing it, make it private and move it into self::pattern
pub trait CharEq { pub trait CharEq {
/// Determine if the splitter should split at the given character /// Determine if the splitter should split at the given character
fn matches(&mut self, char) -> bool; fn matches(&mut self, char) -> bool;
@ -245,6 +254,7 @@ pub trait CharEq {
fn only_ascii(&self) -> bool; fn only_ascii(&self) -> bool;
} }
#[allow(deprecated) /* for CharEq */ ]
impl CharEq for char { impl CharEq for char {
#[inline] #[inline]
fn matches(&mut self, c: char) -> bool { *self == c } fn matches(&mut self, c: char) -> bool { *self == c }
@ -253,6 +263,7 @@ impl CharEq for char {
fn only_ascii(&self) -> bool { (*self as u32) < 128 } fn only_ascii(&self) -> bool { (*self as u32) < 128 }
} }
#[allow(deprecated) /* for CharEq */ ]
impl<F> CharEq for F where F: FnMut(char) -> bool { impl<F> CharEq for F where F: FnMut(char) -> bool {
#[inline] #[inline]
fn matches(&mut self, c: char) -> bool { (*self)(c) } fn matches(&mut self, c: char) -> bool { (*self)(c) }
@ -261,13 +272,16 @@ impl<F> CharEq for F where F: FnMut(char) -> bool {
fn only_ascii(&self) -> bool { false } fn only_ascii(&self) -> bool { false }
} }
#[allow(deprecated) /* for CharEq */ ]
impl<'a> CharEq for &'a [char] { impl<'a> CharEq for &'a [char] {
#[inline] #[inline]
#[allow(deprecated) /* for CharEq */ ]
fn matches(&mut self, c: char) -> bool { fn matches(&mut self, c: char) -> bool {
self.iter().any(|&m| { let mut m = m; m.matches(c) }) self.iter().any(|&m| { let mut m = m; m.matches(c) })
} }
#[inline] #[inline]
#[allow(deprecated) /* for CharEq */ ]
fn only_ascii(&self) -> bool { fn only_ascii(&self) -> bool {
self.iter().all(|m| m.only_ascii()) self.iter().all(|m| m.only_ascii())
} }
@ -337,6 +351,7 @@ fn unwrap_or_0(opt: Option<&u8>) -> u8 {
/// Reads the next code point out of a byte iterator (assuming a /// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding). /// UTF-8-like encoding).
#[unstable(feature = "core")] #[unstable(feature = "core")]
#[inline]
pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> { pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
// Decode UTF-8 // Decode UTF-8
let x = match bytes.next() { let x = match bytes.next() {
@ -368,6 +383,38 @@ pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
Some(ch) Some(ch)
} }
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
#[unstable(feature = "core")]
#[inline]
pub fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
// Decode UTF-8
let w = match bytes.next_back() {
None => return None,
Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
Some(&back_byte) => back_byte,
};
// Multibyte case follows
// Decode from a byte combination out of: [x [y [z w]]]
let mut ch;
let z = unwrap_or_0(bytes.next_back());
ch = utf8_first_byte!(z, 2);
if utf8_is_cont_byte!(z) {
let y = unwrap_or_0(bytes.next_back());
ch = utf8_first_byte!(y, 3);
if utf8_is_cont_byte!(y) {
let x = unwrap_or_0(bytes.next_back());
ch = utf8_first_byte!(x, 4);
ch = utf8_acc_cont_byte!(ch, y);
}
ch = utf8_acc_cont_byte!(ch, z);
}
ch = utf8_acc_cont_byte!(ch, w);
Some(ch)
}
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Chars<'a> { impl<'a> Iterator for Chars<'a> {
type Item = char; type Item = char;
@ -393,33 +440,12 @@ impl<'a> Iterator for Chars<'a> {
impl<'a> DoubleEndedIterator for Chars<'a> { impl<'a> DoubleEndedIterator for Chars<'a> {
#[inline] #[inline]
fn next_back(&mut self) -> Option<char> { fn next_back(&mut self) -> Option<char> {
let w = match self.iter.next_back() { next_code_point_reverse(&mut self.iter).map(|ch| {
None => return None, // str invariant says `ch` is a valid Unicode Scalar Value
Some(&back_byte) if back_byte < 128 => return Some(back_byte as char), unsafe {
Some(&back_byte) => back_byte, mem::transmute(ch)
};
// Multibyte case follows
// Decode from a byte combination out of: [x [y [z w]]]
let mut ch;
let z = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(z, 2);
if utf8_is_cont_byte!(z) {
let y = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(y, 3);
if utf8_is_cont_byte!(y) {
let x = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(x, 4);
ch = utf8_acc_cont_byte!(ch, y);
} }
ch = utf8_acc_cont_byte!(ch, z); })
}
ch = utf8_acc_cont_byte!(ch, w);
// str invariant says `ch` is a valid Unicode Scalar Value
unsafe {
Some(mem::transmute(ch))
}
} }
} }
@ -495,22 +521,20 @@ impl<'a> Fn<(&'a u8,)> for BytesDeref {
} }
/// An iterator over the substrings of a string, separated by `sep`. /// An iterator over the substrings of a string, separated by `sep`.
#[derive(Clone)] struct CharSplits<'a, P: Pattern<'a>> {
struct CharSplits<'a, Sep> {
/// The slice remaining to be iterated /// The slice remaining to be iterated
string: &'a str, start: usize,
sep: Sep, end: usize,
matcher: P::Searcher,
/// Whether an empty string at the end is allowed /// Whether an empty string at the end is allowed
allow_trailing_empty: bool, allow_trailing_empty: bool,
only_ascii: bool,
finished: bool, finished: bool,
} }
/// An iterator over the substrings of a string, separated by `sep`, /// An iterator over the substrings of a string, separated by `sep`,
/// splitting at most `count` times. /// splitting at most `count` times.
#[derive(Clone)] struct CharSplitsN<'a, P: Pattern<'a>> {
struct CharSplitsN<'a, Sep> { iter: CharSplits<'a, P>,
iter: CharSplits<'a, Sep>,
/// The number of splits remaining /// The number of splits remaining
count: usize, count: usize,
invert: bool, invert: bool,
@ -528,12 +552,15 @@ pub struct LinesAny<'a> {
inner: Map<Lines<'a>, fn(&str) -> &str>, inner: Map<Lines<'a>, fn(&str) -> &str>,
} }
impl<'a, Sep> CharSplits<'a, Sep> { impl<'a, P: Pattern<'a>> CharSplits<'a, P> {
#[inline] #[inline]
fn get_end(&mut self) -> Option<&'a str> { fn get_end(&mut self) -> Option<&'a str> {
if !self.finished && (self.allow_trailing_empty || self.string.len() > 0) { if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
self.finished = true; self.finished = true;
Some(self.string) unsafe {
let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
Some(string)
}
} else { } else {
None None
} }
@ -541,33 +568,18 @@ impl<'a, Sep> CharSplits<'a, Sep> {
} }
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, Sep: CharEq> Iterator for CharSplits<'a, Sep> { impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> {
type Item = &'a str; type Item = &'a str;
#[inline] #[inline]
fn next(&mut self) -> Option<&'a str> { fn next(&mut self) -> Option<&'a str> {
if self.finished { return None } if self.finished { return None }
let mut next_split = None; let haystack = self.matcher.haystack();
if self.only_ascii { match self.matcher.next_match() {
for (idx, byte) in self.string.bytes().enumerate() {
if self.sep.matches(byte as char) && byte < 128u8 {
next_split = Some((idx, idx + 1));
break;
}
}
} else {
for (idx, ch) in self.string.char_indices() {
if self.sep.matches(ch) {
next_split = Some((idx, self.string.char_range_at(idx).next));
break;
}
}
}
match next_split {
Some((a, b)) => unsafe { Some((a, b)) => unsafe {
let elt = self.string.slice_unchecked(0, a); let elt = haystack.slice_unchecked(self.start, a);
self.string = self.string.slice_unchecked(b, self.string.len()); self.start = b;
Some(elt) Some(elt)
}, },
None => self.get_end(), None => self.get_end(),
@ -576,7 +588,8 @@ impl<'a, Sep: CharEq> Iterator for CharSplits<'a, Sep> {
} }
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, Sep: CharEq> DoubleEndedIterator for CharSplits<'a, Sep> { impl<'a, P: Pattern<'a>> DoubleEndedIterator for CharSplits<'a, P>
where P::Searcher: DoubleEndedSearcher<'a> {
#[inline] #[inline]
fn next_back(&mut self) -> Option<&'a str> { fn next_back(&mut self) -> Option<&'a str> {
if self.finished { return None } if self.finished { return None }
@ -588,37 +601,25 @@ impl<'a, Sep: CharEq> DoubleEndedIterator for CharSplits<'a, Sep> {
_ => if self.finished { return None } _ => if self.finished { return None }
} }
} }
let len = self.string.len();
let mut next_split = None;
if self.only_ascii { let haystack = self.matcher.haystack();
for (idx, byte) in self.string.bytes().enumerate().rev() { match self.matcher.next_match_back() {
if self.sep.matches(byte as char) && byte < 128u8 {
next_split = Some((idx, idx + 1));
break;
}
}
} else {
for (idx, ch) in self.string.char_indices().rev() {
if self.sep.matches(ch) {
next_split = Some((idx, self.string.char_range_at(idx).next));
break;
}
}
}
match next_split {
Some((a, b)) => unsafe { Some((a, b)) => unsafe {
let elt = self.string.slice_unchecked(b, len); let elt = haystack.slice_unchecked(b, self.end);
self.string = self.string.slice_unchecked(0, a); self.end = a;
Some(elt) Some(elt)
}, },
None => { self.finished = true; Some(self.string) } None => unsafe {
self.finished = true;
Some(haystack.slice_unchecked(self.start, self.end))
},
} }
} }
} }
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> { impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P>
where P::Searcher: DoubleEndedSearcher<'a> {
type Item = &'a str; type Item = &'a str;
#[inline] #[inline]
@ -632,32 +633,6 @@ impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> {
} }
} }
/// The internal state of an iterator that searches for matches of a substring
/// within a larger string using naive search
#[derive(Clone)]
struct NaiveSearcher {
position: usize
}
impl NaiveSearcher {
fn new() -> NaiveSearcher {
NaiveSearcher { position: 0 }
}
fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(usize, usize)> {
while self.position + needle.len() <= haystack.len() {
if &haystack[self.position .. self.position + needle.len()] == needle {
let match_pos = self.position;
self.position += needle.len(); // add 1 for all matches
return Some((match_pos, match_pos + needle.len()));
} else {
self.position += 1;
}
}
None
}
}
/// The internal state of an iterator that searches for matches of a substring /// The internal state of an iterator that searches for matches of a substring
/// within a larger string using two-way search /// within a larger string using two-way search
#[derive(Clone)] #[derive(Clone)]
@ -743,6 +718,7 @@ struct TwoWaySearcher {
*/ */
impl TwoWaySearcher { impl TwoWaySearcher {
#[allow(dead_code)]
fn new(needle: &[u8]) -> TwoWaySearcher { fn new(needle: &[u8]) -> TwoWaySearcher {
let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false); let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true); let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
@ -852,6 +828,7 @@ impl TwoWaySearcher {
// Specifically, returns (i, p), where i is the starting index of v in some // Specifically, returns (i, p), where i is the starting index of v in some
// critical factorization (u, v) and p = period(v) // critical factorization (u, v) and p = period(v)
#[inline] #[inline]
#[allow(dead_code)]
fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) { fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
let mut left = -1; // Corresponds to i in the paper let mut left = -1; // Corresponds to i in the paper
let mut right = 0; // Corresponds to j in the paper let mut right = 0; // Corresponds to j in the paper
@ -896,20 +873,26 @@ impl TwoWaySearcher {
/// The internal state of an iterator that searches for matches of a substring /// The internal state of an iterator that searches for matches of a substring
/// within a larger string using a dynamically chosen search algorithm /// within a larger string using a dynamically chosen search algorithm
#[derive(Clone)] #[derive(Clone)]
enum Searcher { // NB: This is kept around for convenience because
Naive(NaiveSearcher), // it is planned to be used again in the future
enum OldSearcher {
TwoWay(TwoWaySearcher), TwoWay(TwoWaySearcher),
TwoWayLong(TwoWaySearcher) TwoWayLong(TwoWaySearcher),
} }
impl Searcher { impl OldSearcher {
fn new(haystack: &[u8], needle: &[u8]) -> Searcher { #[allow(dead_code)]
fn new(haystack: &[u8], needle: &[u8]) -> OldSearcher {
if needle.len() == 0 {
// Handle specially
unimplemented!()
// FIXME: Tune this. // FIXME: Tune this.
// FIXME(#16715): This unsigned integer addition will probably not // FIXME(#16715): This unsigned integer addition will probably not
// overflow because that would mean that the memory almost solely // overflow because that would mean that the memory almost solely
// consists of the needle. Needs #16715 to be formally fixed. // consists of the needle. Needs #16715 to be formally fixed.
if needle.len() + 20 > haystack.len() { } else if needle.len() + 20 > haystack.len() {
Naive(NaiveSearcher::new()) // Use naive searcher
unimplemented!()
} else { } else {
let searcher = TwoWaySearcher::new(needle); let searcher = TwoWaySearcher::new(needle);
if searcher.memory == usize::MAX { // If the period is long if searcher.memory == usize::MAX { // If the period is long
@ -921,66 +904,58 @@ impl Searcher {
} }
} }
/// An iterator over the start and end indices of the matches of a
/// substring within a larger string
#[derive(Clone)] #[derive(Clone)]
#[unstable(feature = "core", reason = "type may be removed")] // NB: This is kept around for convenience because
pub struct MatchIndices<'a> { // it is planned to be used again in the future
struct OldMatchIndices<'a, 'b> {
// constants // constants
haystack: &'a str, haystack: &'a str,
needle: &'a str, needle: &'b str,
searcher: Searcher searcher: OldSearcher
} }
/// An iterator over the substrings of a string separated by a given // FIXME: #21637 Prevents a Clone impl
/// search string /// An iterator over the start and end indices of the matches of a
#[derive(Clone)] /// substring within a larger string
#[unstable(feature = "core", reason = "type may be removed")] #[unstable(feature = "core", reason = "type may be removed")]
pub struct SplitStr<'a> { pub struct MatchIndices<'a, P: Pattern<'a>>(P::Searcher);
it: MatchIndices<'a>,
last_end: usize,
finished: bool
}
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for MatchIndices<'a> { impl<'a, P: Pattern<'a>> Iterator for MatchIndices<'a, P> {
type Item = (usize, usize); type Item = (usize, usize);
#[inline] #[inline]
fn next(&mut self) -> Option<(usize, usize)> { fn next(&mut self) -> Option<(usize, usize)> {
match self.searcher { self.0.next_match()
Naive(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
TwoWay(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
TwoWayLong(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
}
} }
} }
#[stable(feature = "rust1", since = "1.0.0")] /// An iterator over the substrings of a string separated by a given
impl<'a> Iterator for SplitStr<'a> { /// search string
#[unstable(feature = "core")]
#[deprecated(since = "1.0.0", reason = "use `Split` with a `&str`")]
pub struct SplitStr<'a, P: Pattern<'a>>(Split<'a, P>);
impl<'a, P: Pattern<'a>> Iterator for SplitStr<'a, P> {
type Item = &'a str; type Item = &'a str;
#[inline] #[inline]
fn next(&mut self) -> Option<&'a str> { fn next(&mut self) -> Option<&'a str> {
if self.finished { return None; } Iterator::next(&mut self.0)
match self.it.next() {
Some((from, to)) => {
let ret = Some(&self.it.haystack[self.last_end .. from]);
self.last_end = to;
ret
}
None => {
self.finished = true;
Some(&self.it.haystack[self.last_end .. self.it.haystack.len()])
}
}
} }
} }
impl<'a, 'b> OldMatchIndices<'a, 'b> {
#[inline]
#[allow(dead_code)]
fn next(&mut self) -> Option<(usize, usize)> {
match self.searcher {
TwoWay(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
TwoWayLong(ref mut searcher)
=> searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
}
}
}
/* /*
Section: Comparing strings Section: Comparing strings
@ -1298,28 +1273,39 @@ impl<'a, S: ?Sized> Str for &'a S where S: Str {
} }
/// Return type of `StrExt::split` /// Return type of `StrExt::split`
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub struct Split<'a, P>(CharSplits<'a, P>); pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>);
delegate_iter!{pattern &'a str : Split<'a, P>} #[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> Iterator for Split<'a, P> {
type Item = &'a str;
#[inline]
fn next(&mut self) -> Option<&'a str> {
self.0.next()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a, P: Pattern<'a>> DoubleEndedIterator for Split<'a, P>
where P::Searcher: DoubleEndedSearcher<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
self.0.next_back()
}
}
/// Return type of `StrExt::split_terminator` /// Return type of `StrExt::split_terminator`
#[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")]
#[unstable(feature = "core", pub struct SplitTerminator<'a, P: Pattern<'a>>(CharSplits<'a, P>);
reason = "might get removed in favour of a constructor method on Split")]
pub struct SplitTerminator<'a, P>(CharSplits<'a, P>);
delegate_iter!{pattern &'a str : SplitTerminator<'a, P>} delegate_iter!{pattern &'a str : SplitTerminator<'a, P>}
/// Return type of `StrExt::splitn` /// Return type of `StrExt::splitn`
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub struct SplitN<'a, P>(CharSplitsN<'a, P>); pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>);
delegate_iter!{pattern forward &'a str : SplitN<'a, P>} delegate_iter!{pattern forward &'a str : SplitN<'a, P>}
/// Return type of `StrExt::rsplitn` /// Return type of `StrExt::rsplitn`
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub struct RSplitN<'a, P>(CharSplitsN<'a, P>); pub struct RSplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>);
delegate_iter!{pattern forward &'a str : RSplitN<'a, P>} delegate_iter!{pattern forward &'a str : RSplitN<'a, P>}
/// Methods for string slices /// Methods for string slices
@ -1328,36 +1314,40 @@ pub trait StrExt {
// NB there are no docs here are they're all located on the StrExt trait in // NB there are no docs here are they're all located on the StrExt trait in
// libcollections, not here. // libcollections, not here.
fn contains(&self, pat: &str) -> bool; fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
fn contains_char<P: CharEq>(&self, pat: P) -> bool; fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
fn chars<'a>(&'a self) -> Chars<'a>; fn chars<'a>(&'a self) -> Chars<'a>;
fn bytes<'a>(&'a self) -> Bytes<'a>; fn bytes<'a>(&'a self) -> Bytes<'a>;
fn char_indices<'a>(&'a self) -> CharIndices<'a>; fn char_indices<'a>(&'a self) -> CharIndices<'a>;
fn split<'a, P: CharEq>(&'a self, pat: P) -> Split<'a, P>; fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
fn splitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
fn split_terminator<'a, P: CharEq>(&'a self, pat: P) -> SplitTerminator<'a, P>; fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
fn rsplitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>; fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>; fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a>; fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P>;
fn lines<'a>(&'a self) -> Lines<'a>; fn lines<'a>(&'a self) -> Lines<'a>;
fn lines_any<'a>(&'a self) -> LinesAny<'a>; fn lines_any<'a>(&'a self) -> LinesAny<'a>;
fn char_len(&self) -> usize; fn char_len(&self) -> usize;
fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str; fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str; unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
fn starts_with(&self, pat: &str) -> bool; fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
fn ends_with(&self, pat: &str) -> bool; fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
fn trim_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str; where P::Searcher: ReverseSearcher<'a>;
fn trim_left_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str; fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
fn trim_right_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str; where P::Searcher: DoubleEndedSearcher<'a>;
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: ReverseSearcher<'a>;
fn is_char_boundary(&self, index: usize) -> bool; fn is_char_boundary(&self, index: usize) -> bool;
fn char_range_at(&self, start: usize) -> CharRange; fn char_range_at(&self, start: usize) -> CharRange;
fn char_range_at_reverse(&self, start: usize) -> CharRange; fn char_range_at_reverse(&self, start: usize) -> CharRange;
fn char_at(&self, i: usize) -> char; fn char_at(&self, i: usize) -> char;
fn char_at_reverse(&self, i: usize) -> char; fn char_at_reverse(&self, i: usize) -> char;
fn as_bytes<'a>(&'a self) -> &'a [u8]; fn as_bytes<'a>(&'a self) -> &'a [u8];
fn find<P: CharEq>(&self, pat: P) -> Option<usize>; fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize>; fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
fn find_str(&self, pat: &str) -> Option<usize>; where P::Searcher: ReverseSearcher<'a>;
fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>; fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
fn subslice_offset(&self, inner: &str) -> usize; fn subslice_offset(&self, inner: &str) -> usize;
fn as_ptr(&self) -> *const u8; fn as_ptr(&self) -> *const u8;
@ -1375,13 +1365,13 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
impl StrExt for str { impl StrExt for str {
#[inline] #[inline]
fn contains(&self, needle: &str) -> bool { fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
self.find_str(needle).is_some() pat.is_contained_in(self)
} }
#[inline] #[inline]
fn contains_char<P: CharEq>(&self, pat: P) -> bool { fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
self.find(pat).is_some() pat.is_contained_in(self)
} }
#[inline] #[inline]
@ -1400,18 +1390,18 @@ impl StrExt for str {
} }
#[inline] #[inline]
fn split<P: CharEq>(&self, pat: P) -> Split<P> { fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
Split(CharSplits { Split(CharSplits {
string: self, start: 0,
only_ascii: pat.only_ascii(), end: self.len(),
sep: pat, matcher: pat.into_searcher(self),
allow_trailing_empty: true, allow_trailing_empty: true,
finished: false, finished: false,
}) })
} }
#[inline] #[inline]
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> { fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
SplitN(CharSplitsN { SplitN(CharSplitsN {
iter: self.split(pat).0, iter: self.split(pat).0,
count: count, count: count,
@ -1420,7 +1410,7 @@ impl StrExt for str {
} }
#[inline] #[inline]
fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> { fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
SplitTerminator(CharSplits { SplitTerminator(CharSplits {
allow_trailing_empty: false, allow_trailing_empty: false,
..self.split(pat).0 ..self.split(pat).0
@ -1428,7 +1418,7 @@ impl StrExt for str {
} }
#[inline] #[inline]
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> { fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
RSplitN(CharSplitsN { RSplitN(CharSplitsN {
iter: self.split(pat).0, iter: self.split(pat).0,
count: count, count: count,
@ -1437,22 +1427,14 @@ impl StrExt for str {
} }
#[inline] #[inline]
fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> { fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
assert!(!sep.is_empty()); MatchIndices(pat.into_searcher(self))
MatchIndices {
haystack: self,
needle: sep,
searcher: Searcher::new(self.as_bytes(), sep.as_bytes())
}
} }
#[inline] #[inline]
fn split_str<'a>(&'a self, sep: &'a str) -> SplitStr<'a> { #[allow(deprecated) /* for SplitStr */ ]
SplitStr { fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
it: self.match_indices(sep), SplitStr(self.split(pat))
last_end: 0,
finished: false
}
} }
#[inline] #[inline]
@ -1500,54 +1482,69 @@ impl StrExt for str {
#[inline] #[inline]
unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
mem::transmute(Slice { mem::transmute(Slice {
data: self.as_ptr().offset(begin as isize), data: self.as_ptr().offset(begin as int),
len: end - begin, len: end - begin,
}) })
} }
#[inline] #[inline]
fn starts_with(&self, needle: &str) -> bool { fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
let n = needle.len(); pat.is_prefix_of(self)
self.len() >= n && needle.as_bytes() == &self.as_bytes()[..n]
} }
#[inline] #[inline]
fn ends_with(&self, needle: &str) -> bool { fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
let (m, n) = (self.len(), needle.len()); where P::Searcher: ReverseSearcher<'a>
m >= n && needle.as_bytes() == &self.as_bytes()[m-n..] {
pat.is_suffix_of(self)
} }
#[inline] #[inline]
fn trim_matches<P: CharEq>(&self, mut pat: P) -> &str { fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
let cur = match self.find(|c: char| !pat.matches(c)) { where P::Searcher: DoubleEndedSearcher<'a>
None => "", {
Some(i) => unsafe { self.slice_unchecked(i, self.len()) } let mut i = 0;
}; let mut j = 0;
match cur.rfind(|c: char| !pat.matches(c)) { let mut matcher = pat.into_searcher(self);
None => "", if let Some((a, b)) = matcher.next_reject() {
Some(i) => { i = a;
let right = cur.char_range_at(i).next; j = b; // Rember earliest known match, correct it below if
unsafe { cur.slice_unchecked(0, right) } // last match is different
} }
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
}
unsafe {
// Searcher is known to return valid indices
self.slice_unchecked(i, j)
} }
} }
#[inline] #[inline]
fn trim_left_matches<P: CharEq>(&self, mut pat: P) -> &str { fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
match self.find(|c: char| !pat.matches(c)) { let mut i = self.len();
None => "", let mut matcher = pat.into_searcher(self);
Some(first) => unsafe { self.slice_unchecked(first, self.len()) } if let Some((a, _)) = matcher.next_reject() {
i = a;
}
unsafe {
// Searcher is known to return valid indices
self.slice_unchecked(i, self.len())
} }
} }
#[inline] #[inline]
fn trim_right_matches<P: CharEq>(&self, mut pat: P) -> &str { fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
match self.rfind(|c: char| !pat.matches(c)) { where P::Searcher: ReverseSearcher<'a>
None => "", {
Some(last) => { let mut j = 0;
let next = self.char_range_at(last).next; let mut matcher = pat.into_searcher(self);
unsafe { self.slice_unchecked(0, next) } if let Some((_, b)) = matcher.next_reject_back() {
} j = b;
}
unsafe {
// Searcher is known to return valid indices
self.slice_unchecked(0, j)
} }
} }
@ -1612,36 +1609,18 @@ impl StrExt for str {
unsafe { mem::transmute(self) } unsafe { mem::transmute(self) }
} }
fn find<P: CharEq>(&self, mut pat: P) -> Option<usize> { fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
if pat.only_ascii() { pat.into_searcher(self).next_match().map(|(i, _)| i)
self.bytes().position(|b| pat.matches(b as char))
} else {
for (index, c) in self.char_indices() {
if pat.matches(c) { return Some(index); }
}
None
}
} }
fn rfind<P: CharEq>(&self, mut pat: P) -> Option<usize> { fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
if pat.only_ascii() { where P::Searcher: ReverseSearcher<'a>
self.bytes().rposition(|b| pat.matches(b as char)) {
} else { pat.into_searcher(self).next_match_back().map(|(i, _)| i)
for (index, c) in self.char_indices().rev() {
if pat.matches(c) { return Some(index); }
}
None
}
} }
fn find_str(&self, needle: &str) -> Option<usize> { fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
if needle.is_empty() { self.find(pat)
Some(0)
} else {
self.match_indices(needle)
.next()
.map(|(start, _end)| start)
}
} }
#[inline] #[inline]

495
src/libcore/str/pattern.rs Normal file
View file

@ -0,0 +1,495 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![allow(deprecated) /* for CharEq */ ]
use prelude::*;
use super::CharEq;
// Pattern
/// A string pattern.
///
/// A `Pattern<'a>` expresses that the implementing type
/// can be used as a string pattern for searching in a `&'a str`.
///
/// For example, both `'a'` and `"aa"` are patterns that
/// would match at index `1` in the string `"baaaab"`.
///
/// The trait itself acts as a builder for an associated
/// `Searcher` type, which does the actual work of finding
/// occurences of the pattern in a string.
pub trait Pattern<'a>: Sized {
/// Associated searcher for this pattern
type Searcher: Searcher<'a>;
/// Construct the associated searcher from
/// `self` and the `haystack` to search in.
fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
/// Check whether the pattern matches anywhere in the haystack
#[inline]
fn is_contained_in(self, haystack: &'a str) -> bool {
self.into_searcher(haystack).next_match().is_some()
}
/// Check whether the pattern matches at the front of the haystack
#[inline]
fn is_prefix_of(self, haystack: &'a str) -> bool {
match self.into_searcher(haystack).next() {
SearchStep::Match(0, _) => true,
_ => false,
}
}
/// Check whether the pattern matches at the back of the haystack
#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
where Self::Searcher: ReverseSearcher<'a>
{
match self.into_searcher(haystack).next_back() {
SearchStep::Match(_, j) if haystack.len() == j => true,
_ => false,
}
}
}
// Searcher
/// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum SearchStep {
/// Expresses that a match of the pattern has been found at
/// `haystack[a..b]`.
Match(usize, usize),
/// Expresses that `haystack[a..b]` has been rejected as a possible match
/// of the pattern.
///
/// Note that there might be more than one `Reject` betwen two `Match`es,
/// there is no requirement for them to be combined into one.
Reject(usize, usize),
/// Expresses that every byte of the haystack has been visted, ending
/// the iteration.
Done
}
/// A searcher for a string pattern.
///
/// This trait provides methods for searching for non-overlapping
/// matches of a pattern starting from the front (left) of a string.
///
/// It will be implemented by associated `Searcher`
/// types of the `Pattern` trait.
///
/// The trait is marked unsafe because the indices returned by the
/// `next()` methods are required to lie on valid utf8 boundaries in
/// the haystack. This enables consumers of this trait to
/// slice the haystack without additional runtime checks.
pub unsafe trait Searcher<'a> {
/// Getter for the underlaying string to be searched in
///
/// Will always return the same `&str`
fn haystack(&self) -> &'a str;
/// Performs the next search step starting from the front.
///
/// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
/// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
/// pattern, even partially.
/// - Returns `Done` if every byte of the haystack has been visited
///
/// The stream of `Match` and `Reject` values up to a `Done`
/// will contain index ranges that are adjacent, non-overlapping,
/// covering the whole haystack, and laying on utf8 boundaries.
///
/// A `Match` result needs to contain the whole matched pattern,
/// however `Reject` results may be split up into arbitrary
/// many adjacent fragments. Both ranges may have zero length.
///
/// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
/// might produce the stream
/// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
fn next(&mut self) -> SearchStep;
/// Find the next `Match` result. See `next()`
#[inline]
fn next_match(&mut self) -> Option<(usize, usize)> {
loop {
match self.next() {
SearchStep::Match(a, b) => return Some((a, b)),
SearchStep::Done => return None,
_ => continue,
}
}
}
/// Find the next `Reject` result. See `next()`
#[inline]
fn next_reject(&mut self) -> Option<(usize, usize)> {
loop {
match self.next() {
SearchStep::Reject(a, b) => return Some((a, b)),
SearchStep::Done => return None,
_ => continue,
}
}
}
}
/// A reverse searcher for a string pattern.
///
/// This trait provides methods for searching for non-overlapping
/// matches of a pattern starting from the back (right) of a string.
///
/// It will be implemented by associated `Searcher`
/// types of the `Pattern` trait if the pattern supports searching
/// for it from the back.
///
/// The index ranges returned by this trait are not required
/// to exactly match those of the forward search in reverse.
///
/// For the reason why this trait is marked unsafe, see them
/// parent trait `Searcher`.
pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
/// Performs the next search step starting from the back.
///
/// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
/// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
/// pattern, even partially.
/// - Returns `Done` if every byte of the haystack has been visited
///
/// The stream of `Match` and `Reject` values up to a `Done`
/// will contain index ranges that are adjacent, non-overlapping,
/// covering the whole haystack, and laying on utf8 boundaries.
///
/// A `Match` result needs to contain the whole matched pattern,
/// however `Reject` results may be split up into arbitrary
/// many adjacent fragments. Both ranges may have zero length.
///
/// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
/// might produce the stream
/// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
fn next_back(&mut self) -> SearchStep;
/// Find the next `Match` result. See `next_back()`
#[inline]
fn next_match_back(&mut self) -> Option<(usize, usize)>{
loop {
match self.next_back() {
SearchStep::Match(a, b) => return Some((a, b)),
SearchStep::Done => return None,
_ => continue,
}
}
}
/// Find the next `Reject` result. See `next_back()`
#[inline]
fn next_reject_back(&mut self) -> Option<(usize, usize)>{
loop {
match self.next_back() {
SearchStep::Reject(a, b) => return Some((a, b)),
SearchStep::Done => return None,
_ => continue,
}
}
}
}
/// A marker trait to express that a `ReverseSearcher`
/// can be used for a `DoubleEndedIterator` implementation.
///
/// For this, the impl of `Searcher` and `ReverseSearcher` need
/// to follow these conditions:
///
/// - All results of `next()` need to be identical
/// to the results of `next_back()` in reverse order.
/// - `next()` and `next_back()` need to behave as
/// the two ends of a range of values, that is they
/// can not "walk past each other".
///
/// # Example
///
/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
/// `char` only requires looking at one at a time, which behaves the same
/// from both ends.
///
/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
/// the pattern `"aa"` in the haystack `"aaa"` matches as either
/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
// Impl for a CharEq wrapper
struct CharEqPattern<C: CharEq>(C);
struct CharEqSearcher<'a, C: CharEq> {
char_eq: C,
haystack: &'a str,
char_indices: super::CharIndices<'a>,
#[allow(dead_code)]
ascii_only: bool,
}
impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
type Searcher = CharEqSearcher<'a, C>;
#[inline]
fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
CharEqSearcher {
ascii_only: self.0.only_ascii(),
haystack: haystack,
char_eq: self.0,
char_indices: haystack.char_indices(),
}
}
}
unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
#[inline]
fn haystack(&self) -> &'a str {
self.haystack
}
#[inline]
fn next(&mut self) -> SearchStep {
let s = &mut self.char_indices;
// Compare lengths of the internal byte slice iterator
// to find length of current char
let (pre_len, _) = s.iter.iter.size_hint();
if let Some((i, c)) = s.next() {
let (len, _) = s.iter.iter.size_hint();
let char_len = pre_len - len;
if self.char_eq.matches(c) {
return SearchStep::Match(i, i + char_len);
} else {
return SearchStep::Reject(i, i + char_len);
}
}
SearchStep::Done
}
}
unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
#[inline]
fn next_back(&mut self) -> SearchStep {
let s = &mut self.char_indices;
// Compare lengths of the internal byte slice iterator
// to find length of current char
let (pre_len, _) = s.iter.iter.size_hint();
if let Some((i, c)) = s.next_back() {
let (len, _) = s.iter.iter.size_hint();
let char_len = pre_len - len;
if self.char_eq.matches(c) {
return SearchStep::Match(i, i + char_len);
} else {
return SearchStep::Reject(i, i + char_len);
}
}
SearchStep::Done
}
}
impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
// Impl for &str
// Todo: Optimize the naive implementation here
#[derive(Clone)]
struct StrSearcher<'a, 'b> {
haystack: &'a str,
needle: &'b str,
start: usize,
end: usize,
done: bool,
}
/// Non-allocating substring search.
///
/// Will handle the pattern `""` as returning empty matches at each utf8
/// boundary.
impl<'a, 'b> Pattern<'a> for &'b str {
type Searcher = StrSearcher<'a, 'b>;
#[inline]
fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
StrSearcher {
haystack: haystack,
needle: self,
start: 0,
end: haystack.len(),
done: false,
}
}
}
unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
#[inline]
fn haystack(&self) -> &'a str {
self.haystack
}
#[inline]
fn next(&mut self) -> SearchStep {
str_search_step(self,
|m: &mut StrSearcher| {
// Forward step for empty needle
let current_start = m.start;
if !m.done {
m.start = m.haystack.char_range_at(current_start).next;
}
SearchStep::Match(current_start, current_start)
},
|m: &mut StrSearcher| {
// Forward step for nonempty needle
let current_start = m.start;
// Compare byte window because this might break utf8 boundaries
let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
if possible_match == m.needle.as_bytes() {
m.start += m.needle.len();
SearchStep::Match(current_start, m.start)
} else {
// Skip a char
let haystack_suffix = &m.haystack[m.start..];
m.start += haystack_suffix.chars().next().unwrap().len_utf8();
SearchStep::Reject(current_start, m.start)
}
})
}
}
unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
#[inline]
fn next_back(&mut self) -> SearchStep {
str_search_step(self,
|m: &mut StrSearcher| {
// Backward step for empty needle
let current_end = m.end;
if !m.done {
m.end = m.haystack.char_range_at_reverse(current_end).next;
}
SearchStep::Match(current_end, current_end)
},
|m: &mut StrSearcher| {
// Backward step for nonempty needle
let current_end = m.end;
// Compare byte window because this might break utf8 boundaries
let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
if possible_match == m.needle.as_bytes() {
m.end -= m.needle.len();
SearchStep::Match(m.end, current_end)
} else {
// Skip a char
let haystack_prefix = &m.haystack[..m.end];
m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
SearchStep::Reject(m.end, current_end)
}
})
}
}
// Helper function for encapsulating the common control flow
// of doing a search step from the front or doing a search step from the back
fn str_search_step<F, G>(mut m: &mut StrSearcher,
empty_needle_step: F,
nonempty_needle_step: G) -> SearchStep
where F: FnOnce(&mut StrSearcher) -> SearchStep,
G: FnOnce(&mut StrSearcher) -> SearchStep
{
if m.done {
SearchStep::Done
} else if m.needle.len() == 0 && m.start <= m.end {
// Case for needle == ""
if m.start == m.end {
m.done = true;
}
empty_needle_step(&mut m)
} else if m.start + m.needle.len() <= m.end {
// Case for needle != ""
nonempty_needle_step(&mut m)
} else if m.start < m.end {
// Remaining slice shorter than needle, reject it
m.done = true;
SearchStep::Reject(m.start, m.end)
} else {
m.done = true;
SearchStep::Done
}
}
macro_rules! associated_items {
($t:ty, $s:ident, $e:expr) => {
// FIXME: #22463
//type Searcher = $t;
fn into_searcher(self, haystack: &'a str) -> $t {
let $s = self;
$e.into_searcher(haystack)
}
#[inline]
fn is_contained_in(self, haystack: &'a str) -> bool {
let $s = self;
$e.is_contained_in(haystack)
}
#[inline]
fn is_prefix_of(self, haystack: &'a str) -> bool {
let $s = self;
$e.is_prefix_of(haystack)
}
// FIXME: #21750
/*#[inline]
fn is_suffix_of(self, haystack: &'a str) -> bool
where $t: ReverseSearcher<'a>
{
let $s = self;
$e.is_suffix_of(haystack)
}*/
}
}
// CharEq delegation impls
/// Searches for chars that are equal to a given char
impl<'a> Pattern<'a> for char {
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
s, CharEqPattern(s));
}
/// Searches for chars that are equal to any of the chars in the array
impl<'a, 'b> Pattern<'a> for &'b [char] {
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
s, CharEqPattern(s));
}
/// Searches for chars that match the given predicate
impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
type Searcher = <CharEqPattern<Self> as Pattern<'a>>::Searcher;
associated_items!(<CharEqPattern<Self> as Pattern<'a>>::Searcher,
s, CharEqPattern(s));
}
// Deref-forward impl
use ops::Deref;
/// Delegates to the next deref coercion of `Self` that implements `Pattern`
impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
where &'b P: Pattern<'a>
{
type Searcher = <&'b P as Pattern<'a>>::Searcher;
associated_items!(<&'b P as Pattern<'a>>::Searcher,
s, (&**s));
}

View file

@ -1,4 +1,4 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at // file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT. // http://rust-lang.org/COPYRIGHT.
// //
@ -8,6 +8,23 @@
// option. This file may not be copied, modified, or distributed // option. This file may not be copied, modified, or distributed
// except according to those terms. // except according to those terms.
#[test]
fn test_pattern_deref_forward() {
let data = "aabcdaa";
assert!(data.contains("bcd"));
assert!(data.contains(&"bcd"));
assert!(data.contains(&&"bcd"));
assert!(data.contains(&"bcd".to_string()));
assert!(data.contains(&&"bcd".to_string()));
}
#[test]
fn test_empty_match_indices() {
let data = "aä中!";
let vec: Vec<_> = data.match_indices("").collect();
assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
}
#[test] #[test]
fn test_bool_from_str() { fn test_bool_from_str() {
assert_eq!("true".parse().ok(), Some(true)); assert_eq!("true".parse().ok(), Some(true));
@ -121,3 +138,252 @@ fn test_utf16_code_units() {
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
vec![0xE9, 0xD83D, 0xDCA9]) vec![0xE9, 0xD83D, 0xDCA9])
} }
#[test]
fn starts_with_in_unicode() {
assert!(!"├── Cargo.toml".starts_with("# "));
}
#[test]
fn starts_short_long() {
assert!(!"".starts_with("##"));
assert!(!"##".starts_with("####"));
assert!("####".starts_with("##"));
assert!(!"##ä".starts_with("####"));
assert!("####ä".starts_with("##"));
assert!(!"##".starts_with("####ä"));
assert!("##ä##".starts_with("##ä"));
assert!("".starts_with(""));
assert!("ä".starts_with(""));
assert!("".starts_with(""));
assert!("##ä".starts_with(""));
assert!("ä###".starts_with(""));
assert!("#ä##".starts_with(""));
assert!("##ä#".starts_with(""));
}
#[test]
fn contains_weird_cases() {
assert!("* \t".contains_char(' '));
assert!(!"* \t".contains_char('?'));
assert!(!"* \t".contains_char('\u{1F4A9}'));
}
#[test]
fn trim_ws() {
assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()),
"a \t ");
assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()),
" \t a");
assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()),
"a");
assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()),
"");
assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()),
"");
assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()),
"");
}
mod pattern {
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};
macro_rules! make_test {
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
mod $name {
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};
use super::{cmp_search_to_vec};
#[test]
fn fwd() {
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
}
#[test]
fn bwd() {
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
}
}
}
}
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
right: Vec<SearchStep>)
where P::Searcher: ReverseSearcher<'a>
{
let mut searcher = pat.into_searcher(haystack);
let mut v = vec![];
loop {
match if !rev {searcher.next()} else {searcher.next_back()} {
Match(a, b) => v.push(Match(a, b)),
Reject(a, b) => v.push(Reject(a, b)),
Done => break,
}
}
if rev {
v.reverse();
}
assert_eq!(v, right);
}
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
Reject(0, 1),
Match (1, 3),
Reject(3, 4),
Match (4, 6),
Reject(6, 7),
]);
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
Match(0, 0),
Match(1, 1),
Match(2, 2),
Match(3, 3),
Match(4, 4),
Match(5, 5),
Match(6, 6),
Match(7, 7),
]);
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
Match(0, 0),
Match(3, 3),
Match(6, 6),
Match(9, 9),
]);
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
Match(0, 0),
]);
make_test!(str_searcher_nonempty_needle_empty_haystack, "", "", [
]);
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
Reject(0, 1),
Match (1, 2),
Match (2, 3),
Reject(3, 4),
Match (4, 5),
Match (5, 6),
Reject(6, 7),
]);
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
Reject(0, 1),
Reject(1, 2),
Reject(2, 3),
]);
}
mod bench {
macro_rules! make_test_inner {
($s:ident, $code:expr, $name:ident, $str:expr) => {
#[bench]
fn $name(bencher: &mut Bencher) {
let mut $s = $str;
black_box(&mut $s);
bencher.iter(|| $code);
}
}
}
macro_rules! make_test {
($name:ident, $s:ident, $code:expr) => {
mod $name {
use test::Bencher;
use test::black_box;
// Short strings: 65 bytes each
make_test_inner!($s, $code, short_ascii,
"Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
make_test_inner!($s, $code, short_mixed,
"ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
make_test_inner!($s, $code, short_pile_of_poo,
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.
In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.
Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.
Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.
Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
}
}
}
make_test!(chars_count, s, s.chars().count());
make_test!(contains_bang_str, s, s.contains("!"));
make_test!(contains_bang_char, s, s.contains_char('!'));
make_test!(match_indices_a_str, s, s.match_indices("a").count());
make_test!(split_str_a_str, s, s.split_str("a").count());
make_test!(trim_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_matches(|c: char| c.is_ascii())
});
make_test!(trim_left_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_left_matches(|c: char| c.is_ascii())
});
make_test!(trim_right_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_right_matches(|c: char| c.is_ascii())
});
make_test!(find_underscore_char, s, s.find('_'));
make_test!(rfind_underscore_char, s, s.rfind('_'));
make_test!(find_underscore_str, s, s.find_str("_"));
make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
make_test!(find_zzz_str, s, s.find_str("\u{1F4A4}"));
make_test!(split_space_char, s, s.split(' ').count());
make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());
make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());
make_test!(split_str_space_str, s, s.split_str(" ").count());
make_test!(split_str_ad_str, s, s.split_str("ad").count());
}

View file

@ -463,7 +463,7 @@ impl<'a> LabelText<'a> {
fn pre_escaped_content(self) -> Cow<'a, str> { fn pre_escaped_content(self) -> Cow<'a, str> {
match self { match self {
EscStr(s) => s, EscStr(s) => s,
LabelStr(s) => if s.contains_char('\\') { LabelStr(s) => if s.contains('\\') {
(&*s).escape_default().into_cow() (&*s).escape_default().into_cow()
} else { } else {
s s

View file

@ -784,7 +784,7 @@ impl NonCamelCaseTypes {
// start with a non-lowercase letter rather than non-uppercase // start with a non-lowercase letter rather than non-uppercase
// ones (some scripts don't have a concept of upper/lowercase) // ones (some scripts don't have a concept of upper/lowercase)
ident.len() > 0 && !ident.char_at(0).is_lowercase() && !ident.contains_char('_') ident.len() > 0 && !ident.char_at(0).is_lowercase() && !ident.contains('_')
} }
fn to_camel_case(s: &str) -> String { fn to_camel_case(s: &str) -> String {

View file

@ -219,7 +219,7 @@ pub fn rust_path() -> Vec<Path> {
let mut env_rust_path: Vec<Path> = match get_rust_path() { let mut env_rust_path: Vec<Path> = match get_rust_path() {
Some(env_path) => { Some(env_path) => {
let env_path_components = let env_path_components =
env_path.split_str(PATH_ENTRY_SEPARATOR); env_path.split(PATH_ENTRY_SEPARATOR);
env_path_components.map(|s| Path::new(s)).collect() env_path_components.map(|s| Path::new(s)).collect()
} }
None => Vec::new() None => Vec::new()

View file

@ -90,7 +90,7 @@ pub fn maybe_print_constraints_for<'a, 'tcx>(region_vars: &RegionVarBindings<'a,
tcx.sess.bug("empty string provided as RUST_REGION_GRAPH"); tcx.sess.bug("empty string provided as RUST_REGION_GRAPH");
} }
if output_template.contains_char('%') { if output_template.contains('%') {
let mut new_str = String::new(); let mut new_str = String::new();
for c in output_template.chars() { for c in output_template.chars() {
if c == '%' { if c == '%' {

View file

@ -280,9 +280,9 @@ fn split_msg_into_multilines(msg: &str) -> Option<String> {
} }
let mut tail = &msg[head..]; let mut tail = &msg[head..];
let third = tail.find_str("(values differ") let third = tail.find("(values differ")
.or(tail.find_str("(lifetime")) .or(tail.find("(lifetime"))
.or(tail.find_str("(cyclic type of infinite size")); .or(tail.find("(cyclic type of infinite size"));
// Insert `\n` before any remaining messages which match. // Insert `\n` before any remaining messages which match.
if let Some(pos) = third { if let Some(pos) = third {
// The end of the message may just be wrapped in `()` without // The end of the message may just be wrapped in `()` without

View file

@ -348,7 +348,7 @@ impl FromStr for UserIdentifiedItem {
type Err = (); type Err = ();
fn from_str(s: &str) -> Result<UserIdentifiedItem, ()> { fn from_str(s: &str) -> Result<UserIdentifiedItem, ()> {
Ok(s.parse().map(ItemViaNode).unwrap_or_else(|_| { Ok(s.parse().map(ItemViaNode).unwrap_or_else(|_| {
ItemViaPath(s.split_str("::").map(|s| s.to_string()).collect()) ItemViaPath(s.split("::").map(|s| s.to_string()).collect())
})) }))
} }
} }

View file

@ -1469,7 +1469,7 @@ fn full_path(cx: &Context, item: &clean::Item) -> String {
fn shorter<'a>(s: Option<&'a str>) -> &'a str { fn shorter<'a>(s: Option<&'a str>) -> &'a str {
match s { match s {
Some(s) => match s.find_str("\n\n") { Some(s) => match s.find("\n\n") {
Some(pos) => &s[..pos], Some(pos) => &s[..pos],
None => s, None => s,
}, },

View file

@ -507,7 +507,7 @@ impl GenericPath for Path {
fn path_relative_from(&self, base: &Path) -> Option<Path> { fn path_relative_from(&self, base: &Path) -> Option<Path> {
fn comp_requires_verbatim(s: &str) -> bool { fn comp_requires_verbatim(s: &str) -> bool {
s == "." || s == ".." || s.contains_char(SEP2) s == "." || s == ".." || s.contains(SEP2)
} }
if !self.equiv_prefix(base) { if !self.equiv_prefix(base) {

View file

@ -92,7 +92,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
let mut first = true; let mut first = true;
for line in &lines { for line in &lines {
for (j, c) in line.chars().enumerate() { for (j, c) in line.chars().enumerate() {
if j > i || !"* \t".contains_char(c) { if j > i || !"* \t".contains(c) {
can_trim = false; can_trim = false;
break; break;
} }
@ -264,7 +264,7 @@ fn read_block_comment(rdr: &mut StringReader,
if is_block_doc_comment(&curr_line[..]) { if is_block_doc_comment(&curr_line[..]) {
return return
} }
assert!(!curr_line.contains_char('\n')); assert!(!curr_line.contains('\n'));
lines.push(curr_line); lines.push(curr_line);
} else { } else {
let mut level: isize = 1; let mut level: isize = 1;

View file

@ -2496,7 +2496,7 @@ impl<'a> Parser<'a> {
let fstr = n.as_str(); let fstr = n.as_str();
self.span_err(last_span, self.span_err(last_span,
&format!("unexpected token: `{}`", n.as_str())); &format!("unexpected token: `{}`", n.as_str()));
if fstr.chars().all(|x| "0123456789.".contains_char(x)) { if fstr.chars().all(|x| "0123456789.".contains(x)) {
let float = match fstr.parse::<f64>().ok() { let float = match fstr.parse::<f64>().ok() {
Some(f) => f, Some(f) => f,
None => continue, None => continue,

View file

@ -84,7 +84,7 @@ impl UnicodeStr for str {
#[inline] #[inline]
fn trim(&self) -> &str { fn trim(&self) -> &str {
self.trim_left().trim_right() self.trim_matches(|c: char| c.is_whitespace())
} }
#[inline] #[inline]