From 624a685283f66afcb40ee3c235624aedebc2f08f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Fri, 22 Mar 2013 02:04:13 +0100 Subject: [PATCH 1/7] Moved float str buffer constants to the strconv module --- src/libcore/num/strconv.rs | 13 ++++++++++--- src/libcore/str.rs | 8 -------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs index 26f0582bfb2..e73a4a2ccaa 100644 --- a/src/libcore/num/strconv.rs +++ b/src/libcore/num/strconv.rs @@ -130,6 +130,13 @@ impl_NumStrConv_Integer!(u16) impl_NumStrConv_Integer!(u32) impl_NumStrConv_Integer!(u64) + +// Special value strings as [u8] consts. +const inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; +const positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +const negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +const nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; + /** * Converts a number to its string representation as a byte vector. 
* This is meant to be a common base implementation for all numeric string @@ -479,15 +486,15 @@ pub fn from_str_bytes_common+ } if special { - if buf == str::inf_buf || buf == str::positive_inf_buf { + if buf == inf_buf || buf == positive_inf_buf { return NumStrConv::inf(); - } else if buf == str::negative_inf_buf { + } else if buf == negative_inf_buf { if negative { return NumStrConv::neg_inf(); } else { return None; } - } else if buf == str::nan_buf { + } else if buf == nan_buf { return NumStrConv::NaN(); } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 92358c6a5e9..e91120e7790 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1919,14 +1919,6 @@ static tag_five_b: uint = 248u; static max_five_b: uint = 67108864u; static tag_six_b: uint = 252u; -// Constants used for converting strs to floats -pub static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; -pub static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, - 'n' as u8, 'f' as u8]; -pub static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, - 'n' as u8, 'f' as u8]; -pub static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; - /** * Work with the byte buffer of a string. 
* From d74606ead60d524eb72afad2cd8b45facd6c5d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Sat, 23 Mar 2013 17:25:16 +0100 Subject: [PATCH 2/7] pre-rebase commit --- src/libcore/str.rs | 201 +++++++++++++++++++++--------------------- src/libstd/getopts.rs | 19 +++- 2 files changed, 119 insertions(+), 101 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index e91120e7790..28f76125746 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -56,15 +56,15 @@ pub fn from_slice(s: &str) -> ~str { impl ToStr for ~str { #[inline(always)] - fn to_str(&self) -> ~str { copy *self } + fn to_str(&self) -> ~str { from_slice(*self) } } impl ToStr for &'self str { #[inline(always)] - fn to_str(&self) -> ~str { ::str::from_slice(*self) } + fn to_str(&self) -> ~str { from_slice(*self) } } impl ToStr for @str { #[inline(always)] - fn to_str(&self) -> ~str { ::str::from_slice(*self) } + fn to_str(&self) -> ~str { from_slice(*self) } } /** @@ -383,7 +383,7 @@ Section: Transforming strings */ /** - * Converts a string to a vector of bytes + * Converts a string to a unique vector of bytes * * The result vector is not null-terminated. */ @@ -403,14 +403,11 @@ pub fn byte_slice(s: &str, f: &fn(v: &[u8]) -> T) -> T { } } -/// Convert a string to a vector of characters -pub fn chars(s: &str) -> ~[char] { - let mut buf = ~[], i = 0; - let len = len(s); - while i < len { - let CharRange {ch, next} = char_range_at(s, i); - unsafe { buf.push(ch); } - i = next; +/// Convert a string to a unique vector of characters +pub fn to_chars(s: &str) -> ~[char] { + let mut buf = ~[]; + for each_char(s) |c| { + buf.push(c); } buf } @@ -418,7 +415,7 @@ pub fn chars(s: &str) -> ~[char] { /** * Take a substring of another. * - * Returns a string containing `n` characters starting at byte offset + * Returns a slice pointing at `n` characters starting from byte offset * `begin`. 
*/ pub fn substr(s: &'a str, begin: uint, n: uint) -> &'a str { @@ -437,10 +434,17 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str { unsafe { raw::slice_bytes(s, begin, end) } } -/// Splits a string into substrings at each occurrence of a given -/// character. -pub fn split_char(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), true, true) +/// Splits a string into substrings at each occurrence of a given character +pub fn each_split_char(s: &str, sep: char, it: &fn(&str) -> bool) { + each_split_char_inner(s, sep, len(s), true, true, it) +} + +/** + * Like `split_char`, but a trailing empty string is omitted + * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`) + */ +pub fn each_split_char_no_trailing(s: &str, sep: char, it: &fn(&str) -> bool) { + each_split_char_inner(s, sep, len(s), true, false, it) } /** @@ -449,35 +453,25 @@ pub fn split_char(s: &str, sep: char) -> ~[~str] { * * The byte must be a valid UTF-8/ASCII byte */ -pub fn splitn_char(s: &str, sep: char, count: uint) -> ~[~str] { - split_char_inner(s, sep, count, true, true) +pub fn each_splitn_char(s: &str, sep: char, count: uint, it: &fn(&str) -> bool) { + each_split_char_inner(s, sep, count, true, true, it) } /// Like `split_char`, but omits empty strings from the returned vector -pub fn split_char_nonempty(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), false, false) +pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) { + each_split_char_inner(s, sep, len(s), false, false, it) } -/** - * Like `split_char`, but a trailing empty string is omitted - * (e.g. 
`split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn split_char_no_trailing(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), true, false) -} - -fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, - allow_trailing_empty: bool) -> ~[~str] { +fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, + allow_trailing_empty: bool), it: &fn(&str) -> bool) { if sep < 128u as char { let b = sep as u8, l = len(s); - let mut result = ~[], done = 0u; + let mut done = 0u; let mut i = 0u, start = 0u; while i < l && done < count { if s[i] == b { if allow_empty || start < i { - unsafe { - result.push(raw::slice_bytes_unique(s, start, i)); - } + if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; } } start = i + 1u; done += 1u; @@ -486,56 +480,48 @@ fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, } // only push a non-empty trailing substring if allow_trailing_empty || start < l { - unsafe { result.push(raw::slice_bytes_unique(s, start, l) ) }; + if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; } } - result } else { - split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty) + each_split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty, it) } } - /// Splits a string into substrings using a character function -pub fn split(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), true, true) +pub fn each_split(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { + each_split_inner(s, sepfn, len(s), true, true, it) +} + +/** + * Like `split`, but a trailing empty string is omitted + * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`) + */ +pub fn each_split_no_trailing(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { + each_split_inner(s, sepfn, len(s), true, false, it) } /** * Splits a string into substrings using a character function, cutting at * most `count` times. 
*/ -pub fn splitn(s: &str, - sepfn: &fn(char) -> bool, - count: uint) - -> ~[~str] { - split_inner(s, sepfn, count, true, true) +pub fn each_splitn(s: &str, sepfn: &fn(char) -> bool, count: uint, it: &fn(&str) -> bool) { + each_split_inner(s, sepfn, count, true, true, it) } /// Like `split`, but omits empty strings from the returned vector -pub fn split_nonempty(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), false, false) +pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { + each_split_inner(s, sepfn, len(s), false, false, it) } - -/** - * Like `split`, but a trailing empty string is omitted - * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn split_no_trailing(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), true, false) -} - -fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, - allow_empty: bool, allow_trailing_empty: bool) -> ~[~str] { +pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, + allow_empty: bool, allow_trailing_empty: bool), it: &fn(&str) -> bool) { let l = len(s); - let mut result = ~[], i = 0u, start = 0u, done = 0u; + let mut i = 0u, start = 0u, done = 0u; while i < l && done < count { let CharRange {ch, next} = char_range_at(s, i); if sepfn(ch) { if allow_empty || start < i { - unsafe { - result.push(raw::slice_bytes_unique(s, start, i)); - } + if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; } } start = next; done += 1u; @@ -543,11 +529,8 @@ fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, i = next; } if allow_trailing_empty || start < l { - unsafe { - result.push(raw::slice_bytes_unique(s, start, l)); - } + if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; } } - result } // See Issue #1932 for why this is a naive search @@ -596,22 +579,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { * fail_unless!(["", "XXX", "YYY", ""] 
== split_str(".XXX.YYY.", ".")) * ~~~ */ -pub fn split_str(s: &'a str, sep: &'b str) -> ~[~str] { - let mut result = ~[]; +pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) { do iter_between_matches(s, sep) |from, to| { - unsafe { result.push(raw::slice_bytes_unique(s, from, to)); } + if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } - result } -pub fn split_str_nonempty(s: &'a str, sep: &'b str) -> ~[~str] { - let mut result = ~[]; +pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) { do iter_between_matches(s, sep) |from, to| { if to > from { - unsafe { result.push(raw::slice_bytes_unique(s, from, to)); } + if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } } - result } /// Levenshtein Distance between two strings @@ -651,34 +630,32 @@ pub fn levdistance(s: &str, t: &str) -> uint { /** * Splits a string into a vector of the substrings separated by LF ('\n'). */ -pub fn lines(s: &str) -> ~[~str] { - split_char_no_trailing(s, '\n') -} +pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char(s, '\n', it) } /** * Splits a string into a vector of the substrings separated by LF ('\n') * and/or CR LF ("\r\n") */ -pub fn lines_any(s: &str) -> ~[~str] { - vec::map(lines(s), |s| { - let l = len(*s); - let mut cp = copy *s; +pub fn each_line_any(s: &str, it: &fn(&str) -> bool) { + for each_line(s) |s| { + let l = s.len(); if l > 0u && s[l - 1u] == '\r' as u8 { - unsafe { raw::set_len(&mut cp, l - 1u); } + if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return; } + } else { + if !it( s ) { return; } } - cp - }) + } } /// Splits a string into a vector of the substrings separated by whitespace -pub fn words(s: &str) -> ~[~str] { - split_nonempty(s, char::is_whitespace) +pub fn each_word(s: &str, it: &fn(&str) -> bool) { + each_split_nonempty(s, |c| char::is_whitespace(c), it) } /** Split a string into a vector of substrings, - * each of which is less than a limit + * each of which is 
less bytes long than a limit */ -pub fn split_within(ss: &str, lim: uint) -> ~[~str] { +pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) { let words = str::words(ss); // empty? @@ -705,6 +682,22 @@ pub fn split_within(ss: &str, lim: uint) -> ~[~str] { if row != ~"" { rows.push(row); } rows + // NOTE: Finish change here + + let mut last_slice_i = 0, last_word_i = 0, word_start = true; + for each_chari(s) |i, c| { + if (i - last_slice_i) <= lim { + if char::is_whitespace(c) { + + } else { + + } + } else { + + } + + + } } @@ -997,10 +990,17 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) { } } -/// Iterates over the chars in a string + +/// Iterate over each char of a string, without allocating #[inline(always)] pub fn each_char(s: &str, it: &fn(char) -> bool) { - each_chari(s, |_i, c| it(c)) + let mut i = 0; + let len = len(s); + while i < len { + let CharRange {ch, next} = char_range_at(s, i); + if !it(ch) { return; } + i = next; + } } /// Iterates over the chars in a string, with indices @@ -1038,31 +1038,34 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) { } } -/// Apply a function to each substring after splitting by character +///////////////////////////////////////////////////////////////////////////////////////////////// +// NOTE: Remove afterwards +/* /// Apply a function to each substring after splitting by character pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) { vec::each(split_char(ss, cc), |s| ff(*s)) } -/** +** * Apply a function to each substring after splitting by character, up to * `count` times - */ + * pub fn splitn_char_each(ss: &str, sep: char, count: uint, ff: &fn(v: &str) -> bool) { vec::each(splitn_char(ss, sep, count), |s| ff(*s)) } -/// Apply a function to each word +/ Apply a function to each word pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) { vec::each(words(ss), |s| ff(*s)) } -/** +** * Apply a function to each line (by '\n') - */ + * pub fn lines_each(ss: 
&str, ff: &fn(v: &str) -> bool) { vec::each(lines(ss), |s| ff(*s)) -} +} */ +///////////////////////////////////////////////////////////////////////////////////////////////// /* Section: Searching @@ -2511,7 +2514,7 @@ impl OwnedStr for ~str { impl Clone for ~str { #[inline(always)] fn clone(&self) -> ~str { - self.to_str() // hilarious + from_slice(*self) } } diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index de8a8f34381..f837f776b96 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -601,7 +601,7 @@ pub mod groups { row += match short_name.len() { 0 => ~"", 1 => ~"-" + short_name + " ", - _ => fail!(~"the short name should only be 1 char long"), + _ => fail!(~"the short name should only be 1 ascii char long"), }; // long option @@ -617,6 +617,7 @@ pub mod groups { Maybe => ~"[" + hint + ~"]", }; + // FIXME: #5516 // here we just need to indent the start of the description let rowlen = row.len(); row += if rowlen < 24 { @@ -625,8 +626,22 @@ pub mod groups { desc_sep }; + // Normalize desc to contain words seperated by one space character + let mut desc_normalized_whitespace = ~str + for desc.each_word |word| { + desc_normalized_whitespace.push_str(word); + desc_normalized_whitespace.push_char(' '); + } + + // FIXME: #5516 + let mut desc_rows: ~[~str] = ~[]; + for desc_normalized_whitespace.each_split_within(54) |substr| { + desc_rows.push(~substr); + } + + // FIXME: #5516 // wrapped description - row += str::connect(str::split_within(desc, 54), desc_sep); + row += str::connect(desc_rows, desc_sep); row }); From b9de2b5787440bbb196fd38223ef4a6a6f196f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Sun, 24 Mar 2013 07:51:18 +0100 Subject: [PATCH 3/7] Switched over a bunch of splitting funktions to non-allocating iterators --- src/libcore/num/strconv.rs | 8 +-- src/libcore/os.rs | 3 +- src/libcore/path.rs | 33 ++++++--- src/libcore/rand.rs | 4 +- src/libcore/str.rs | 114 +++++++++++++++++--------------- 
src/librustc/metadata/cstore.rs | 4 +- src/librustc/middle/resolve.rs | 5 +- src/libstd/base64.rs | 30 +++++---- src/libstd/getopts.rs | 13 ++-- src/libstd/json.rs | 3 +- src/libstd/net_ip.rs | 4 +- src/libstd/net_url.rs | 4 +- src/libsyntax/parse/comments.rs | 8 ++- 13 files changed, 135 insertions(+), 98 deletions(-) diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs index e73a4a2ccaa..ce6c015c131 100644 --- a/src/libcore/num/strconv.rs +++ b/src/libcore/num/strconv.rs @@ -132,10 +132,10 @@ impl_NumStrConv_Integer!(u64) // Special value strings as [u8] consts. -const inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; -const positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; -const negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; -const nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; +static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; +static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; /** * Converts a number to its string representation as a byte vector. 
diff --git a/src/libcore/os.rs b/src/libcore/os.rs index 3c2dbf7ea15..9aa00e8e457 100644 --- a/src/libcore/os.rs +++ b/src/libcore/os.rs @@ -218,7 +218,8 @@ pub fn env() -> ~[(~str,~str)] { fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] { let mut pairs = ~[]; for input.each |p| { - let vs = str::splitn_char(*p, '=', 1); + let mut vs = ~[]; + for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) } debug!("splitting: len: %u", vs.len()); fail_unless!(vs.len() == 2); diff --git a/src/libcore/path.rs b/src/libcore/path.rs index 76aaf14d4ac..3d06809a452 100644 --- a/src/libcore/path.rs +++ b/src/libcore/path.rs @@ -381,7 +381,8 @@ impl ToStr for PosixPath { impl GenericPath for PosixPath { fn from_str(s: &str) -> PosixPath { - let mut components = str::split_nonempty(s, |c| c == '/'); + let mut components = ~[]; + for str::each_split_nonempty(s, |c| c == '/') |s| { components.push(s.to_owned()) } let is_absolute = (s.len() != 0 && s[0] == '/' as u8); return PosixPath { is_absolute: is_absolute, components: components } @@ -504,9 +505,10 @@ impl GenericPath for PosixPath { fn push_many(&self, cs: &[~str]) -> PosixPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = str::split_nonempty( - *e, - |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } } PosixPath { is_absolute: self.is_absolute, @@ -515,7 +517,10 @@ impl GenericPath for PosixPath { fn push(&self, s: &str) -> PosixPath { let mut v = copy self.components; - let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } PosixPath { components: v, ..copy *self } } @@ -590,8 +595,10 @@ impl GenericPath for WindowsPath { } } - let mut components = - str::split_nonempty(rest, |c| windows::is_sep(c as 
u8)); + let mut components = ~[]; + for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| { + components.push(s.to_owned()) + } let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0])); return WindowsPath { host: host, device: device, @@ -759,9 +766,10 @@ impl GenericPath for WindowsPath { fn push_many(&self, cs: &[~str]) -> WindowsPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = str::split_nonempty( - *e, - |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } } // tedious, but as-is, we can't use ..self @@ -775,7 +783,10 @@ impl GenericPath for WindowsPath { fn push(&self, s: &str) -> WindowsPath { let mut v = copy self.components; - let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } return WindowsPath { components: v, ..copy *self } } diff --git a/src/libcore/rand.rs b/src/libcore/rand.rs index 3085269f692..afa4ea66ca6 100644 --- a/src/libcore/rand.rs +++ b/src/libcore/rand.rs @@ -327,7 +327,9 @@ impl RngUtil for @Rng { */ fn gen_char_from(&self, chars: &str) -> char { fail_unless!(!chars.is_empty()); - self.choose(str::chars(chars)) + let mut cs = ~[]; + for str::each_char(chars) |c| { cs.push(c) } + self.choose(cs) } /// Return a random bool diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 28f76125746..4fc960a7c04 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -463,7 +463,7 @@ pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) { } fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, - allow_trailing_empty: bool), it: &fn(&str) -> bool) { + allow_trailing_empty: bool, it: &fn(&str) -> bool) { if sep < 128u as char { let b = sep as u8, l = len(s); let mut 
done = 0u; @@ -513,8 +513,8 @@ pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> b each_split_inner(s, sepfn, len(s), false, false, it) } -pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, - allow_empty: bool, allow_trailing_empty: bool), it: &fn(&str) -> bool) { +fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, + allow_empty: bool, allow_trailing_empty: bool, it: &fn(&str) -> bool) { let l = len(s); let mut i = 0u, start = 0u, done = 0u; while i < l && done < count { @@ -534,7 +534,7 @@ pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, } // See Issue #1932 for why this is a naive search -fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { +fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) { let sep_len = len(sep), l = len(s); fail_unless!(sep_len > 0u); let mut i = 0u, match_start = 0u, match_i = 0u; @@ -545,7 +545,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { match_i += 1u; // Found a match if match_i == sep_len { - f(match_start, i + 1u); + if !f(match_start, i + 1u) { return; } match_i = 0u; } i += 1u; @@ -561,10 +561,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { } } -fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { +fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) { let mut last_end = 0u; - do iter_matches(s, sep) |from, to| { - f(last_end, from); + for iter_matches(s, sep) |from, to| { + if !f(last_end, from) { return; } last_end = to; } f(last_end, len(s)); @@ -580,13 +580,13 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { * ~~~ */ pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) { - do iter_between_matches(s, sep) |from, to| { + for iter_between_matches(s, sep) |from, to| { if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } } pub fn each_split_str_nonempty(s: &'a 
str, sep: &'b str, it: &fn(&str) -> bool) { - do iter_between_matches(s, sep) |from, to| { + for iter_between_matches(s, sep) |from, to| { if to > from { if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } @@ -630,7 +630,7 @@ pub fn levdistance(s: &str, t: &str) -> uint { /** * Splits a string into a vector of the substrings separated by LF ('\n'). */ -pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char(s, '\n', it) } +pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char_no_trailing(s, '\n', it) } /** * Splits a string into a vector of the substrings separated by LF ('\n') @@ -656,52 +656,56 @@ pub fn each_word(s: &str, it: &fn(&str) -> bool) { * each of which is less bytes long than a limit */ pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) { - let words = str::words(ss); - - // empty? - if words == ~[] { return ~[]; } - - let mut rows : ~[~str] = ~[]; - let mut row : ~str = ~""; - - for words.each |wptr| { - let word = copy *wptr; - - // if adding this word to the row would go over the limit, - // then start a new row - if row.len() + word.len() + 1 > lim { - rows.push(copy row); // save previous row - row = word; // start a new one - } else { - if row.len() > 0 { row += ~" " } // separate words - row += word; // append to this row - } + // Just for fun, let's write this as an automaton + enum SplitWithinState { + A, // Leading whitespace, initial state + B, // Words + C, // Internal and trailing whitespace } + enum Whitespace { Ws, Cr } + enum LengthLimit { UnderLim, OverLim } - // save the last row - if row != ~"" { rows.push(row); } + let mut slice_start = 0; + let mut last_start = 0; + let mut last_end = 0; + let mut state = A; - rows - // NOTE: Finish change here + let mut cont = true; + let slice = || { cont = it(ss.slice(slice_start, last_end)) }; - let mut last_slice_i = 0, last_word_i = 0, word_start = true; - for each_chari(s) |i, c| { - if (i - last_slice_i) <= lim { - if 
char::is_whitespace(c) { + let machine = |i: uint, c: char| { + let whitespace = if char::is_whitespace(c) { Ws } else { Cr }; + let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim }; - } else { + state = match (state, whitespace, limit) { + (A, Ws, _) => { A } + (A, Cr, _) => { slice_start = i; last_start = i; B } - } - } else { + (B, Cr, UnderLim) => { B } + (B, Cr, OverLim) if (i - last_start + 1) > lim + => { fail!(~"word longer than limit!") } + (B, Cr, OverLim) => { slice(); slice_start = last_start; B } + (B, Ws, UnderLim) => { last_end = i; C } + (B, Ws, OverLim) => { last_end = i; slice(); A } - } + (C, Cr, UnderLim) => { last_start = i; B } + (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B } + (C, Ws, OverLim) => { slice(); A } + (C, Ws, UnderLim) => { C } + }; + cont + }; + str::each_chari(ss, machine); + // Let the automaton 'run out' + let mut fake_i = ss.len(); + while cont && match state { B | C => true, A => false } { + machine(fake_i, ' '); + fake_i += 1; } } - - /// Convert a string to lowercase. 
ASCII only pub fn to_lower(s: &str) -> ~str { map(s, @@ -731,7 +735,7 @@ pub fn to_upper(s: &str) -> ~str { */ pub fn replace(s: &str, from: &str, to: &str) -> ~str { let mut result = ~"", first = true; - do iter_between_matches(s, from) |start, end| { + for iter_between_matches(s, from) |start, end| { if first { first = false; } else { @@ -2286,9 +2290,9 @@ pub trait StrSlice { fn len(&self) -> uint; fn char_len(&self) -> uint; fn slice(&self, begin: uint, end: uint) -> &'self str; - fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str]; - fn split_char(&self, sep: char) -> ~[~str]; - fn split_str(&self, sep: &'a str) -> ~[~str]; + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool); + fn each_split_char(&self, sep: char, it: &fn(&str) -> bool); + fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool); fn starts_with(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; fn to_lower(&self) -> ~str; @@ -2408,20 +2412,24 @@ impl StrSlice for &'self str { } /// Splits a string into substrings using a character function #[inline] - fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str] { - split(*self, sepfn) + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { + each_split(*self, sepfn, it) } /** * Splits a string into substrings at each occurrence of a given character */ #[inline] - fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) } + fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) { + each_split_char(*self, sep, it) + } /** * Splits a string into a vector of the substrings separated by a given * string */ #[inline] - fn split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) } + fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) { + each_split_str(*self, sep, it) + } /// Returns true if one string starts with another #[inline] fn starts_with(&self, needle: &'a str) -> bool { diff --git a/src/librustc/metadata/cstore.rs 
b/src/librustc/metadata/cstore.rs index 581ad5336de..018a365f37f 100644 --- a/src/librustc/metadata/cstore.rs +++ b/src/librustc/metadata/cstore.rs @@ -120,7 +120,9 @@ pub fn get_used_libraries(cstore: @mut CStore) -> ~[~str] { } pub fn add_used_link_args(cstore: @mut CStore, args: &str) { - cstore.used_link_args.push_all(args.split_char(' ')); + for args.each_split_char(' ') |s| { + cstore.used_link_args.push(s.to_owned()); + } } pub fn get_used_link_args(cstore: @mut CStore) -> ~[~str] { diff --git a/src/librustc/middle/resolve.rs b/src/librustc/middle/resolve.rs index 00883b28b04..079110e67f5 100644 --- a/src/librustc/middle/resolve.rs +++ b/src/librustc/middle/resolve.rs @@ -76,7 +76,7 @@ use syntax::visit::{visit_mod, visit_ty, vt}; use syntax::opt_vec::OptVec; use core::option::{Some, get, is_some, is_none}; -use core::str::{connect, split_str}; +use core::str::{connect, each_split_str}; use core::hashmap::linear::LinearMap; use std::oldmap::HashMap; @@ -1696,7 +1696,8 @@ pub impl Resolver { entry: %s (%?)", path_string, def_like); - let mut pieces = split_str(path_string, ~"::"); + let mut pieces = ~[]; + for each_split_str(path_string, "::") |s| { pieces.push(s.to_owned()) } let final_ident_str = pieces.pop(); let final_ident = self.session.ident_of(final_ident_str); diff --git a/src/libstd/base64.rs b/src/libstd/base64.rs index ff026324404..b11ad7052b9 100644 --- a/src/libstd/base64.rs +++ b/src/libstd/base64.rs @@ -16,12 +16,16 @@ pub trait ToBase64 { fn to_base64(&self) -> ~str; } +static CHARS: &'static[char] = &[ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' +]; + impl ToBase64 for &'self [u8] { fn to_base64(&self) -> ~str { - let chars = str::chars( - 
~"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - ); - let mut s = ~""; unsafe { let len = self.len(); @@ -35,10 +39,10 @@ impl ToBase64 for &'self [u8] { (self[i + 2u] as uint); // This 24-bit number gets separated into four 6-bit numbers. - str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); - str::push_char(&mut s, chars[(n >> 6u) & 63u]); - str::push_char(&mut s, chars[n & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 6u) & 63u]); + str::push_char(&mut s, CHARS[n & 63u]); i += 3u; } @@ -49,17 +53,17 @@ impl ToBase64 for &'self [u8] { 0 => (), 1 => { let n = (self[i] as uint) << 16u; - str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); str::push_char(&mut s, '='); str::push_char(&mut s, '='); } 2 => { let n = (self[i] as uint) << 16u | (self[i + 1u] as uint) << 8u; - str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); - str::push_char(&mut s, chars[(n >> 6u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 6u) & 63u]); str::push_char(&mut s, '='); } _ => fail!(~"Algebra is broken, please alert the math police") diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index f837f776b96..ae783fb9b69 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -244,7 +244,8 @@ pub fn getopts(args: &[~str], opts: &[Opt]) -> Result { let mut i_arg = None; if cur[1] == '-' as u8 { let tail = str::slice(cur, 2, curlen).to_owned(); - let tail_eq = str::splitn_char(tail, '=', 1); + let mut tail_eq = ~[]; + for str::each_splitn_char(tail, '=', 1) |s| { tail_eq.push(s.to_owned()) } if tail_eq.len() <= 1 { 
names = ~[Long(tail)]; } else { @@ -627,16 +628,16 @@ pub mod groups { }; // Normalize desc to contain words seperated by one space character - let mut desc_normalized_whitespace = ~str - for desc.each_word |word| { + let mut desc_normalized_whitespace = ~""; + for str::each_word(desc) |word| { desc_normalized_whitespace.push_str(word); desc_normalized_whitespace.push_char(' '); } // FIXME: #5516 - let mut desc_rows: ~[~str] = ~[]; - for desc_normalized_whitespace.each_split_within(54) |substr| { - desc_rows.push(~substr); + let mut desc_rows = ~[]; + for str::each_split_within(desc_normalized_whitespace, 54) |substr| { + desc_rows.push(substr.to_owned()); } // FIXME: #5516 diff --git a/src/libstd/json.rs b/src/libstd/json.rs index a9b9b2977cd..f39e406bc00 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -806,7 +806,8 @@ impl serialize::Decoder for Decoder<'self> { } fn read_char(&self) -> char { - let v = str::chars(self.read_owned_str()); + let mut v = ~[]; + for str::each_char(self.read_owned_str()) |c| { v.push(c) } if v.len() != 1 { fail!(~"string must have one character") } v[0] } diff --git a/src/libstd/net_ip.rs b/src/libstd/net_ip.rs index 15593571b43..4d82d35cc32 100644 --- a/src/libstd/net_ip.rs +++ b/src/libstd/net_ip.rs @@ -197,7 +197,9 @@ pub mod v4 { } } pub fn parse_to_ipv4_rep(ip: &str) -> result::Result { - let parts = vec::map(str::split_char(ip, '.'), |s| { + let mut parts = ~[]; + for str::each_split_char(ip, '.') |s| { parts.push(s.to_owned()) } + let parts = vec::map(parts, |s| { match uint::from_str(*s) { Some(n) if n <= 255 => n, _ => 256 diff --git a/src/libstd/net_url.rs b/src/libstd/net_url.rs index 21b60584635..9caab11d643 100644 --- a/src/libstd/net_url.rs +++ b/src/libstd/net_url.rs @@ -344,8 +344,8 @@ fn userinfo_to_str(userinfo: &UserInfo) -> ~str { fn query_from_str(rawquery: &str) -> Query { let mut query: Query = ~[]; if str::len(rawquery) != 0 { - for str::split_char(rawquery, '&').each |p| { - let (k, v) = 
split_char_first(*p, '='); + for str::each_split_char(rawquery, '&') |p| { + let (k, v) = split_char_first(p, '='); // FIXME(#3722): unsafe only because decode_inner does (string) IO unsafe {query.push((decode_component(k), decode_component(v)));} }; diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 3f8a5588c71..1b6b25db38a 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -99,7 +99,8 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str { } return do lines.map |line| { - let chars = str::chars(*line); + let mut chars = ~[]; + for str::each_char(*line) |c| { chars.push(c) } if i > chars.len() { ~"" } else { @@ -116,7 +117,10 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str { } if comment.starts_with(~"/*") { - let lines = str::lines_any(comment.slice(3u, comment.len() - 2u).to_owned()); + let mut lines = ~[]; + for str::each_line_any(comment.slice(3u, comment.len() - 2u)) |line| { + lines.push(line.to_owned()) + } let lines = vertical_trim(lines); let lines = block_trim(lines, ~"\t ", None); let lines = block_trim(lines, ~"*", Some(1u)); From c99488b3a4045171e36bcd2a89e742ac06d3ba72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Mon, 25 Mar 2013 02:49:42 +0100 Subject: [PATCH 4/7] Isolated bug, static vector seems to behave differently than fixed sized one --- src/libcore/str.rs | 30 ------------------------------ src/libstd/base64.rs | 2 +- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 4fc960a7c04..96cadadfe89 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -994,7 +994,6 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) { } } - /// Iterate over each char of a string, without allocating #[inline(always)] pub fn each_char(s: &str, it: &fn(char) -> bool) { @@ -1042,35 +1041,6 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) { } } 
-///////////////////////////////////////////////////////////////////////////////////////////////// -// NOTE: Remove afterwards -/* /// Apply a function to each substring after splitting by character -pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) { - vec::each(split_char(ss, cc), |s| ff(*s)) -} - -** - * Apply a function to each substring after splitting by character, up to - * `count` times - * -pub fn splitn_char_each(ss: &str, sep: char, count: uint, - ff: &fn(v: &str) -> bool) { - vec::each(splitn_char(ss, sep, count), |s| ff(*s)) -} - -/ Apply a function to each word -pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) { - vec::each(words(ss), |s| ff(*s)) -} - -** - * Apply a function to each line (by '\n') - * -pub fn lines_each(ss: &str, ff: &fn(v: &str) -> bool) { - vec::each(lines(ss), |s| ff(*s)) -} */ -///////////////////////////////////////////////////////////////////////////////////////////////// - /* Section: Searching */ diff --git a/src/libstd/base64.rs b/src/libstd/base64.rs index b11ad7052b9..02858de9b34 100644 --- a/src/libstd/base64.rs +++ b/src/libstd/base64.rs @@ -16,7 +16,7 @@ pub trait ToBase64 { fn to_base64(&self) -> ~str; } -static CHARS: &'static[char] = &[ +static CHARS: [char * 64] = [ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', From 06c371605b3e2ad8068e359f28d4c77dccad0237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Tue, 26 Mar 2013 04:39:10 +0100 Subject: [PATCH 5/7] Fixed all use sites and tests --- src/compiletest/header.rs | 3 +- src/compiletest/runtest.rs | 14 +- src/libcore/str.rs | 278 ++++++++++++++------------- src/librust/rust.rc | 20 +- src/librustdoc/desc_to_brief_pass.rs | 3 +- src/librustdoc/markdown_pass.rs | 8 +- src/librustdoc/sectionalize_pass.rs | 4 +- src/librustdoc/unindent_pass.rs | 3 +- src/librusti/rusti.rc | 3 +- 
src/librustpkg/rustpkg.rc | 27 +-- src/librustpkg/util.rs | 10 +- src/test/bench/sudoku.rs | 3 +- src/test/run-pass/utf8_chars.rs | 4 +- 13 files changed, 204 insertions(+), 176 deletions(-) diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index a039aaf5683..5a35c56c075 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -142,7 +142,8 @@ fn parse_check_line(line: ~str) -> Option<~str> { fn parse_exec_env(line: ~str) -> Option<(~str, ~str)> { do parse_name_value_directive(line, ~"exec-env").map |nv| { // nv is either FOO or FOO=BAR - let strs = str::splitn_char(*nv, '=', 1u); + let mut strs = ~[]; + for str::each_splitn_char(*nv, '=', 1u) |s| { strs.push(s.to_owned()); } match strs.len() { 1u => (strs[0], ~""), 2u => (strs[0], strs[1]), diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index f17e9ffe548..ed66aea4f0c 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -267,7 +267,7 @@ fn run_debuginfo_test(config: config, props: TestProps, testfile: &Path) { // check if each line in props.check_lines appears in the // output (in order) let mut i = 0u; - for str::lines_each(ProcRes.stdout) |line| { + for str::each_line(ProcRes.stdout) |line| { if props.check_lines[i].trim() == line.trim() { i += 1u; } @@ -297,7 +297,7 @@ fn check_error_patterns(props: TestProps, let mut next_err_idx = 0u; let mut next_err_pat = props.error_patterns[next_err_idx]; let mut done = false; - for str::lines_each(ProcRes.stderr) |line| { + for str::each_line(ProcRes.stderr) |line| { if str::contains(line, next_err_pat) { debug!("found error pattern %s", next_err_pat); next_err_idx += 1u; @@ -347,7 +347,7 @@ fn check_expected_errors(expected_errors: ~[errors::ExpectedError], // filename:line1:col1: line2:col2: *warning:* msg // where line1:col1: is the starting point, line2:col2: // is the ending point, and * represents ANSI color codes. 
- for str::lines_each(ProcRes.stderr) |line| { + for str::each_line(ProcRes.stderr) |line| { let mut was_expected = false; for vec::eachi(expected_errors) |i, ee| { if !found_flags[i] { @@ -596,8 +596,12 @@ fn split_maybe_args(argstr: Option<~str>) -> ~[~str] { } match argstr { - Some(s) => rm_whitespace(str::split_char(s, ' ')), - None => ~[] + Some(s) => { + let mut ss = ~[]; + for str::each_split_char(s, ' ') |s| { ss.push(s.to_owned()) } + rm_whitespace(ss) + } + None => ~[] } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 96cadadfe89..1b3761a0db8 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -2586,7 +2586,8 @@ mod tests { fn test_split_char() { fn t(s: &str, c: char, u: &[~str]) { debug!(~"split_byte: " + s); - let v = split_char(s, c); + let mut v = ~[]; + for each_split_char(s, c) |s| { v.push(s.to_owned()) } debug!("split_byte to: %?", v); fail_unless!(vec::all2(v, u, |a,b| a == b)); } @@ -2594,28 +2595,31 @@ mod tests { t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]); t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); - fail_unless!(~[~"", ~"", ~"", ~"hello", ~"there", ~""] - == split_char(~"...hello.there.", '.')); - - fail_unless!(~[~""] == split_char(~"", 'z')); - fail_unless!(~[~"",~""] == split_char(~"z", 'z')); - fail_unless!(~[~"ok"] == split_char(~"ok", 'z')); + t(~"", 'z', ~[~""]); + t(~"z", 'z', ~[~"",~""]); + t(~"ok", 'z', ~[~"ok"]); } #[test] fn test_split_char_2() { + fn t(s: &str, c: char, u: &[~str]) { + debug!(~"split_byte: " + s); + let mut v = ~[]; + for each_split_char(s, c) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == split_char(data, 'V')); - fail_unless!(~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"] - == split_char(data, 'ท')); + t(data, 'V', ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', ~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); } #[test] fn 
test_splitn_char() { fn t(s: &str, c: char, n: uint, u: &[~str]) { debug!(~"splitn_byte: " + s); - let v = splitn_char(s, c, n); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } debug!("split_byte to: %?", v); debug!("comparing vs. %?", u); fail_unless!(vec::all2(v, u, |a,b| a == b)); @@ -2627,46 +2631,56 @@ mod tests { t(~".hello.there", '.', 0u, ~[~".hello.there"]); t(~".hello.there", '.', 1u, ~[~"", ~"hello.there"]); t(~"...hello.there.", '.', 3u, ~[~"", ~"", ~"", ~"hello.there."]); - t(~"...hello.there.", '.', 5u, - ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); + t(~"...hello.there.", '.', 5u, ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); - fail_unless!(~[~""] == splitn_char(~"", 'z', 5u)); - fail_unless!(~[~"",~""] == splitn_char(~"z", 'z', 5u)); - fail_unless!(~[~"ok"] == splitn_char(~"ok", 'z', 5u)); - fail_unless!(~[~"z"] == splitn_char(~"z", 'z', 0u)); - fail_unless!(~[~"w.x.y"] == splitn_char(~"w.x.y", '.', 0u)); - fail_unless!(~[~"w",~"x.y"] == splitn_char(~"w.x.y", '.', 1u)); + t(~"", 'z', 5u, ~[~""]); + t(~"z", 'z', 5u, ~[~"",~""]); + t(~"ok", 'z', 5u, ~[~"ok"]); + t(~"z", 'z', 0u, ~[~"z"]); + t(~"w.x.y", '.', 0u, ~[~"w.x.y"]); + t(~"w.x.y", '.', 1u, ~[~"w",~"x.y"]); } #[test] fn test_splitn_char_2 () { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == splitn_char(data, '华', 1u)); + fn t(s: &str, c: char, n: uint, u: &[~str]) { + debug!(~"splitn_byte: " + s); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + debug!("comparing vs. 
%?", u); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"", ~"", ~"XXX", ~"YYYzWWWz"] - == splitn_char(~"zzXXXzYYYzWWWz", 'z', 3u)); - fail_unless!(~[~"",~""] == splitn_char(~"z", 'z', 5u)); - fail_unless!(~[~""] == splitn_char(~"", 'z', 5u)); - fail_unless!(~[~"ok"] == splitn_char(~"ok", 'z', 5u)); + t(~"ประเทศไทย中华Việt Nam", '华', 1u, ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYzWWWz", 'z', 3u, ~[~"", ~"", ~"XXX", ~"YYYzWWWz"]); + t(~"z", 'z', 5u, ~[~"",~""]); + t(~"", 'z', 5u, ~[~""]); + t(~"ok", 'z', 5u, ~[~"ok"]); } #[test] fn test_splitn_char_3() { + fn t(s: &str, c: char, n: uint, u: &[~str]) { + debug!(~"splitn_byte: " + s); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + debug!("comparing vs. %?", u); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == splitn_char(data, 'V', 1u)); - fail_unless!(~[~"ประเ", ~"ศไทย中华Việt Nam"] - == splitn_char(data, 'ท', 1u)); - + t(data, 'V', 1u, ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', 1u, ~[~"ประเ", ~"ศไทย中华Việt Nam"]); } #[test] fn test_split_char_no_trailing() { - fn t(s: &str, c: char, u: &[~str]) { + fn t(s: &str, c: char, u: &[~str]) { debug!(~"split_byte: " + s); - let v = split_char_no_trailing(s, c); + let mut v = ~[]; + for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } debug!("split_byte to: %?", v); fail_unless!(vec::all2(v, u, |a,b| a == b)); } @@ -2674,91 +2688,80 @@ mod tests { t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]); t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there"]); - fail_unless!(~[~"", ~"", ~"", ~"hello", ~"there"] - == split_char_no_trailing(~"...hello.there.", '.')); - - fail_unless!(~[] == split_char_no_trailing(~"", 'z')); - fail_unless!(~[~""] == split_char_no_trailing(~"z", 'z')); - fail_unless!(~[~"ok"] == split_char_no_trailing(~"ok", 'z')); + t(~"...hello.there.", '.', 
~[~"", ~"", ~"", ~"hello", ~"there"]); + t(~"", 'z', ~[]); + t(~"z", 'z', ~[~""]); + t(~"ok", 'z', ~[~"ok"]); } #[test] fn test_split_char_no_trailing_2() { + fn t(s: &str, c: char, u: &[~str]) { + debug!(~"split_byte: " + s); + let mut v = ~[]; + for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == split_char_no_trailing(data, 'V')); - fail_unless!(~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"] - == split_char_no_trailing(data, 'ท')); + t(data, 'V', ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', ~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); } #[test] fn test_split_str() { - fn t(s: &str, sep: &'a str, i: int, k: &str) { - fn borrow(x: &'a str) -> &'a str { x } - let v = split_str(s, sep); - fail_unless!(borrow(v[i]) == k); + fn t(s: &str, sep: &'a str, u: &[~str]) { + let mut v = ~[]; + for each_split_str(s, sep) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); } - - t(~"--1233345--", ~"12345", 0, ~"--1233345--"); - t(~"abc::hello::there", ~"::", 0, ~"abc"); - t(~"abc::hello::there", ~"::", 1, ~"hello"); - t(~"abc::hello::there", ~"::", 2, ~"there"); - t(~"::hello::there", ~"::", 0, ~""); - t(~"hello::there::", ~"::", 2, ~""); - t(~"::hello::there::", ~"::", 3, ~""); - - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย", ~"Việt Nam"] - == split_str (data, ~"中华")); - - fail_unless!(~[~"", ~"XXX", ~"YYY", ~""] - == split_str(~"zzXXXzzYYYzz", ~"zz")); - - fail_unless!(~[~"zz", ~"zYYYz"] == split_str(~"zzXXXzYYYz", ~"XXX")); - - - fail_unless!(~[~"", ~"XXX", ~"YYY", ~""] == - split_str(~".XXX.YYY.", ~".")); - fail_unless!(~[~""] == split_str(~"", ~".")); - fail_unless!(~[~"",~""] == split_str(~"zz", ~"zz")); - fail_unless!(~[~"ok"] == split_str(~"ok", ~"z")); - fail_unless!(~[~"",~"z"] == split_str(~"zzz", ~"zz")); - fail_unless!(~[~"",~"",~"z"] == split_str(~"zzzzz", 
~"zz")); + t(~"--1233345--", ~"12345", ~[~"--1233345--"]); + t(~"abc::hello::there", ~"::", ~[~"abc", ~"hello", ~"there"]); + t(~"::hello::there", ~"::", ~[~"", ~"hello", ~"there"]); + t(~"hello::there::", ~"::", ~[~"hello", ~"there", ~""]); + t(~"::hello::there::", ~"::", ~[~"", ~"hello", ~"there", ~""]); + t(~"ประเทศไทย中华Việt Nam", ~"中华", ~[~"ประเทศไทย", ~"Việt Nam"]); + t(~"zzXXXzzYYYzz", ~"zz", ~[~"", ~"XXX", ~"YYY", ~""]); + t(~"zzXXXzYYYz", ~"XXX", ~[~"zz", ~"zYYYz"]); + t(~".XXX.YYY.", ~".", ~[~"", ~"XXX", ~"YYY", ~""]); + t(~"", ~".", ~[~""]); + t(~"zz", ~"zz", ~[~"",~""]); + t(~"ok", ~"z", ~[~"ok"]); + t(~"zzz", ~"zz", ~[~"",~"z"]); + t(~"zzzzz", ~"zz", ~[~"",~"",~"z"]); } #[test] fn test_split() { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == split (data, |cc| cc == '华')); + fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { + let mut v = ~[]; + for each_split(s, sepf) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"", ~"", ~"XXX", ~"YYY", ~""] - == split(~"zzXXXzYYYz", char::is_lowercase)); - - fail_unless!(~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"] - == split(~"zzXXXzYYYz", char::is_uppercase)); - - fail_unless!(~[~"",~""] == split(~"z", |cc| cc == 'z')); - fail_unless!(~[~""] == split(~"", |cc| cc == 'z')); - fail_unless!(~[~"ok"] == split(~"ok", |cc| cc == 'z')); + t(~"ประเทศไทย中华Việt Nam", |cc| cc == '华', ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYz", char::is_lowercase, ~[~"", ~"", ~"XXX", ~"YYY", ~""]); + t(~"zzXXXzYYYz", char::is_uppercase, ~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); + t(~"z", |cc| cc == 'z', ~[~"",~""]); + t(~"", |cc| cc == 'z', ~[~""]); + t(~"ok", |cc| cc == 'z', ~[~"ok"]); } #[test] fn test_split_no_trailing() { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == split_no_trailing (data, |cc| cc == '华')); + fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { + let mut v = ~[]; + for 
each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"", ~"", ~"XXX", ~"YYY"] - == split_no_trailing(~"zzXXXzYYYz", char::is_lowercase)); - - fail_unless!(~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"] - == split_no_trailing(~"zzXXXzYYYz", char::is_uppercase)); - - fail_unless!(~[~""] == split_no_trailing(~"z", |cc| cc == 'z')); - fail_unless!(~[] == split_no_trailing(~"", |cc| cc == 'z')); - fail_unless!(~[~"ok"] == split_no_trailing(~"ok", |cc| cc == 'z')); + t(~"ประเทศไทย中华Việt Nam", |cc| cc == '华', ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYz", char::is_lowercase, ~[~"", ~"", ~"XXX", ~"YYY"]); + t(~"zzXXXzYYYz", char::is_uppercase, ~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); + t(~"z", |cc| cc == 'z', ~[~""]); + t(~"", |cc| cc == 'z', ~[]); + t(~"ok", |cc| cc == 'z', ~[~"ok"]); } #[test] @@ -2766,49 +2769,50 @@ mod tests { let lf = ~"\nMary had a little lamb\nLittle lamb\n"; let crlf = ~"\r\nMary had a little lamb\r\nLittle lamb\r\n"; - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines(lf)); + fn t(s: &str, f: &fn(&str, &fn(&str) -> bool), u: &[~str]) { + let mut v = ~[]; + for f(s) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines_any(lf)); - - fail_unless!(~[~"\r", ~"Mary had a little lamb\r", - ~"Little lamb\r"] - == lines(crlf)); - - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines_any(crlf)); - - fail_unless!(~[] == lines (~"")); - fail_unless!(~[] == lines_any(~"")); - fail_unless!(~[~""] == lines (~"\n")); - fail_unless!(~[~""] == lines_any(~"\n")); - fail_unless!(~[~"banana"] == lines (~"banana")); - fail_unless!(~[~"banana"] == lines_any(~"banana")); + t(lf, each_line ,~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(lf, each_line_any, ~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(crlf, each_line, ~[~"\r", 
~"Mary had a little lamb\r", ~"Little lamb\r"]); + t(crlf, each_line_any, ~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(~"", each_line, ~[]); + t(~"", each_line_any, ~[]); + t(~"\n", each_line, ~[~""]); + t(~"\n", each_line_any, ~[~""]); + t(~"banana", each_line, ~[~"banana"]); + t(~"banana", each_line_any, ~[~"banana"]); } #[test] fn test_words () { + fn t(s: &str, f: &fn(&str, &fn(&str) -> bool), u: &[~str]) { + let mut v = ~[]; + for f(s) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"\nMary had a little lamb\nLittle lamb\n"; - fail_unless!(~[ - ~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"] - == words(data)); - fail_unless!(~[~"ok"] == words(~"ok")); - fail_unless!(~[] == words(~"")); + t(data, each_word, ~[~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]); + t(~"ok", each_word, ~[~"ok"]); + t(~"", each_word, ~[]); } #[test] fn test_split_within() { - fail_unless!(split_within(~"", 0) == ~[]); - fail_unless!(split_within(~"", 15) == ~[]); - fail_unless!(split_within(~"hello", 15) == ~[~"hello"]); - - let data = ~"\nMary had a little lamb\nLittle lamb\n"; - error!("~~~~ %?", split_within(data, 15)); - fail_unless!(split_within(data, 15) == ~[~"Mary had a", - ~"little lamb", - ~"Little lamb"]); + fn t(s: &str, i: uint, u: &[~str]) { + let mut v = ~[]; + for each_split_within(s, i) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } + t(~"", 0, ~[]); + t(~"", 15, ~[]); + t(~"hello", 15, ~[~"hello"]); + t(~"\nMary had a little lamb\nLittle lamb\n", 15, + ~[~"Mary had a", ~"little lamb", ~"Little lamb"]); } #[test] @@ -3338,7 +3342,7 @@ mod tests { let mut ii = 0; - for split_char_each(data, ' ') |xx| { + for each_split_char(data, ' ') |xx| { match ii { 0 => fail_unless!("\nMary" == xx), 1 => fail_unless!("had" == xx), @@ -3356,7 +3360,7 @@ mod tests { let mut ii = 0; - for splitn_char_each(data, ' ', 2u) |xx| { + for each_splitn_char(data, ' ', 2u) |xx| { match ii 
{ 0 => fail_unless!("\nMary" == xx), 1 => fail_unless!("had" == xx), @@ -3373,7 +3377,7 @@ mod tests { let mut ii = 0; - for words_each(data) |ww| { + for each_word(data) |ww| { match ii { 0 => fail_unless!("Mary" == ww), 1 => fail_unless!("had" == ww), @@ -3384,7 +3388,7 @@ mod tests { ii += 1; } - words_each(~"", |_x| fail!()); // should not fail + each_word(~"", |_x| fail!()); // should not fail } #[test] @@ -3393,7 +3397,7 @@ mod tests { let mut ii = 0; - for lines_each(lf) |x| { + for each_line(lf) |x| { match ii { 0 => fail_unless!("" == x), 1 => fail_unless!("Mary had a little lamb" == x), @@ -3437,7 +3441,7 @@ mod tests { let ss = ~"ศไทย中华Việt Nam"; fail_unless!(~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a', 'm'] - == chars(ss)); + == to_chars(ss)); } #[test] diff --git a/src/librust/rust.rc b/src/librust/rust.rc index e590586abbb..ffd7669c2d2 100644 --- a/src/librust/rust.rc +++ b/src/librust/rust.rc @@ -9,7 +9,7 @@ // except according to those terms. // rust - central access to other rust tools -// FIXME #2238 Make commands run and test emit proper file endings on winds +// FIXME #2238 Make commands run and test emit proper file endings on windows // FIXME #2238 Make run only accept source that emits an executable #[link(name = "rust", @@ -29,10 +29,12 @@ enum ValidUsage { } impl ValidUsage { - fn is_valid(&self) -> bool { match *self { - Valid => true, - Invalid => false - }} + fn is_valid(&self) -> bool { + match *self { + Valid => true, + Invalid => false + } + } } enum Action { @@ -128,7 +130,9 @@ fn cmd_help(args: &[~str]) -> ValidUsage { match command.usage_full { UsgStr(msg) => io::println(fmt!("%s\n", msg)), UsgExec(commandline) => { - let words = str::words(commandline); + let mut words = ~[]; + for str::each_word(commandline) |word| { words.push(word.to_owned()) } + let words = words; let (prog, args) = (words.head(), words.tail()); run::run_program(*prog, args); } @@ -184,7 +188,9 @@ fn do_command(command: &Command, args: &[~str]) -> 
ValidUsage { match command.action { Call(f) => f(args), Exec(commandline) => { - let words = str::words(commandline); + let mut words = ~[]; + for str::each_word(commandline) |word| { words.push(word.to_owned()) } + let words = words; let (prog, prog_args) = (words.head(), words.tail()); let exitstatus = run::run_program( *prog, diff --git a/src/librustdoc/desc_to_brief_pass.rs b/src/librustdoc/desc_to_brief_pass.rs index 957b94d18f5..012a56c5b72 100644 --- a/src/librustdoc/desc_to_brief_pass.rs +++ b/src/librustdoc/desc_to_brief_pass.rs @@ -183,7 +183,8 @@ fn first_sentence_(s: &str) -> ~str { } fn paragraphs(s: &str) -> ~[~str] { - let lines = str::lines_any(s); + let mut lines = ~[]; + for str::each_line_any(s) |line| { lines.push(line.to_owned()); } let mut whitespace_lines = 0; let mut accum = ~""; let paras = do vec::foldl(~[], lines) |paras, line| { diff --git a/src/librustdoc/markdown_pass.rs b/src/librustdoc/markdown_pass.rs index 73f3aa53c25..1e39373d9a5 100644 --- a/src/librustdoc/markdown_pass.rs +++ b/src/librustdoc/markdown_pass.rs @@ -534,9 +534,11 @@ fn write_sig(ctxt: &Ctxt, sig: Option<~str>) { } fn code_block_indent(s: ~str) -> ~str { - let lines = str::lines_any(s); - let indented = vec::map(lines, |line| fmt!(" %s", *line) ); - str::connect(indented, ~"\n") + let mut indented = ~[]; + for str::each_line_any(s) |line| { + indented.push(fmt!(" %s", line)); + } + str::connect(indented, "\n") } #[test] diff --git a/src/librustdoc/sectionalize_pass.rs b/src/librustdoc/sectionalize_pass.rs index 8b058048ff4..33003a59611 100644 --- a/src/librustdoc/sectionalize_pass.rs +++ b/src/librustdoc/sectionalize_pass.rs @@ -104,8 +104,8 @@ fn sectionalize(desc: Option<~str>) -> (Option<~str>, ~[doc::Section]) { if desc.is_none() { return (None, ~[]); } - - let lines = str::lines((copy desc).get()); + let mut lines = ~[]; + for str::each_line_any(*desc.get_ref()) |line| { lines.push(line.to_owned()); } let mut new_desc = None::<~str>; let mut current_section = 
None; diff --git a/src/librustdoc/unindent_pass.rs b/src/librustdoc/unindent_pass.rs index ecd72950468..6207e2252e4 100644 --- a/src/librustdoc/unindent_pass.rs +++ b/src/librustdoc/unindent_pass.rs @@ -33,7 +33,8 @@ pub fn mk_pass() -> Pass { } fn unindent(s: &str) -> ~str { - let lines = str::lines_any(s); + let mut lines = ~[]; + for str::each_line_any(s) |line| { lines.push(line.to_owned()); } let mut saw_first_line = false; let mut saw_second_line = false; let min_indent = do vec::foldl(uint::max_value, lines) diff --git a/src/librusti/rusti.rc b/src/librusti/rusti.rc index e04cc9e3898..ddde66157bb 100644 --- a/src/librusti/rusti.rc +++ b/src/librusti/rusti.rc @@ -337,7 +337,8 @@ fn run_line(repl: &mut Repl, in: @io::Reader, out: @io::Writer, line: ~str) -> Option { if line.starts_with(~":") { let full = line.substr(1, line.len() - 1); - let split = str::words(full); + let mut split = ~[]; + for str::each_word(full) |word| { split.push(word.to_owned()) } let len = split.len(); if len > 0 { diff --git a/src/librustpkg/rustpkg.rc b/src/librustpkg/rustpkg.rc index 2032969fbca..35698bb235a 100644 --- a/src/librustpkg/rustpkg.rc +++ b/src/librustpkg/rustpkg.rc @@ -270,14 +270,11 @@ impl Ctx { fn sep_name_vers(in: ~str) -> (Option<~str>, Option<~str>) { let mut name = None; let mut vers = None; - let parts = str::split_char(in, '@'); - if parts.len() >= 1 { - name = Some(parts[0]); - - if parts.len() >= 2 { - vers = Some(parts[1]); - } + for str::each_split_char(in, '@') |s| { + if name.is_none() { name = Some(s.to_owned()); } + else if vers.is_none() { vers = Some(s.to_owned()); } + else { break; } } (name, vers) @@ -733,8 +730,12 @@ impl Ctx { for package.bins.each |&bin| { let path = Path(bin); - let name = str::split_char(path.file_path().to_str(), '-')[0]; - let out = bin_dir.push(name); + let mut name = None; + for str::each_split_char(path.file_path().to_str(), '-') |s| { + name = Some(s.to_owned()); + break; + } + let out = bin_dir.push(name.unwrap()); 
util::link_exe(&path, &out); util::note(fmt!("linked %s", out.to_str())); @@ -847,8 +848,12 @@ impl Ctx { for package.bins.each |&bin| { let path = Path(bin); - let name = str::split_char(path.file_path().to_str(), '-')[0]; - let out = bin_dir.push(name); + let mut name = None; + for str::each_split_char(path.file_path().to_str(), '-') |s| { + name = Some(s.to_owned()); + break; + } + let out = bin_dir.push(name.unwrap()); if os::path_exists(&out) { if os::remove_file(&out) { diff --git a/src/librustpkg/util.rs b/src/librustpkg/util.rs index 8e5d7e95ae1..58a286a1854 100644 --- a/src/librustpkg/util.rs +++ b/src/librustpkg/util.rs @@ -44,10 +44,10 @@ pub fn is_cmd(cmd: ~str) -> bool { } pub fn parse_name(id: ~str) -> result::Result<~str, ~str> { - let parts = str::split_char(id, '.'); + let mut last_part = None; - for parts.each |&part| { - for str::chars(part).each |&char| { + for str::each_split_char(id, '.') |part| { + for str::each_char(part) |char| { if char::is_whitespace(char) { return result::Err( ~"could not parse id: contains whitespace"); @@ -56,9 +56,11 @@ pub fn parse_name(id: ~str) -> result::Result<~str, ~str> { ~"could not parse id: should be all lowercase"); } } + last_part = Some(part.to_owned()); } + if last_part.is_none() { return result::Err(~"could not parse id: is empty"); } - result::Ok(copy *parts.last()) + result::Ok(last_part.unwrap()) } struct ListenerFn { diff --git a/src/test/bench/sudoku.rs b/src/test/bench/sudoku.rs index 4964cea28ad..dcc5fe58755 100644 --- a/src/test/bench/sudoku.rs +++ b/src/test/bench/sudoku.rs @@ -68,7 +68,8 @@ pub impl Sudoku { let mut g = vec::from_fn(10u, { |_i| ~[0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }); while !reader.eof() { let line = reader.read_line(); - let comps = str::split_char(line.trim(), ','); + let mut comps = ~[]; + for str::each_split_char(line.trim(), ',') |s| { comps.push(s.to_owned()) } if vec::len(comps) == 3u { let row = uint::from_str(comps[0]).get() as u8; let col = 
uint::from_str(comps[1]).get() as u8; diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index cfbb7398159..247fd2d712a 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -17,8 +17,8 @@ pub fn main() { fail_unless!((str::len(s) == 10u)); fail_unless!((str::char_len(s) == 4u)); - fail_unless!((vec::len(str::chars(s)) == 4u)); - fail_unless!((str::from_chars(str::chars(s)) == s)); + fail_unless!((vec::len(str::to_chars(s)) == 4u)); + fail_unless!((str::from_chars(str::to_chars(s)) == s)); fail_unless!((str::char_at(s, 0u) == 'e')); fail_unless!((str::char_at(s, 1u) == 'é')); From c88a20d171b407aa6a88b516c213e93b7dda5c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Tue, 26 Mar 2013 16:11:48 +0100 Subject: [PATCH 6/7] Added missing lifetime annotations, doc and whitespace cleanups --- src/libcore/str.rs | 132 ++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 1b3761a0db8..bbedf65ea0a 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -186,6 +186,7 @@ pub fn push_str_no_overallocate(lhs: &mut ~str, rhs: &str) { raw::set_len(lhs, llen + rlen); } } + /// Appends a string slice to the back of a string #[inline(always)] pub fn push_str(lhs: &mut ~str, rhs: &str) { @@ -214,7 +215,6 @@ pub fn append(lhs: ~str, rhs: &str) -> ~str { v } - /// Concatenate a vector of strings pub fn concat(v: &[~str]) -> ~str { let mut s: ~str = ~""; @@ -435,15 +435,12 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str { } /// Splits a string into substrings at each occurrence of a given character -pub fn each_split_char(s: &str, sep: char, it: &fn(&str) -> bool) { +pub fn each_split_char(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { each_split_char_inner(s, sep, len(s), true, true, it) } -/** - * Like `split_char`, but a trailing empty string is omitted - * (e.g. 
`split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn each_split_char_no_trailing(s: &str, sep: char, it: &fn(&str) -> bool) { +/// Like `each_split_char`, but a trailing empty string is omitted +pub fn each_split_char_no_trailing(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { each_split_char_inner(s, sep, len(s), true, false, it) } @@ -451,19 +448,19 @@ pub fn each_split_char_no_trailing(s: &str, sep: char, it: &fn(&str) -> bool) { * Splits a string into substrings at each occurrence of a given * character up to 'count' times. * - * The byte must be a valid UTF-8/ASCII byte + * The character must be a valid UTF-8/ASCII character */ -pub fn each_splitn_char(s: &str, sep: char, count: uint, it: &fn(&str) -> bool) { +pub fn each_splitn_char(s: &'a str, sep: char, count: uint, it: &fn(&'a str) -> bool) { each_split_char_inner(s, sep, count, true, true, it) } -/// Like `split_char`, but omits empty strings from the returned vector -pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) { +/// Like `each_split_char`, but omits empty strings +pub fn each_split_char_nonempty(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { each_split_char_inner(s, sep, len(s), false, false, it) } -fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, - allow_trailing_empty: bool, it: &fn(&str) -> bool) { +fn each_split_char_inner(s: &'a str, sep: char, count: uint, allow_empty: bool, + allow_trailing_empty: bool, it: &fn(&'a str) -> bool) { if sep < 128u as char { let b = sep as u8, l = len(s); let mut done = 0u; @@ -478,7 +475,7 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, } i += 1u; } - // only push a non-empty trailing substring + // only slice a non-empty trailing substring if allow_trailing_empty || start < l { if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; } } @@ -488,15 +485,12 @@ fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, } /// Splits a 
string into substrings using a character function -pub fn each_split(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { +pub fn each_split(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { each_split_inner(s, sepfn, len(s), true, true, it) } -/** - * Like `split`, but a trailing empty string is omitted - * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn each_split_no_trailing(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { +/// Like `each_split`, but a trailing empty string is omitted +pub fn each_split_no_trailing(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { each_split_inner(s, sepfn, len(s), true, false, it) } @@ -504,17 +498,17 @@ pub fn each_split_no_trailing(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) - * Splits a string into substrings using a character function, cutting at * most `count` times. */ -pub fn each_splitn(s: &str, sepfn: &fn(char) -> bool, count: uint, it: &fn(&str) -> bool) { +pub fn each_splitn(s: &'a str, sepfn: &fn(char) -> bool, count: uint, it: &fn(&'a str) -> bool) { each_split_inner(s, sepfn, count, true, true, it) } -/// Like `split`, but omits empty strings from the returned vector -pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { +/// Like `each_split`, but omits empty strings +pub fn each_split_nonempty(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { each_split_inner(s, sepfn, len(s), false, false, it) } -fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, - allow_empty: bool, allow_trailing_empty: bool, it: &fn(&str) -> bool) { +fn each_split_inner(s: &'a str, sepfn: &fn(cc: char) -> bool, count: uint, + allow_empty: bool, allow_trailing_empty: bool, it: &fn(&'a str) -> bool) { let l = len(s); let mut i = 0u, start = 0u, done = 0u; while i < l && done < count { @@ -576,16 +570,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) { * # Example * * 
~~~ - * fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")) + * let mut v = ~[]; + * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); } + * fail_unless!(v == ["", "XXX", "YYY", ""]); * ~~~ */ -pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) { +pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&'a str) -> bool) { for iter_between_matches(s, sep) |from, to| { if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } } -pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) { +pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&'a str) -> bool) { for iter_between_matches(s, sep) |from, to| { if to > from { if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } @@ -628,15 +624,17 @@ pub fn levdistance(s: &str, t: &str) -> uint { } /** - * Splits a string into a vector of the substrings separated by LF ('\n'). + * Splits a string into substrings separated by LF ('\n'). */ -pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char_no_trailing(s, '\n', it) } +pub fn each_line(s: &'a str, it: &fn(&'a str) -> bool) { + each_split_char_no_trailing(s, '\n', it) +} /** - * Splits a string into a vector of the substrings separated by LF ('\n') + * Splits a string into substrings separated by LF ('\n') * and/or CR LF ("\r\n") */ -pub fn each_line_any(s: &str, it: &fn(&str) -> bool) { +pub fn each_line_any(s: &'a str, it: &fn(&'a str) -> bool) { for each_line(s) |s| { let l = s.len(); if l > 0u && s[l - 1u] == '\r' as u8 { @@ -647,23 +645,36 @@ pub fn each_line_any(s: &str, it: &fn(&str) -> bool) { } } -/// Splits a string into a vector of the substrings separated by whitespace -pub fn each_word(s: &str, it: &fn(&str) -> bool) { - each_split_nonempty(s, |c| char::is_whitespace(c), it) +/// Splits a string into substrings separated by whitespace +pub fn each_word(s: &'a str, it: &fn(&'a str) -> bool) { + each_split_nonempty(s, char::is_whitespace, it) } -/** Split 
a string into a vector of substrings, - * each of which is less bytes long than a limit +/** Splits a string into substrings with possibly internal whitespace, + * each of them at most `lim` bytes long. The substrings have leading and trailing + * whitespace removed, and are only cut at whitespace boundaries. + * + * # Failure + * + * Fails during iteration if the string contains a non-whitespace + * sequence longer than the limit. */ -pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) { - // Just for fun, let's write this as an automaton +pub fn each_split_within(ss: &'a str, lim: uint, it: &fn(&'a str) -> bool) { + // Just for fun, let's write this as a state machine: + enum SplitWithinState { - A, // Leading whitespace, initial state - B, // Words - C, // Internal and trailing whitespace + A, // leading whitespace, initial state + B, // words + C, // internal and trailing whitespace + } + enum Whitespace { + Ws, // current char is whitespace + Cr // current char is not whitespace + } + enum LengthLimit { + UnderLim, // current char makes current substring still fit in limit + OverLim // current char makes current substring no longer fit in limit } - enum Whitespace { Ws, Cr } - enum LengthLimit { UnderLim, OverLim } let mut slice_start = 0; let mut last_start = 0; @@ -671,9 +682,9 @@ pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) { let mut state = A; let mut cont = true; - let slice = || { cont = it(ss.slice(slice_start, last_end)) }; + let slice: &fn() = || { cont = it(slice(ss, slice_start, last_end)) }; - let machine = |i: uint, c: char| { + let machine: &fn(uint, char) -> bool = |i, c| { let whitespace = if char::is_whitespace(c) { Ws } else { Cr }; let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim }; @@ -693,12 +704,13 @@ pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) { (C, Ws, OverLim) => { slice(); A } (C, Ws, UnderLim) => { C } }; + cont }; str::each_chari(ss, machine); 
- // Let the automaton 'run out' + // Let the automaton 'run out' by supplying trailing whitespace let mut fake_i = ss.len(); while cont && match state { B | C => true, A => false } { machine(fake_i, ' '); @@ -1186,8 +1198,7 @@ pub fn rfind_char_from(s: &str, c: char, start: uint) -> Option { * or equal to `len(s)`. `start` must be the index of a character boundary, * as defined by `is_char_boundary`. */ -pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) - -> Option { +pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) -> Option { if c < 128u as char { fail_unless!(start >= end); fail_unless!(start <= len(s)); @@ -1268,11 +1279,7 @@ pub fn find_from(s: &str, start: uint, f: &fn(char) * or equal to `len(s)`. `start` must be the index of a character * boundary, as defined by `is_char_boundary`. */ -pub fn find_between(s: &str, - start: uint, - end: uint, - f: &fn(char) -> bool) - -> Option { +pub fn find_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option { fail_unless!(start <= end); fail_unless!(end <= len(s)); fail_unless!(is_char_boundary(s, start)); @@ -1323,8 +1330,7 @@ pub fn rfind(s: &str, f: &fn(char) -> bool) -> Option { * `start` must be less than or equal to `len(s)', `start` must be the * index of a character boundary, as defined by `is_char_boundary` */ -pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) - -> Option { +pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) -> Option { rfind_between(s, start, 0u, f) } @@ -1350,9 +1356,7 @@ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) * than or equal to `len(s)`. 
`start` must be the index of a character * boundary, as defined by `is_char_boundary` */ -pub fn rfind_between(s: &str, start: uint, end: uint, - f: &fn(char) -> bool) - -> Option { +pub fn rfind_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option { fail_unless!(start >= end); fail_unless!(start <= len(s)); fail_unless!(is_char_boundary(s, start)); @@ -1408,8 +1412,7 @@ pub fn find_str(haystack: &'a str, needle: &'b str) -> Option { * * `start` must be less than or equal to `len(s)` */ -pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) - -> Option { +pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) -> Option { find_str_between(haystack, needle, start, len(haystack)) } @@ -1433,9 +1436,8 @@ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) * `start` must be less than or equal to `end` and `end` must be less than * or equal to `len(s)`. */ -pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, - end:uint) - -> Option { +pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, end:uint) + -> Option { // See Issue #1932 for why this is a naive search fail_unless!(end <= len(haystack)); let needle_len = len(needle); @@ -1638,7 +1640,6 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) { } } - pub fn from_utf16(v: &[u16]) -> ~str { let mut buf = ~""; unsafe { @@ -1955,7 +1956,6 @@ pub fn as_c_str(s: &str, f: &fn(*libc::c_char) -> T) -> T { } } - /** * Work with the byte buffer and length of a slice. 
* From de468c8cd2f55124f98ae67941bc4c11dee92c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Tue, 26 Mar 2013 19:07:14 +0100 Subject: [PATCH 7/7] Added more missing lifetime annotations --- src/libcore/str.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index bbedf65ea0a..0f393dee597 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -2240,7 +2240,7 @@ pub mod traits { #[cfg(test)] pub mod traits {} -pub trait StrSlice { +pub trait StrSlice<'self> { fn all(&self, it: &fn(char) -> bool) -> bool; fn any(&self, it: &fn(char) -> bool) -> bool; fn contains(&self, needle: &'a str) -> bool; @@ -2260,9 +2260,9 @@ pub trait StrSlice { fn len(&self) -> uint; fn char_len(&self) -> uint; fn slice(&self, begin: uint, end: uint) -> &'self str; - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool); - fn each_split_char(&self, sep: char, it: &fn(&str) -> bool); - fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool); + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool); + fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool); + fn each_split_str(&self, sep: &'a str, it: &fn(&'self str) -> bool); fn starts_with(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; fn to_lower(&self) -> ~str; @@ -2283,7 +2283,7 @@ pub trait StrSlice { } /// Extension methods for strings -impl StrSlice for &'self str { +impl StrSlice<'self> for &'self str { /** * Return true if a predicate matches all characters or if the string * contains no characters @@ -2382,14 +2382,14 @@ impl StrSlice for &'self str { } /// Splits a string into substrings using a character function #[inline] - fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) { + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) { each_split(*self, sepfn, it) } /** * Splits a string into 
substrings at each occurrence of a given character */ #[inline] - fn each_split_char(&self, sep: char, it: &fn(&str) -> bool) { + fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) { each_split_char(*self, sep, it) } /** @@ -2397,7 +2397,7 @@ impl StrSlice for &'self str { * string */ #[inline] - fn each_split_str(&self, sep: &'a str, it: &fn(&str) -> bool) { + fn each_split_str(&self, sep: &'a str, it: &fn(&'self str) -> bool) { each_split_str(*self, sep, it) } /// Returns true if one string starts with another