Various cleanups and optimizations in core::str
This commit is contained in:
parent
35e9192762
commit
d802c1fbd2
7 changed files with 271 additions and 397 deletions
|
@ -440,8 +440,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str,
|
||||||
none {
|
none {
|
||||||
let name =
|
let name =
|
||||||
{
|
{
|
||||||
let os = str::split_byte(
|
let os = str::split_char(fs::basename(output), '.');
|
||||||
fs::basename(output), '.' as u8);
|
|
||||||
if (vec::len(os) < 2u) {
|
if (vec::len(os) < 2u) {
|
||||||
sess.fatal(#fmt("Output file name %s doesn't\
|
sess.fatal(#fmt("Output file name %s doesn't\
|
||||||
appear to have an extension", output));
|
appear to have an extension", output));
|
||||||
|
@ -576,7 +575,7 @@ fn link_binary(sess: session,
|
||||||
} else { ret filename; }
|
} else { ret filename; }
|
||||||
};
|
};
|
||||||
fn rmext(filename: str) -> str {
|
fn rmext(filename: str) -> str {
|
||||||
let parts = str::split_byte(filename, '.' as u8);
|
let parts = str::split_char(filename, '.');
|
||||||
vec::pop(parts);
|
vec::pop(parts);
|
||||||
ret str::connect(parts, ".");
|
ret str::connect(parts, ".");
|
||||||
}
|
}
|
||||||
|
|
|
@ -121,8 +121,8 @@ fn get_relative_to(abs1: fs::path, abs2: fs::path) -> fs::path {
|
||||||
abs1, abs2);
|
abs1, abs2);
|
||||||
let normal1 = fs::normalize(abs1);
|
let normal1 = fs::normalize(abs1);
|
||||||
let normal2 = fs::normalize(abs2);
|
let normal2 = fs::normalize(abs2);
|
||||||
let split1 = str::split_byte(normal1, os_fs::path_sep as u8);
|
let split1 = str::split_char(normal1, os_fs::path_sep);
|
||||||
let split2 = str::split_byte(normal2, os_fs::path_sep as u8);
|
let split2 = str::split_char(normal2, os_fs::path_sep);
|
||||||
let len1 = vec::len(split1);
|
let len1 = vec::len(split1);
|
||||||
let len2 = vec::len(split2);
|
let len2 = vec::len(split2);
|
||||||
assert len1 > 0u;
|
assert len1 > 0u;
|
||||||
|
|
|
@ -119,7 +119,7 @@ fn get_used_libraries(cstore: cstore) -> [str] {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_used_link_args(cstore: cstore, args: str) {
|
fn add_used_link_args(cstore: cstore, args: str) {
|
||||||
p(cstore).used_link_args += str::split_byte(args, ' ' as u8);
|
p(cstore).used_link_args += str::split_char(args, ' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_used_link_args(cstore: cstore) -> [str] {
|
fn get_used_link_args(cstore: cstore) -> [str] {
|
||||||
|
|
|
@ -176,16 +176,16 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn span_to_snippet(sp: span, cm: codemap::codemap) -> str {
|
fn span_to_snippet(sp: span, cm: codemap::codemap) -> str {
|
||||||
let begin = lookup_byte_offset(cm,sp.lo);
|
let begin = lookup_byte_offset(cm, sp.lo);
|
||||||
let end = lookup_byte_offset(cm,sp.hi);
|
let end = lookup_byte_offset(cm, sp.hi);
|
||||||
assert begin.fm == end.fm;
|
assert begin.fm == end.fm;
|
||||||
ret str::slice_chars(*begin.fm.src, begin.pos, end.pos);
|
ret str::slice(*begin.fm.src, begin.pos, end.pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_snippet(cm: codemap::codemap, fidx: uint, lo: uint, hi: uint) -> str
|
fn get_snippet(cm: codemap::codemap, fidx: uint, lo: uint, hi: uint) -> str
|
||||||
{
|
{
|
||||||
let fm = cm.files[fidx];
|
let fm = cm.files[fidx];
|
||||||
ret str::slice_chars(*fm.src, lo, hi)
|
ret str::slice(*fm.src, lo, hi)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_filemap(cm: codemap, filename: str) -> filemap {
|
fn get_filemap(cm: codemap, filename: str) -> filemap {
|
||||||
|
|
|
@ -13,7 +13,7 @@ export
|
||||||
// Creating a string
|
// Creating a string
|
||||||
from_bytes,
|
from_bytes,
|
||||||
from_byte,
|
from_byte,
|
||||||
//push_utf8_bytes,
|
push_char,
|
||||||
from_char,
|
from_char,
|
||||||
from_chars,
|
from_chars,
|
||||||
from_cstr,
|
from_cstr,
|
||||||
|
@ -35,17 +35,12 @@ export
|
||||||
chars,
|
chars,
|
||||||
substr,
|
substr,
|
||||||
slice,
|
slice,
|
||||||
slice_chars,
|
split, splitn, split_nonempty,
|
||||||
split,
|
split_char, splitn_char, split_char_nonempty,
|
||||||
split_str,
|
split_str,
|
||||||
split_char,
|
|
||||||
splitn_char,
|
|
||||||
split_byte,
|
|
||||||
splitn_byte,
|
|
||||||
lines,
|
lines,
|
||||||
lines_any,
|
lines_any,
|
||||||
words,
|
words,
|
||||||
windowed,
|
|
||||||
to_lower,
|
to_lower,
|
||||||
to_upper,
|
to_upper,
|
||||||
replace,
|
replace,
|
||||||
|
@ -95,6 +90,7 @@ export
|
||||||
substr_len_chars,
|
substr_len_chars,
|
||||||
utf8_char_width,
|
utf8_char_width,
|
||||||
char_range_at,
|
char_range_at,
|
||||||
|
is_char_boundary,
|
||||||
char_at,
|
char_at,
|
||||||
substr_all,
|
substr_all,
|
||||||
escape_char,
|
escape_char,
|
||||||
|
@ -135,42 +131,49 @@ Function: from_byte
|
||||||
|
|
||||||
Convert a byte to a UTF-8 string. Fails if invalid UTF-8.
|
Convert a byte to a UTF-8 string. Fails if invalid UTF-8.
|
||||||
*/
|
*/
|
||||||
fn from_byte(uu: u8) -> str {
|
fn from_byte(b: u8) -> str unsafe {
|
||||||
from_bytes([uu])
|
assert b < 128u8;
|
||||||
|
let v = [b, 0u8];
|
||||||
|
let s: str = ::unsafe::reinterpret_cast(v);
|
||||||
|
::unsafe::leak(v);
|
||||||
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_utf8_bytes(&s: str, ch: char) unsafe {
|
/*
|
||||||
|
Function: push_char
|
||||||
|
|
||||||
|
Appends a character at the end of a string.
|
||||||
|
*/
|
||||||
|
fn push_char(&s: str, ch: char) unsafe {
|
||||||
let code = ch as uint;
|
let code = ch as uint;
|
||||||
let bytes =
|
if code < max_one_b {
|
||||||
if code < max_one_b {
|
rustrt::rust_str_push(s, code as u8);
|
||||||
[code as u8]
|
} else if code < max_two_b {
|
||||||
} else if code < max_two_b {
|
rustrt::rust_str_push(s, (code >> 6u & 31u | tag_two_b) as u8);
|
||||||
[(code >> 6u & 31u | tag_two_b) as u8,
|
rustrt::rust_str_push(s, (code & 63u | tag_cont) as u8);
|
||||||
(code & 63u | tag_cont) as u8]
|
} else if code < max_three_b {
|
||||||
} else if code < max_three_b {
|
rustrt::rust_str_push(s, (code >> 12u & 15u | tag_three_b) as u8);
|
||||||
[(code >> 12u & 15u | tag_three_b) as u8,
|
rustrt::rust_str_push(s, (code >> 6u & 63u | tag_cont) as u8);
|
||||||
(code >> 6u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code & 63u | tag_cont) as u8);
|
||||||
(code & 63u | tag_cont) as u8]
|
} else if code < max_four_b {
|
||||||
} else if code < max_four_b {
|
rustrt::rust_str_push(s, (code >> 18u & 7u | tag_four_b) as u8);
|
||||||
[(code >> 18u & 7u | tag_four_b) as u8,
|
rustrt::rust_str_push(s, (code >> 12u & 63u | tag_cont) as u8);
|
||||||
(code >> 12u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 6u & 63u | tag_cont) as u8);
|
||||||
(code >> 6u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code & 63u | tag_cont) as u8);
|
||||||
(code & 63u | tag_cont) as u8]
|
} else if code < max_five_b {
|
||||||
} else if code < max_five_b {
|
rustrt::rust_str_push(s, (code >> 24u & 3u | tag_five_b) as u8);
|
||||||
[(code >> 24u & 3u | tag_five_b) as u8,
|
rustrt::rust_str_push(s, (code >> 18u & 63u | tag_cont) as u8);
|
||||||
(code >> 18u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 12u & 63u | tag_cont) as u8);
|
||||||
(code >> 12u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 6u & 63u | tag_cont) as u8);
|
||||||
(code >> 6u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code & 63u | tag_cont) as u8);
|
||||||
(code & 63u | tag_cont) as u8]
|
} else {
|
||||||
} else {
|
rustrt::rust_str_push(s, (code >> 30u & 1u | tag_six_b) as u8);
|
||||||
[(code >> 30u & 1u | tag_six_b) as u8,
|
rustrt::rust_str_push(s, (code >> 24u & 63u | tag_cont) as u8);
|
||||||
(code >> 24u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 18u & 63u | tag_cont) as u8);
|
||||||
(code >> 18u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 12u & 63u | tag_cont) as u8);
|
||||||
(code >> 12u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code >> 6u & 63u | tag_cont) as u8);
|
||||||
(code >> 6u & 63u | tag_cont) as u8,
|
rustrt::rust_str_push(s, (code & 63u | tag_cont) as u8);
|
||||||
(code & 63u | tag_cont) as u8]
|
}
|
||||||
};
|
|
||||||
unsafe::push_bytes(s, bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -180,7 +183,7 @@ Convert a char to a string
|
||||||
*/
|
*/
|
||||||
fn from_char(ch: char) -> str {
|
fn from_char(ch: char) -> str {
|
||||||
let buf = "";
|
let buf = "";
|
||||||
push_utf8_bytes(buf, ch);
|
push_char(buf, ch);
|
||||||
ret buf;
|
ret buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +194,8 @@ Convert a vector of chars to a string
|
||||||
*/
|
*/
|
||||||
fn from_chars(chs: [char]) -> str {
|
fn from_chars(chs: [char]) -> str {
|
||||||
let buf = "";
|
let buf = "";
|
||||||
for ch: char in chs { push_utf8_bytes(buf, ch); }
|
reserve(buf, chs.len());
|
||||||
|
for ch in chs { push_char(buf, ch); }
|
||||||
ret buf;
|
ret buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -201,12 +205,10 @@ Function: from_cstr
|
||||||
Create a Rust string from a null-terminated C string
|
Create a Rust string from a null-terminated C string
|
||||||
*/
|
*/
|
||||||
fn from_cstr(cstr: sbuf) -> str unsafe {
|
fn from_cstr(cstr: sbuf) -> str unsafe {
|
||||||
let start = cstr;
|
let curr = cstr, i = 0u;
|
||||||
let curr = start;
|
|
||||||
let i = 0u;
|
|
||||||
while *curr != 0u8 {
|
while *curr != 0u8 {
|
||||||
i += 1u;
|
i += 1u;
|
||||||
curr = ptr::offset(start, i);
|
curr = ptr::offset(cstr, i);
|
||||||
}
|
}
|
||||||
ret from_cstr_len(cstr, i);
|
ret from_cstr_len(cstr, i);
|
||||||
}
|
}
|
||||||
|
@ -246,8 +248,7 @@ Function: connect
|
||||||
Concatenate a vector of strings, placing a given separator between each
|
Concatenate a vector of strings, placing a given separator between each
|
||||||
*/
|
*/
|
||||||
fn connect(v: [str], sep: str) -> str {
|
fn connect(v: [str], sep: str) -> str {
|
||||||
let s: str = "";
|
let s = "", first = true;
|
||||||
let first: bool = true;
|
|
||||||
for ss: str in v {
|
for ss: str in v {
|
||||||
if first { first = false; } else { s += sep; }
|
if first { first = false; } else { s += sep; }
|
||||||
s += ss;
|
s += ss;
|
||||||
|
@ -259,13 +260,6 @@ fn connect(v: [str], sep: str) -> str {
|
||||||
Section: Adding to and removing from a string
|
Section: Adding to and removing from a string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
Function: push_char
|
|
||||||
|
|
||||||
Append a character to a string
|
|
||||||
*/
|
|
||||||
fn push_char(&s: str, ch: char) { s += from_char(ch); }
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: pop_char
|
Function: pop_char
|
||||||
|
|
||||||
|
@ -276,6 +270,7 @@ If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn pop_char(&s: str) -> char {
|
fn pop_char(&s: str) -> char {
|
||||||
let end = len(s);
|
let end = len(s);
|
||||||
|
assert end > 0u;
|
||||||
let {ch, prev} = char_range_at_reverse(s, end);
|
let {ch, prev} = char_range_at_reverse(s, end);
|
||||||
unsafe { unsafe::set_len(s, prev); }
|
unsafe { unsafe::set_len(s, prev); }
|
||||||
ret ch;
|
ret ch;
|
||||||
|
@ -291,9 +286,9 @@ Failure:
|
||||||
If the string does not contain any characters.
|
If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn shift_char(&s: str) -> char unsafe {
|
fn shift_char(&s: str) -> char unsafe {
|
||||||
let r = char_range_at(s, 0u);
|
let {ch, next} = char_range_at(s, 0u);
|
||||||
s = unsafe::slice_bytes(s, r.next, len(s));
|
s = unsafe::slice_bytes(s, next, len(s));
|
||||||
ret r.ch;
|
ret ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -369,13 +364,11 @@ Function: chars
|
||||||
Convert a string to a vector of characters
|
Convert a string to a vector of characters
|
||||||
*/
|
*/
|
||||||
fn chars(s: str) -> [char] {
|
fn chars(s: str) -> [char] {
|
||||||
let buf: [char] = [];
|
let buf = [], i = 0u, len = len(s);
|
||||||
let i = 0u;
|
|
||||||
let len = len(s);
|
|
||||||
while i < len {
|
while i < len {
|
||||||
let cur = char_range_at(s, i);
|
let {ch, next} = char_range_at(s, i);
|
||||||
buf += [cur.ch];
|
buf += [ch];
|
||||||
i = cur.next;
|
i = next;
|
||||||
}
|
}
|
||||||
ret buf;
|
ret buf;
|
||||||
}
|
}
|
||||||
|
@ -399,94 +392,106 @@ fn substr(s: str, begin: uint, len: uint) -> str {
|
||||||
// Return a slice of the given string from the byte range [`begin`..`end`)
|
// Return a slice of the given string from the byte range [`begin`..`end`)
|
||||||
// or else fail when `begin` and `end` do not point to valid characters or
|
// or else fail when `begin` and `end` do not point to valid characters or
|
||||||
// beyond the last character of the string
|
// beyond the last character of the string
|
||||||
fn slice(ss: str, begin: uint, end: uint) -> str {
|
fn slice(s: str, begin: uint, end: uint) -> str unsafe {
|
||||||
alt maybe_slice(ss, begin, end) {
|
assert is_char_boundary(s, begin);
|
||||||
some(sli) { ret sli; }
|
assert is_char_boundary(s, end);
|
||||||
none { fail "slice requires a valid start and end"; }
|
unsafe::slice_bytes(s, begin, end)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function: maybe_slice
|
// Function: split_char
|
||||||
//
|
//
|
||||||
// Like slice, only returns an option<str>
|
// Splits a string into substrings at each occurrence of a given
|
||||||
fn maybe_slice(ss: str, begin: uint, end: uint) -> option<str> unsafe {
|
// character
|
||||||
let sli = unsafe::slice_bytes(ss, begin, end);
|
fn split_char(s: str, sep: char) -> [str] {
|
||||||
|
split_char_inner(s, sep, len(s), true)
|
||||||
|
}
|
||||||
|
|
||||||
if is_utf8(bytes(sli)) {
|
// Function: splitn_char
|
||||||
ret some(sli);
|
//
|
||||||
} else {
|
// Splits a string into substrings at each occurrence of a given
|
||||||
ret none;
|
// character up to 'count' times
|
||||||
}
|
//
|
||||||
|
// The separator may be any character; it need not be an ASCII byte
|
||||||
|
fn splitn_char(s: str, sep: char, count: uint) -> [str] {
|
||||||
|
split_char_inner(s, sep, count, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function: split_char_nonempty
|
||||||
|
//
|
||||||
|
// Like `split_char`, but omits empty strings from the returned vector.
|
||||||
|
fn split_char_nonempty(s: str, sep: char) -> [str] {
|
||||||
|
split_char_inner(s, sep, len(s), false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_char_inner(s: str, sep: char, count: uint, allow_empty: bool)
|
||||||
|
-> [str] unsafe {
|
||||||
|
if sep < 128u as char {
|
||||||
|
let result = [], b = sep as u8, l = len(s), done = 0u;
|
||||||
|
let i = 0u, start = 0u;
|
||||||
|
while i < l && done < count {
|
||||||
|
if s[i] == b {
|
||||||
|
if allow_empty || start < i {
|
||||||
|
result += [unsafe::slice_bytes(s, start, i)];
|
||||||
|
}
|
||||||
|
start = i + 1u;
|
||||||
|
done += 1u;
|
||||||
|
}
|
||||||
|
i += 1u;
|
||||||
|
}
|
||||||
|
if allow_empty || start < l {
|
||||||
|
result += [unsafe::slice_bytes(s, start, l)];
|
||||||
|
}
|
||||||
|
result
|
||||||
|
} else {
|
||||||
|
splitn(s, {|cur| cur == sep}, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: split
|
||||||
|
|
||||||
|
Splits a string into substrings using a character function
|
||||||
|
*/
|
||||||
|
fn split(s: str, sepfn: fn(char) -> bool) -> [str] {
|
||||||
|
split_inner(s, sepfn, len(s), true)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: slice_chars
|
Function: splitn
|
||||||
|
|
||||||
Unicode-safe slice. Returns a slice of the given string containing
|
Splits a string into substrings using a character function, cutting at
|
||||||
the characters in the range [`begin`..`end`). `begin` and `end` are
|
most `count` times.
|
||||||
character indexes, not byte indexes.
|
|
||||||
|
|
||||||
Failure:
|
|
||||||
|
|
||||||
- If begin is greater than end
|
|
||||||
- If end is greater than the character length of the string
|
|
||||||
|
|
||||||
FIXME: make faster by avoiding char conversion
|
|
||||||
FIXME: delete?
|
|
||||||
*/
|
*/
|
||||||
fn slice_chars(s: str, begin: uint, end: uint) -> str {
|
fn splitn(s: str, sepfn: fn(char) -> bool, count: uint) -> [str] {
|
||||||
from_chars(vec::slice(chars(s), begin, end))
|
split_inner(s, sepfn, count, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function: split_byte
|
// Function: split_nonempty
|
||||||
//
|
//
|
||||||
// Splits a string into substrings at each occurrence of a given byte
|
// Like `split`, but omits empty strings from the returned vector.
|
||||||
//
|
fn split_nonempty(s: str, sepfn: fn(char) -> bool) -> [str] {
|
||||||
// The byte must be a valid UTF-8/ASCII byte
|
split_inner(s, sepfn, len(s), false)
|
||||||
fn split_byte(ss: str, sep: u8) -> [str] unsafe {
|
|
||||||
// still safe if we only split on an ASCII byte
|
|
||||||
assert u8::is_ascii(sep);
|
|
||||||
|
|
||||||
let vv = [];
|
|
||||||
let start = 0u, current = 0u;
|
|
||||||
|
|
||||||
str::bytes_iter(ss) {|cc|
|
|
||||||
if sep == cc {
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
|
|
||||||
start = current + 1u;
|
|
||||||
}
|
|
||||||
current += 1u;
|
|
||||||
}
|
|
||||||
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
|
|
||||||
ret vv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function: splitn_byte
|
fn split_inner(s: str, sepfn: fn(cc: char) -> bool, count: uint,
|
||||||
//
|
allow_empty: bool) -> [str] unsafe {
|
||||||
// Splits a string into substrings at each occurrence of a given byte
|
let result = [], i = 0u, l = len(s), start = 0u, done = 0u;
|
||||||
// up to 'count' times
|
while i < l && done < count {
|
||||||
//
|
let {ch, next} = char_range_at(s, i);
|
||||||
// The byte must be a valid UTF-8/ASCII byte
|
if sepfn(ch) {
|
||||||
fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
|
if allow_empty || start < i {
|
||||||
// still safe if we only split on an ASCII byte
|
result += [unsafe::slice_bytes(s, start, i)];
|
||||||
assert u8::is_ascii(sep);
|
}
|
||||||
|
start = next;
|
||||||
let vv = [];
|
done += 1u;
|
||||||
let start = 0u, current = 0u, len = len(ss);
|
|
||||||
let splits_done = 0u;
|
|
||||||
|
|
||||||
while splits_done < count && current < len {
|
|
||||||
if sep == ss[current] {
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
|
|
||||||
start = current + 1u;
|
|
||||||
splits_done += 1u;
|
|
||||||
}
|
}
|
||||||
current += 1u;
|
i = next;
|
||||||
}
|
}
|
||||||
|
if allow_empty || start < l {
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, len));
|
result += [unsafe::slice_bytes(s, start, l)];
|
||||||
ret vv;
|
}
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -499,104 +504,35 @@ Note that this has recently been changed. For example:
|
||||||
|
|
||||||
FIXME: Boyer-Moore should be faster
|
FIXME: Boyer-Moore should be faster
|
||||||
*/
|
*/
|
||||||
fn split_str(ss: str, sep: str) -> [str] unsafe {
|
fn split_str(s: str, sep: str) -> [str] unsafe {
|
||||||
// unsafe is justified: we are splitting
|
let sep_len = len(sep), l = len(s);
|
||||||
// UTF-8 with UTF-8, so the results will be OK
|
|
||||||
|
|
||||||
let sep_len = len(sep);
|
|
||||||
assert sep_len > 0u;
|
assert sep_len > 0u;
|
||||||
let vv = [];
|
let result = [], i = 0u, start = 0u;
|
||||||
let start = 0u, start_match = 0u, current = 0u, matching = 0u;
|
let match_start = 0u, match_i = 0u;
|
||||||
|
|
||||||
str::bytes_iter(ss) {|cc|
|
while i < l {
|
||||||
if sep[matching] == cc {
|
if s[i] == sep[match_i] {
|
||||||
matching += 1u;
|
if match_i == 0u { match_start = i; }
|
||||||
|
match_i += 1u;
|
||||||
|
// Found a match
|
||||||
|
if match_i == sep_len {
|
||||||
|
result += [unsafe::slice_bytes(s, start, match_start)];
|
||||||
|
match_i = 0u;
|
||||||
|
start = i + 1u;
|
||||||
|
}
|
||||||
|
i += 1u;
|
||||||
} else {
|
} else {
|
||||||
start_match += 1u;
|
// Failed match, backtrack
|
||||||
|
if match_i > 0u {
|
||||||
|
match_i = 0u;
|
||||||
|
i = match_start + 1u;
|
||||||
|
} else {
|
||||||
|
i += 1u;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if matching == sep_len {
|
|
||||||
// found a separator
|
|
||||||
// push whatever is before it, including ""
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, start_match));
|
|
||||||
|
|
||||||
// reset cursors and counters
|
|
||||||
start = current + 1u;
|
|
||||||
start_match = current + 1u;
|
|
||||||
matching = 0u;
|
|
||||||
}
|
|
||||||
|
|
||||||
current += 1u;
|
|
||||||
}
|
}
|
||||||
|
result += [unsafe::slice_bytes(s, start, l)];
|
||||||
// whether we have a "", or something meaningful, push it
|
result
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
|
|
||||||
ret vv;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Function: split
|
|
||||||
|
|
||||||
Splits a string into substrings using a character function
|
|
||||||
(unicode safe)
|
|
||||||
*/
|
|
||||||
fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
|
|
||||||
let vv: [str] = [];
|
|
||||||
let accum: str = "";
|
|
||||||
let ends_with_sep: bool = false;
|
|
||||||
|
|
||||||
chars_iter(ss, {|cc| if sepfn(cc) {
|
|
||||||
vv += [accum];
|
|
||||||
accum = "";
|
|
||||||
ends_with_sep = true;
|
|
||||||
} else {
|
|
||||||
str::push_char(accum, cc);
|
|
||||||
ends_with_sep = false;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if len_chars(accum) >= 0u || ends_with_sep {
|
|
||||||
vv += [accum];
|
|
||||||
}
|
|
||||||
|
|
||||||
ret vv;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Function: split_char
|
|
||||||
|
|
||||||
Splits a string into a vector of the substrings separated by a given character
|
|
||||||
*/
|
|
||||||
fn split_char(ss: str, cc: char) -> [str] {
|
|
||||||
split(ss, {|kk| kk == cc})
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Function: splitn_char
|
|
||||||
|
|
||||||
Splits a string into a vector of the substrings separated by a given character
|
|
||||||
up to `count` times
|
|
||||||
*/
|
|
||||||
fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe {
|
|
||||||
|
|
||||||
let vv = [];
|
|
||||||
let start = 0u, current = 0u, len = len(ss);
|
|
||||||
let splits_done = 0u;
|
|
||||||
|
|
||||||
while splits_done < count && current < len {
|
|
||||||
// grab a char...
|
|
||||||
let {ch, next} = char_range_at(ss, current);
|
|
||||||
|
|
||||||
if sep == ch {
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
|
|
||||||
start = next;
|
|
||||||
splits_done += 1u;
|
|
||||||
}
|
|
||||||
current = next;
|
|
||||||
}
|
|
||||||
|
|
||||||
vec::push(vv, str::unsafe::slice_bytes(ss, start, len));
|
|
||||||
ret vv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -605,9 +541,7 @@ Function: lines
|
||||||
Splits a string into a vector of the substrings
|
Splits a string into a vector of the substrings
|
||||||
separated by LF ('\n')
|
separated by LF ('\n')
|
||||||
*/
|
*/
|
||||||
fn lines(ss: str) -> [str] {
|
fn lines(s: str) -> [str] { split_char(s, '\n') }
|
||||||
split(ss, {|cc| cc == '\n'})
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: lines_any
|
Function: lines_any
|
||||||
|
@ -615,8 +549,14 @@ Function: lines_any
|
||||||
Splits a string into a vector of the substrings
|
Splits a string into a vector of the substrings
|
||||||
separated by LF ('\n') and/or CR LF ('\r\n')
|
separated by LF ('\n') and/or CR LF ('\r\n')
|
||||||
*/
|
*/
|
||||||
fn lines_any(ss: str) -> [str] {
|
fn lines_any(s: str) -> [str] {
|
||||||
vec::map(lines(ss), {|s| trim_right(s)})
|
vec::map(lines(s), {|s|
|
||||||
|
let l = len(s), cp = s;
|
||||||
|
if l > 0u && s[l - 1u] == '\r' as u8 {
|
||||||
|
unsafe { unsafe::set_len(cp, l - 1u); }
|
||||||
|
}
|
||||||
|
cp
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -625,30 +565,8 @@ Function: words
|
||||||
Splits a string into a vector of the substrings
|
Splits a string into a vector of the substrings
|
||||||
separated by whitespace
|
separated by whitespace
|
||||||
*/
|
*/
|
||||||
fn words(ss: str) -> [str] {
|
fn words(s: str) -> [str] {
|
||||||
ret vec::filter( split(ss, {|cc| char::is_whitespace(cc)}),
|
split_nonempty(s, {|c| char::is_whitespace(c)})
|
||||||
{|w| 0u < str::len_chars(w)});
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Function: windowed
|
|
||||||
|
|
||||||
Create a vector of substrings of size `nn`
|
|
||||||
*/
|
|
||||||
fn windowed(nn: uint, ss: str) -> [str] {
|
|
||||||
let ww = [];
|
|
||||||
let len = str::len_chars(ss);
|
|
||||||
|
|
||||||
assert 1u <= nn;
|
|
||||||
|
|
||||||
let ii = 0u;
|
|
||||||
while ii+nn <= len {
|
|
||||||
let w = slice_chars( ss, ii, ii+nn );
|
|
||||||
vec::push(ww,w);
|
|
||||||
ii += 1u;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret ww;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1308,6 +1226,18 @@ fn char_range_at(s: str, i: uint) -> {ch: char, next: uint} {
|
||||||
ret {ch: val as char, next: i};
|
ret {ch: val as char, next: i};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: is_char_boundary
|
||||||
|
|
||||||
|
Returns false if the index points into the middle of a multi-byte
|
||||||
|
character sequence.
|
||||||
|
*/
|
||||||
|
pure fn is_char_boundary(s: str, index: uint) -> bool {
|
||||||
|
if index == len(s) { ret true; }
|
||||||
|
let b = s[index];
|
||||||
|
ret b < 128u8 || b >= 192u8;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: char_at
|
Function: char_at
|
||||||
|
|
||||||
|
@ -1647,43 +1577,42 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_byte() {
|
fn test_split_char() {
|
||||||
fn t(s: str, c: char, u: [str]) {
|
fn t(s: str, c: char, u: [str]) {
|
||||||
log(debug, "split_byte: " + s);
|
log(debug, "split_byte: " + s);
|
||||||
let v = split_byte(s, c as u8);
|
let v = split_char(s, c);
|
||||||
#debug("split_byte to: ");
|
#debug("split_byte to: %?", v);
|
||||||
log(debug, v);
|
assert vec::all2(v, u, { |a,b| a == b });
|
||||||
assert (vec::all2(v, u, { |a,b| a == b }));
|
|
||||||
}
|
}
|
||||||
t("abc.hello.there", '.', ["abc", "hello", "there"]);
|
t("abc.hello.there", '.', ["abc", "hello", "there"]);
|
||||||
t(".hello.there", '.', ["", "hello", "there"]);
|
t(".hello.there", '.', ["", "hello", "there"]);
|
||||||
t("...hello.there.", '.', ["", "", "", "hello", "there", ""]);
|
t("...hello.there.", '.', ["", "", "", "hello", "there", ""]);
|
||||||
|
|
||||||
assert ["", "", "", "hello", "there", ""]
|
assert ["", "", "", "hello", "there", ""]
|
||||||
== split_byte("...hello.there.", '.' as u8);
|
== split_char("...hello.there.", '.');
|
||||||
|
|
||||||
assert [""] == split_byte("", 'z' as u8);
|
assert [""] == split_char("", 'z');
|
||||||
assert ["",""] == split_byte("z", 'z' as u8);
|
assert ["",""] == split_char("z", 'z');
|
||||||
assert ["ok"] == split_byte("ok", 'z' as u8);
|
assert ["ok"] == split_char("ok", 'z');
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_byte_2() {
|
fn test_split_char_2() {
|
||||||
let data = "ประเทศไทย中华Việt Nam";
|
let data = "ประเทศไทย中华Việt Nam";
|
||||||
assert ["ประเทศไทย中华", "iệt Nam"]
|
assert ["ประเทศไทย中华", "iệt Nam"]
|
||||||
== split_byte(data, 'V' as u8);
|
== split_char(data, 'V');
|
||||||
|
assert ["ประเ", "ศไ", "ย中华Việt Nam"]
|
||||||
|
== split_char(data, 'ท');
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_splitn_byte() {
|
fn test_splitn_char() {
|
||||||
fn t(s: str, c: char, n: uint, u: [str]) {
|
fn t(s: str, c: char, n: uint, u: [str]) {
|
||||||
log(debug, "splitn_byte: " + s);
|
log(debug, "splitn_byte: " + s);
|
||||||
let v = splitn_byte(s, c as u8, n);
|
let v = splitn_char(s, c, n);
|
||||||
#debug("split_byte to: ");
|
#debug("split_byte to: %?", v);
|
||||||
log(debug, v);
|
#debug("comparing vs. %?", u);
|
||||||
#debug("comparing vs. ");
|
assert vec::all2(v, u, { |a,b| a == b });
|
||||||
log(debug, u);
|
|
||||||
assert (vec::all2(v, u, { |a,b| a == b }));
|
|
||||||
}
|
}
|
||||||
t("abc.hello.there", '.', 0u, ["abc.hello.there"]);
|
t("abc.hello.there", '.', 0u, ["abc.hello.there"]);
|
||||||
t("abc.hello.there", '.', 1u, ["abc", "hello.there"]);
|
t("abc.hello.there", '.', 1u, ["abc", "hello.there"]);
|
||||||
|
@ -1694,19 +1623,36 @@ mod tests {
|
||||||
t("...hello.there.", '.', 3u, ["", "", "", "hello.there."]);
|
t("...hello.there.", '.', 3u, ["", "", "", "hello.there."]);
|
||||||
t("...hello.there.", '.', 5u, ["", "", "", "hello", "there", ""]);
|
t("...hello.there.", '.', 5u, ["", "", "", "hello", "there", ""]);
|
||||||
|
|
||||||
assert [""] == splitn_byte("", 'z' as u8, 5u);
|
assert [""] == splitn_char("", 'z', 5u);
|
||||||
assert ["",""] == splitn_byte("z", 'z' as u8, 5u);
|
assert ["",""] == splitn_char("z", 'z', 5u);
|
||||||
assert ["ok"] == splitn_byte("ok", 'z' as u8, 5u);
|
assert ["ok"] == splitn_char("ok", 'z', 5u);
|
||||||
assert ["z"] == splitn_byte("z", 'z' as u8, 0u);
|
assert ["z"] == splitn_char("z", 'z', 0u);
|
||||||
assert ["w.x.y"] == splitn_byte("w.x.y", '.' as u8, 0u);
|
assert ["w.x.y"] == splitn_char("w.x.y", '.', 0u);
|
||||||
assert ["w","x.y"] == splitn_byte("w.x.y", '.' as u8, 1u);
|
assert ["w","x.y"] == splitn_char("w.x.y", '.', 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_splitn_byte_2() {
|
fn test_splitn_char_2 () {
|
||||||
|
let data = "ประเทศไทย中华Việt Nam";
|
||||||
|
assert ["ประเทศไทย中", "Việt Nam"]
|
||||||
|
== splitn_char(data, '华', 1u);
|
||||||
|
|
||||||
|
assert ["", "", "XXX", "YYYzWWWz"]
|
||||||
|
== splitn_char("zzXXXzYYYzWWWz", 'z', 3u);
|
||||||
|
assert ["",""] == splitn_char("z", 'z', 5u);
|
||||||
|
assert [""] == splitn_char("", 'z', 5u);
|
||||||
|
assert ["ok"] == splitn_char("ok", 'z', 5u);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_splitn_char_3() {
|
||||||
let data = "ประเทศไทย中华Việt Nam";
|
let data = "ประเทศไทย中华Việt Nam";
|
||||||
assert ["ประเทศไทย中华", "iệt Nam"]
|
assert ["ประเทศไทย中华", "iệt Nam"]
|
||||||
== splitn_byte(data, 'V' as u8, 1u);
|
== splitn_char(data, 'V', 1u);
|
||||||
|
assert ["ประเ", "ศไทย中华Việt Nam"]
|
||||||
|
== splitn_char(data, 'ท', 1u);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1716,6 +1662,7 @@ mod tests {
|
||||||
assert eq(v[i], k);
|
assert eq(v[i], k);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t("--1233345--", "12345", 0, "--1233345--");
|
||||||
t("abc::hello::there", "::", 0, "abc");
|
t("abc::hello::there", "::", 0, "abc");
|
||||||
t("abc::hello::there", "::", 1, "hello");
|
t("abc::hello::there", "::", 1, "hello");
|
||||||
t("abc::hello::there", "::", 2, "there");
|
t("abc::hello::there", "::", 2, "there");
|
||||||
|
@ -1744,7 +1691,7 @@ mod tests {
|
||||||
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split () {
|
fn test_split() {
|
||||||
let data = "ประเทศไทย中华Việt Nam";
|
let data = "ประเทศไทย中华Việt Nam";
|
||||||
assert ["ประเทศไทย中", "Việt Nam"]
|
assert ["ประเทศไทย中", "Việt Nam"]
|
||||||
== split (data, {|cc| cc == '华'});
|
== split (data, {|cc| cc == '华'});
|
||||||
|
@ -1761,33 +1708,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_char () {
|
fn test_lines() {
|
||||||
let data = "ประเทศไทย中华Việt Nam";
|
|
||||||
assert ["ประเทศไทย中", "Việt Nam"]
|
|
||||||
== split_char(data, '华');
|
|
||||||
|
|
||||||
assert ["", "", "XXX", "YYY", ""]
|
|
||||||
== split_char("zzXXXzYYYz", 'z');
|
|
||||||
assert ["",""] == split_char("z", 'z');
|
|
||||||
assert [""] == split_char("", 'z');
|
|
||||||
assert ["ok"] == split_char("ok", 'z');
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_splitn_char () {
|
|
||||||
let data = "ประเทศไทย中华Việt Nam";
|
|
||||||
assert ["ประเทศไทย中", "Việt Nam"]
|
|
||||||
== splitn_char(data, '华', 1u);
|
|
||||||
|
|
||||||
assert ["", "", "XXX", "YYYzWWWz"]
|
|
||||||
== splitn_char("zzXXXzYYYzWWWz", 'z', 3u);
|
|
||||||
assert ["",""] == splitn_char("z", 'z', 5u);
|
|
||||||
assert [""] == splitn_char("", 'z', 5u);
|
|
||||||
assert ["ok"] == splitn_char("ok", 'z', 5u);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_lines () {
|
|
||||||
let lf = "\nMary had a little lamb\nLittle lamb\n";
|
let lf = "\nMary had a little lamb\nLittle lamb\n";
|
||||||
let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n";
|
let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n";
|
||||||
|
|
||||||
|
@ -2045,10 +1966,10 @@ mod tests {
|
||||||
assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 3u)));
|
assert (eq("\u65e5", slice("\u65e5\u672c", 0u, 3u)));
|
||||||
|
|
||||||
let data = "ประเทศไทย中华";
|
let data = "ประเทศไทย中华";
|
||||||
assert (eq("ป", slice(data, 0u, 3u)));
|
assert "ป" == slice(data, 0u, 3u);
|
||||||
assert (eq("ร", slice(data, 3u, 6u)));
|
assert "ร" == slice(data, 3u, 6u);
|
||||||
assert (eq("", slice(data, 1u, 1u)));
|
assert "" == slice(data, 3u, 3u);
|
||||||
assert (eq("华", slice(data, 30u, 33u)));
|
assert "华" == slice(data, 30u, 33u);
|
||||||
|
|
||||||
fn a_million_letter_X() -> str {
|
fn a_million_letter_X() -> str {
|
||||||
let i = 0;
|
let i = 0;
|
||||||
|
@ -2062,22 +1983,25 @@ mod tests {
|
||||||
while i < 100000 { rs += "华华华华华"; i += 1; }
|
while i < 100000 { rs += "华华华华华"; i += 1; }
|
||||||
ret rs;
|
ret rs;
|
||||||
}
|
}
|
||||||
assert (eq(half_a_million_letter_X(),
|
assert eq(half_a_million_letter_X(),
|
||||||
slice(a_million_letter_X(), 0u, (3u * 500000u))));
|
slice(a_million_letter_X(), 0u, 3u * 500000u));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_maybe_slice() {
|
fn test_slice_2() {
|
||||||
let ss = "中华Việt Nam";
|
let ss = "中华Việt Nam";
|
||||||
|
|
||||||
assert none == maybe_slice(ss, 0u, 2u);
|
assert "华" == slice(ss, 3u, 6u);
|
||||||
assert none == maybe_slice(ss, 1u, 3u);
|
assert "Việt Nam" == slice(ss, 6u, 16u);
|
||||||
assert none == maybe_slice(ss, 1u, 2u);
|
|
||||||
assert some("华") == maybe_slice(ss, 3u, 6u);
|
|
||||||
assert some("Việt Nam") == maybe_slice(ss, 6u, 16u);
|
|
||||||
assert none == maybe_slice(ss, 4u, 16u);
|
|
||||||
|
|
||||||
/* 0: 中
|
assert "ab" == slice("abc", 0u, 2u);
|
||||||
|
assert "bc" == slice("abc", 1u, 3u);
|
||||||
|
assert "" == slice("abc", 1u, 1u);
|
||||||
|
|
||||||
|
assert "中" == slice(ss, 0u, 3u);
|
||||||
|
assert "华V" == slice(ss, 3u, 7u);
|
||||||
|
assert "" == slice(ss, 3u, 3u);
|
||||||
|
/*0: 中
|
||||||
3: 华
|
3: 华
|
||||||
6: V
|
6: V
|
||||||
7: i
|
7: i
|
||||||
|
@ -2090,32 +2014,10 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_slice_chars() {
|
#[should_fail]
|
||||||
assert (eq("ab", slice_chars("abc", 0u, 2u)));
|
#[ignore(cfg(target_os = "win32"))]
|
||||||
assert (eq("bc", slice_chars("abc", 1u, 3u)));
|
fn test_slice_fail() {
|
||||||
assert (eq("", slice_chars("abc", 1u, 1u)));
|
slice("中华Việt Nam", 0u, 2u);
|
||||||
assert (eq("\u65e5", slice_chars("\u65e5\u672c", 0u, 1u)));
|
|
||||||
|
|
||||||
let data = "ประเทศไทย中华";
|
|
||||||
assert (eq("ป", slice_chars(data, 0u, 1u)));
|
|
||||||
assert (eq("ร", slice_chars(data, 1u, 2u)));
|
|
||||||
assert (eq("华", slice_chars(data, 10u, 11u)));
|
|
||||||
assert (eq("", slice_chars(data, 1u, 1u)));
|
|
||||||
|
|
||||||
fn a_million_letter_X() -> str {
|
|
||||||
let i = 0;
|
|
||||||
let rs = "";
|
|
||||||
while i < 100000 { rs += "华华华华华华华华华华"; i += 1; }
|
|
||||||
ret rs;
|
|
||||||
}
|
|
||||||
fn half_a_million_letter_X() -> str {
|
|
||||||
let i = 0;
|
|
||||||
let rs = "";
|
|
||||||
while i < 100000 { rs += "华华华华华"; i += 1; }
|
|
||||||
ret rs;
|
|
||||||
}
|
|
||||||
assert (eq(half_a_million_letter_X(),
|
|
||||||
slice_chars(a_million_letter_X(), 0u, 500000u)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2438,25 +2340,6 @@ mod tests {
|
||||||
assert true == any("Ymcy", char::is_uppercase);
|
assert true == any("Ymcy", char::is_uppercase);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_windowed() {
|
|
||||||
let data = "ประเทศไทย中";
|
|
||||||
|
|
||||||
assert ["ประ", "ระเ", "ะเท", "เทศ", "ทศไ", "ศไท", "ไทย", "ทย中"]
|
|
||||||
== windowed(3u, data);
|
|
||||||
|
|
||||||
assert [data] == windowed(10u, data);
|
|
||||||
|
|
||||||
assert [] == windowed(6u, "abcd");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
#[should_fail]
|
|
||||||
#[ignore(cfg(target_os = "win32"))]
|
|
||||||
fn test_windowed_() {
|
|
||||||
let _x = windowed(0u, "abcd");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chars() {
|
fn test_chars() {
|
||||||
let ss = "ศไทย中华Việt Nam";
|
let ss = "ศไทย中华Việt Nam";
|
||||||
|
|
|
@ -254,16 +254,9 @@ the first element of the returned vector will be the drive letter
|
||||||
followed by a colon.
|
followed by a colon.
|
||||||
*/
|
*/
|
||||||
fn split(p: path) -> [path] {
|
fn split(p: path) -> [path] {
|
||||||
// FIXME: use UTF-8 safe str, and/or various other string formats
|
str::split_nonempty(p, {|c|
|
||||||
let split1 = str::split_byte(p, os_fs::path_sep as u8);
|
c == os_fs::path_sep || c == os_fs::alt_path_sep
|
||||||
let split2 = [];
|
})
|
||||||
for s in split1 {
|
|
||||||
split2 += str::split_byte(s, os_fs::alt_path_sep as u8);
|
|
||||||
}
|
|
||||||
|
|
||||||
// filter out ""
|
|
||||||
let split3 = vec::filter(split2, {|seg| "" != seg});
|
|
||||||
ret split3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -276,13 +269,12 @@ path includes directory components then they are included in the filename part
|
||||||
of the result pair.
|
of the result pair.
|
||||||
*/
|
*/
|
||||||
fn splitext(p: path) -> (str, str) {
|
fn splitext(p: path) -> (str, str) {
|
||||||
// FIXME: use UTF-8 safe str, and/or various other string formats
|
|
||||||
if str::is_empty(p) { ("", "") }
|
if str::is_empty(p) { ("", "") }
|
||||||
else {
|
else {
|
||||||
let parts = str::split_byte(p, '.' as u8);
|
let parts = str::split_char(p, '.');
|
||||||
if vec::len(parts) > 1u {
|
if vec::len(parts) > 1u {
|
||||||
let base = str::connect(vec::init(parts), ".");
|
let base = str::connect(vec::init(parts), ".");
|
||||||
let ext = "." + option::get(vec::last(parts));
|
let ext = "." + vec::last_total(parts);
|
||||||
|
|
||||||
fn is_dotfile(base: str) -> bool {
|
fn is_dotfile(base: str) -> bool {
|
||||||
str::is_empty(base)
|
str::is_empty(base)
|
||||||
|
|
|
@ -49,7 +49,7 @@ Failure:
|
||||||
String must be a valid IPv4 address
|
String must be a valid IPv4 address
|
||||||
*/
|
*/
|
||||||
fn parse_addr(ip: str) -> ip_addr {
|
fn parse_addr(ip: str) -> ip_addr {
|
||||||
let parts = vec::map(str::split_byte(ip, "."[0]), {|s|
|
let parts = vec::map(str::split_char(ip, '.'), {|s|
|
||||||
alt uint::from_str(s) {
|
alt uint::from_str(s) {
|
||||||
some(n) if n <= 255u { n }
|
some(n) if n <= 255u { n }
|
||||||
_ { fail "Invalid IP Address part." }
|
_ { fail "Invalid IP Address part." }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue