core: make str::substr use char positions (and replace other uses)
This commit is contained in:
parent
a3f5626ad1
commit
2b0396c34a
7 changed files with 45 additions and 40 deletions
|
@ -113,12 +113,13 @@ mod write {
|
||||||
|
|
||||||
// Decides what to call an intermediate file, given the name of the output
|
// Decides what to call an intermediate file, given the name of the output
|
||||||
// and the extension to use.
|
// and the extension to use.
|
||||||
fn mk_intermediate_name(output_path: str, extension: str) -> str {
|
fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe {
|
||||||
let dot_pos = str::index(output_path, '.' as u8);
|
let dot_pos = str::index(output_path, '.' as u8);
|
||||||
let stem;
|
let stem;
|
||||||
if dot_pos < 0 {
|
if dot_pos < 0 {
|
||||||
stem = output_path;
|
stem = output_path;
|
||||||
} else { stem = str::substr(output_path, 0u, dot_pos as uint); }
|
} else { stem = str::unsafe::slice_bytes(output_path, 0u,
|
||||||
|
dot_pos as uint); }
|
||||||
ret stem + "." + extension;
|
ret stem + "." + extension;
|
||||||
}
|
}
|
||||||
fn run_passes(sess: session, llmod: ModuleRef, output: str) {
|
fn run_passes(sess: session, llmod: ModuleRef, output: str) {
|
||||||
|
@ -480,8 +481,8 @@ fn build_link_meta(sess: session, c: ast::crate, output: str,
|
||||||
ret {name: name, vers: vers, extras_hash: extras_hash};
|
ret {name: name, vers: vers, extras_hash: extras_hash};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn truncated_sha1_result(sha: sha1) -> str {
|
fn truncated_sha1_result(sha: sha1) -> str unsafe {
|
||||||
ret str::substr(sha.result_str(), 0u, 16u);
|
ret str::unsafe::slice_bytes(sha.result_str(), 0u, 16u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -116,9 +116,9 @@ fn ty_to_str(cx: ctxt, typ: t) -> str {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ty_to_short_str(cx: ctxt, typ: t) -> str {
|
fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe {
|
||||||
let s = encoder::encoded_ty(cx, typ);
|
let s = encoder::encoded_ty(cx, typ);
|
||||||
if str::byte_len(s) >= 32u { s = str::substr(s, 0u, 32u); }
|
if str::byte_len(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); }
|
||||||
ret s;
|
ret s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,7 @@ mod ct {
|
||||||
enum piece { piece_string(str), piece_conv(conv), }
|
enum piece { piece_string(str), piece_conv(conv), }
|
||||||
type error_fn = fn@(str) -> ! ;
|
type error_fn = fn@(str) -> ! ;
|
||||||
|
|
||||||
fn parse_fmt_string(s: str, error: error_fn) -> [piece] {
|
fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe {
|
||||||
let pieces: [piece] = [];
|
let pieces: [piece] = [];
|
||||||
let lim = str::byte_len(s);
|
let lim = str::byte_len(s);
|
||||||
let buf = "";
|
let buf = "";
|
||||||
|
@ -93,13 +93,13 @@ mod ct {
|
||||||
}
|
}
|
||||||
let i = 0u;
|
let i = 0u;
|
||||||
while i < lim {
|
while i < lim {
|
||||||
let curr = str::substr(s, i, 1u);
|
let curr = str::unsafe::slice_bytes(s, i, i+1u);
|
||||||
if str::eq(curr, "%") {
|
if str::eq(curr, "%") {
|
||||||
i += 1u;
|
i += 1u;
|
||||||
if i >= lim {
|
if i >= lim {
|
||||||
error("unterminated conversion at end of string");
|
error("unterminated conversion at end of string");
|
||||||
}
|
}
|
||||||
let curr2 = str::substr(s, i, 1u);
|
let curr2 = str::unsafe::slice_bytes(s, i, i+1u);
|
||||||
if str::eq(curr2, "%") {
|
if str::eq(curr2, "%") {
|
||||||
buf += curr2;
|
buf += curr2;
|
||||||
i += 1u;
|
i += 1u;
|
||||||
|
@ -223,9 +223,9 @@ mod ct {
|
||||||
} else { {count: count_implied, next: i} };
|
} else { {count: count_implied, next: i} };
|
||||||
}
|
}
|
||||||
fn parse_type(s: str, i: uint, lim: uint, error: error_fn) ->
|
fn parse_type(s: str, i: uint, lim: uint, error: error_fn) ->
|
||||||
{ty: ty, next: uint} {
|
{ty: ty, next: uint} unsafe {
|
||||||
if i >= lim { error("missing type in conversion"); }
|
if i >= lim { error("missing type in conversion"); }
|
||||||
let tstr = str::substr(s, i, 1u);
|
let tstr = str::unsafe::slice_bytes(s, i, i+1u);
|
||||||
// TODO: Do we really want two signed types here?
|
// TODO: Do we really want two signed types here?
|
||||||
// How important is it to be printf compatible?
|
// How important is it to be printf compatible?
|
||||||
let t =
|
let t =
|
||||||
|
@ -317,7 +317,7 @@ mod rt {
|
||||||
fn conv_char(cv: conv, c: char) -> str {
|
fn conv_char(cv: conv, c: char) -> str {
|
||||||
ret pad(cv, str::from_char(c), pad_nozero);
|
ret pad(cv, str::from_char(c), pad_nozero);
|
||||||
}
|
}
|
||||||
fn conv_str(cv: conv, s: str) -> str {
|
fn conv_str(cv: conv, s: str) -> str unsafe {
|
||||||
// For strings, precision is the maximum characters
|
// For strings, precision is the maximum characters
|
||||||
// displayed
|
// displayed
|
||||||
|
|
||||||
|
@ -327,7 +327,7 @@ mod rt {
|
||||||
count_implied { s }
|
count_implied { s }
|
||||||
count_is(max) {
|
count_is(max) {
|
||||||
if max as uint < str::char_len(s) {
|
if max as uint < str::char_len(s) {
|
||||||
str::substr(s, 0u, max as uint)
|
str::unsafe::slice_bytes(s, 0u, max as uint)
|
||||||
} else { s }
|
} else { s }
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -391,7 +391,7 @@ mod rt {
|
||||||
ret str::from_bytes(svec);
|
ret str::from_bytes(svec);
|
||||||
}
|
}
|
||||||
enum pad_mode { pad_signed, pad_unsigned, pad_nozero, }
|
enum pad_mode { pad_signed, pad_unsigned, pad_nozero, }
|
||||||
fn pad(cv: conv, s: str, mode: pad_mode) -> str {
|
fn pad(cv: conv, s: str, mode: pad_mode) -> str unsafe {
|
||||||
let uwidth;
|
let uwidth;
|
||||||
alt cv.width {
|
alt cv.width {
|
||||||
count_implied { ret s; }
|
count_implied { ret s; }
|
||||||
|
@ -440,7 +440,7 @@ mod rt {
|
||||||
let headstr = str::from_bytes([head]);
|
let headstr = str::from_bytes([head]);
|
||||||
// FIXME: not UTF-8 safe
|
// FIXME: not UTF-8 safe
|
||||||
let bytelen = str::byte_len(s);
|
let bytelen = str::byte_len(s);
|
||||||
let numpart = str::substr(s, 1u, bytelen - 1u);
|
let numpart = str::unsafe::slice_bytes(s, 1u, bytelen);
|
||||||
ret headstr + padstr + numpart;
|
ret headstr + padstr + numpart;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -249,12 +249,12 @@ Failure:
|
||||||
|
|
||||||
If the string does not contain any characters.
|
If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn pop_char(&s: str) -> char {
|
fn pop_char(&s: str) -> char unsafe {
|
||||||
let end = byte_len(s);
|
let end = byte_len(s);
|
||||||
while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
|
while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
|
||||||
assert (end > 0u);
|
assert (end > 0u);
|
||||||
let ch = char_at(s, end - 1u);
|
let ch = char_at(s, end - 1u);
|
||||||
s = substr(s, 0u, end - 1u);
|
s = unsafe::slice_bytes(s, 0u, end - 1u);
|
||||||
ret ch;
|
ret ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,9 +267,9 @@ Failure:
|
||||||
|
|
||||||
If the string does not contain any characters.
|
If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn shift_char(&s: str) -> char {
|
fn shift_char(&s: str) -> char unsafe {
|
||||||
let r = char_range_at(s, 0u);
|
let r = char_range_at(s, 0u);
|
||||||
s = substr(s, r.next, byte_len(s) - r.next);
|
s = unsafe::slice_bytes(s, r.next, byte_len(s));
|
||||||
ret r.ch;
|
ret r.ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,12 +306,13 @@ Function: pop_byte
|
||||||
Removes the last byte from a string and returns it.
|
Removes the last byte from a string and returns it.
|
||||||
|
|
||||||
This function is not unicode-safe.
|
This function is not unicode-safe.
|
||||||
|
FIXME: move to unsafe?
|
||||||
*/
|
*/
|
||||||
fn pop_byte(&s: str) -> u8 {
|
fn pop_byte(&s: str) -> u8 unsafe {
|
||||||
let len = byte_len(s);
|
let len = byte_len(s);
|
||||||
assert (len > 0u);
|
assert (len > 0u);
|
||||||
let b = s[len - 1u];
|
let b = s[len - 1u];
|
||||||
s = substr(s, 0u, len - 1u);
|
s = unsafe::slice_bytes(s, 0u, len - 1u);
|
||||||
ret b;
|
ret b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,12 +322,13 @@ Function: shift_byte
|
||||||
Removes the first byte from a string and returns it.
|
Removes the first byte from a string and returns it.
|
||||||
|
|
||||||
This function is not unicode-safe.
|
This function is not unicode-safe.
|
||||||
|
FIXME: move to unsafe?
|
||||||
*/
|
*/
|
||||||
fn shift_byte(&s: str) -> u8 {
|
fn shift_byte(&s: str) -> u8 unsafe {
|
||||||
let len = byte_len(s);
|
let len = byte_len(s);
|
||||||
assert (len > 0u);
|
assert (len > 0u);
|
||||||
let b = s[0];
|
let b = s[0];
|
||||||
s = substr(s, 1u, len - 1u);
|
s = unsafe::slice_bytes(s, 1u, len);
|
||||||
ret b;
|
ret b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -413,17 +415,15 @@ fn chars(s: str) -> [char] {
|
||||||
/*
|
/*
|
||||||
Function: substr
|
Function: substr
|
||||||
|
|
||||||
Take a substring of another. Returns a string containing `len` bytes
|
Take a substring of another. Returns a string containing `len` chars
|
||||||
starting at byte offset `begin`.
|
starting at char offset `begin`.
|
||||||
|
|
||||||
FIXME: This function is not unicode-safe.
|
|
||||||
|
|
||||||
Failure:
|
Failure:
|
||||||
|
|
||||||
If `begin` + `len` is is greater than the byte length of the string
|
If `begin` + `len` is greater than the char length of the string
|
||||||
*/
|
*/
|
||||||
fn substr(s: str, begin: uint, len: uint) -> str unsafe {
|
fn substr(s: str, begin: uint, len: uint) -> str {
|
||||||
ret unsafe::slice_bytes(s, begin, begin + len);
|
ret slice(s, begin, begin + len);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -941,8 +941,8 @@ haystack - The string to look in
|
||||||
needle - The string to look for
|
needle - The string to look for
|
||||||
*/
|
*/
|
||||||
fn ends_with(haystack: str, needle: str) -> bool {
|
fn ends_with(haystack: str, needle: str) -> bool {
|
||||||
let haystack_len: uint = byte_len(haystack);
|
let haystack_len: uint = char_len(haystack);
|
||||||
let needle_len: uint = byte_len(needle);
|
let needle_len: uint = char_len(needle);
|
||||||
ret if needle_len == 0u {
|
ret if needle_len == 0u {
|
||||||
true
|
true
|
||||||
} else if needle_len > haystack_len {
|
} else if needle_len > haystack_len {
|
||||||
|
@ -1598,7 +1598,9 @@ mod tests {
|
||||||
}
|
}
|
||||||
t("hello", "llo", 2);
|
t("hello", "llo", 2);
|
||||||
t("hello", "el", 1);
|
t("hello", "el", 1);
|
||||||
t("substr should not be a challenge", "not", 14);
|
|
||||||
|
assert "ะเทศไท"
|
||||||
|
== substr("ประเทศไทย中华Việt Nam", 2u, 6u);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
@ -43,13 +43,13 @@ The dirname of "/usr/share" will be "/usr", but the dirname of
|
||||||
|
|
||||||
If the path is not prefixed with a directory, then "." is returned.
|
If the path is not prefixed with a directory, then "." is returned.
|
||||||
*/
|
*/
|
||||||
fn dirname(p: path) -> path {
|
fn dirname(p: path) -> path unsafe {
|
||||||
let i: int = str::rindex(p, os_fs::path_sep as u8);
|
let i: int = str::rindex(p, os_fs::path_sep as u8);
|
||||||
if i == -1 {
|
if i == -1 {
|
||||||
i = str::rindex(p, os_fs::alt_path_sep as u8);
|
i = str::rindex(p, os_fs::alt_path_sep as u8);
|
||||||
if i == -1 { ret "."; }
|
if i == -1 { ret "."; }
|
||||||
}
|
}
|
||||||
ret str::substr(p, 0u, i as uint);
|
ret str::unsafe::slice_bytes(p, 0u, i as uint);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1341,11 +1341,12 @@ mod tests {
|
||||||
node::empty { ret "" }
|
node::empty { ret "" }
|
||||||
node::content(x) {
|
node::content(x) {
|
||||||
let str = @mutable "";
|
let str = @mutable "";
|
||||||
fn aux(str: @mutable str, node: @node::node) {
|
fn aux(str: @mutable str, node: @node::node) unsafe {
|
||||||
alt(*node) {
|
alt(*node) {
|
||||||
node::leaf(x) {
|
node::leaf(x) {
|
||||||
*str += str::substr(
|
*str += str::unsafe::slice_bytes(
|
||||||
*x.content, x.byte_offset, x.byte_len);
|
*x.content, x.byte_offset,
|
||||||
|
x.byte_offset + x.byte_len);
|
||||||
}
|
}
|
||||||
node::concat(x) {
|
node::concat(x) {
|
||||||
aux(str, x.left);
|
aux(str, x.left);
|
||||||
|
|
|
@ -291,7 +291,7 @@ fn mk_sha1() -> sha1 {
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test() {
|
fn test() unsafe {
|
||||||
type test = {input: str, output: [u8]};
|
type test = {input: str, output: [u8]};
|
||||||
|
|
||||||
fn a_million_letter_a() -> str {
|
fn a_million_letter_a() -> str {
|
||||||
|
@ -372,7 +372,8 @@ mod tests {
|
||||||
let left = len;
|
let left = len;
|
||||||
while left > 0u {
|
while left > 0u {
|
||||||
let take = (left + 1u) / 2u;
|
let take = (left + 1u) / 2u;
|
||||||
sh.input_str(str::substr(t.input, len - left, take));
|
sh.input_str(str::unsafe::slice_bytes(t.input, len - left,
|
||||||
|
take + len - left));
|
||||||
left = left - take;
|
left = left - take;
|
||||||
}
|
}
|
||||||
let out = sh.result();
|
let out = sh.result();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue