1
Fork 0

Implement _str.len() to return the number of bytes, rename it to byte_len(), and add a test.
This commit is contained in:
Jeffrey Yasskin 2010-07-25 00:36:03 -07:00 committed by Graydon Hoare
parent 581a95a804
commit 3f6e8ffe64
4 changed files with 33 additions and 5 deletions

View file

@ -506,6 +506,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \
str-append.rs \ str-append.rs \
str-concat.rs \ str-concat.rs \
str-idx.rs \ str-idx.rs \
str-lib.rs \
tag.rs \ tag.rs \
tail-cps.rs \ tail-cps.rs \
tail-direct.rs \ tail-direct.rs \

View file

@ -3,7 +3,7 @@ import rustrt.sbuf;
native "rust" mod rustrt { native "rust" mod rustrt {
type sbuf; type sbuf;
fn str_buf(str s) -> sbuf; fn str_buf(str s) -> sbuf;
fn str_len(str s) -> uint; fn str_byte_len(str s) -> uint;
fn str_alloc(uint n_bytes) -> str; fn str_alloc(uint n_bytes) -> str;
fn refcount[T](str s) -> uint; fn refcount[T](str s) -> uint;
} }
@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool {
} }
fn is_ascii(str s) -> bool { fn is_ascii(str s) -> bool {
let uint i = len(s); let uint i = byte_len(s);
while (i > 0u) { while (i > 0u) {
i -= 1u; i -= 1u;
if ((s.(i) & 0x80u8) != 0u8) { if ((s.(i) & 0x80u8) != 0u8) {
@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str {
ret rustrt.str_alloc(n_bytes); ret rustrt.str_alloc(n_bytes);
} }
fn len(str s) -> uint { // Returns the number of bytes (a.k.a. UTF-8 code units) in s.
ret rustrt.str_len(s); // Contrast with a function that would return the number of code
// points (char's), combining character sequences, words, etc. See
// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
// way to implement those.
fn byte_len(str s) -> uint {
ret rustrt.str_byte_len(s);
} }
fn buf(str s) -> sbuf { fn buf(str s) -> sbuf {
@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] {
fn ith(str s, uint i) -> u8 { fn ith(str s, uint i) -> u8 {
ret s.(i); ret s.(i);
} }
ret _vec.init_fn[u8](bind ith(s, _), _str.len(s)); ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s));
} }

View file

@ -115,6 +115,12 @@ str_buf(rust_task *task, rust_str *s)
return (char const *)&s->data[0]; return (char const *)&s->data[0];
} }
// Returns the length of `s` in bytes, excluding the trailing '\0'
// terminator. The `task` argument is not used in the body.
// NOTE(review): this relies on s->fill counting the terminator, i.e.
// fill >= 1 for every rust_str; confirm that invariant, since a fill of 0
// would make the subtraction wrap.
extern "C" CDECL size_t
str_byte_len(rust_task *task, rust_str *s)
{
return s->fill - 1; // -1 for the '\0' terminator.
}
extern "C" CDECL void * extern "C" CDECL void *
vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset) vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset)
{ {

View file

@ -0,0 +1,16 @@
use std;
import std._str;
// Checks that _str.byte_len reports the number of bytes in a string
// rather than the number of characters. The cases step through
// progressively wider characters, so the expected counts go 1, 2, 2, 3, 4
// for single-character strings.
fn test_bytes_len() {
// Empty string: no bytes.
check (_str.byte_len("") == 0u);
// Plain ASCII: one byte per character.
check (_str.byte_len("hello world") == 11u);
// 0x63 is in the ASCII range, so a single byte is expected.
check (_str.byte_len("\x63") == 1u);
// Expected to be 2 bytes; presumably \xa2 denotes the code point U+00A2
// (not a raw byte), which needs two bytes in UTF-8 -- TODO confirm the
// escape semantics.
check (_str.byte_len("\xa2") == 2u);
// U+03C0 lies in the two-byte UTF-8 range (U+0080..U+07FF).
check (_str.byte_len("\u03c0") == 2u);
// U+2620 lies in the three-byte UTF-8 range (U+0800..U+FFFF).
check (_str.byte_len("\u2620") == 3u);
// U+1D11E is outside the BMP and needs four bytes in UTF-8.
check (_str.byte_len("\U0001d11e") == 4u);
}
// Program entry point: runs the single byte_len test above.
fn main() {
test_bytes_len();
}