auto merge of #5980 : Kimundi/rust/ascii-encoding, r=thestinger

Added an `Ascii` type for byte inputs that are known to contain only ASCII characters.
2013-04-22 16:33:51 -07:00 · 2013-04-22 16:33:51 -07:00 · 05f9586d06
commit 05f9586d06
parent aba93c6b60 bf4f088eac
6 changed files with 320 additions and 31 deletions
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -1,4 +1,4 @@
-// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -789,16 +789,18 @@ pub fn each_split_within<'a>(ss: &'a str,

 /// Convert a string to lowercase. ASCII only
 pub fn to_lower(s: &str) -> ~str {
-    map(s,
-        |c| unsafe{(libc::tolower(c as libc::c_char)) as char}
-    )
+    do map(s) |c| {
+        assert!(char::is_ascii(c));
+        (unsafe{libc::tolower(c as libc::c_char)}) as char
+    }
 }

 /// Convert a string to uppercase. ASCII only
 pub fn to_upper(s: &str) -> ~str {
-    map(s,
-        |c| unsafe{(libc::toupper(c as libc::c_char)) as char}
-    )
+    do map(s) |c| {
+        assert!(char::is_ascii(c));
+        (unsafe{libc::toupper(c as libc::c_char)}) as char
+    }
 }

 /**
@@ -2317,20 +2319,20 @@ pub mod raw {
    }

    /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
-    pub fn pop_byte(s: &mut ~str) -> u8 {
+    pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
        let len = len(*s);
        assert!((len > 0u));
        let b = s[len - 1u];
-        unsafe { set_len(s, len - 1u) };
+        set_len(s, len - 1u);
        return b;
    }

    /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
-    pub fn shift_byte(s: &mut ~str) -> u8 {
+    pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
        let len = len(*s);
        assert!((len > 0u));
        let b = s[0];
-        *s = unsafe { raw::slice_bytes_owned(*s, 1u, len) };
+        *s = raw::slice_bytes_owned(*s, 1u, len);
        return b;
    }

@@ -3096,12 +3098,11 @@ mod tests {

    #[test]
    fn test_to_lower() {
-        unsafe {
-            assert!(~"" == map(~"",
-                |c| libc::tolower(c as c_char) as char));
-            assert!(~"ymca" == map(~"YMCA",
-                |c| libc::tolower(c as c_char) as char));
-        }
+        // libc::tolower, and hence str::to_lower
+        // are culturally insensitive: they only work for ASCII
+        // (see Issue #1347)
+        assert!(~"" == to_lower(""));
+        assert!(~"ymca" == to_lower("YMCA"));
    }

    #[test]
@@ -3346,7 +3347,7 @@ mod tests {
    #[test]
    fn test_shift_byte() {
        let mut s = ~"ABC";
-        let b = raw::shift_byte(&mut s);
+        let b = unsafe{raw::shift_byte(&mut s)};
        assert!((s == ~"BC"));
        assert!((b == 65u8));
    }
@@ -3354,7 +3355,7 @@ mod tests {
    #[test]
    fn test_pop_byte() {
        let mut s = ~"ABC";
-        let b = raw::pop_byte(&mut s);
+        let b = unsafe{raw::pop_byte(&mut s)};
        assert!((s == ~"AB"));
        assert!((b == 67u8));
    }
@@ -3666,12 +3667,8 @@ mod tests {

    #[test]
    fn test_map() {
-        unsafe {
-            assert!(~"" == map(~"", |c|
-                libc::toupper(c as c_char) as char));
-            assert!(~"YMCA" == map(~"ymca",
-                                  |c| libc::toupper(c as c_char) as char));
-        }
+        assert!(~"" == map(~"", |c| unsafe {libc::toupper(c as c_char)} as char));
+        assert!(~"YMCA" == map(~"ymca", |c| unsafe {libc::toupper(c as c_char)} as char));
    }

    #[test]
@@ -3685,11 +3682,11 @@ mod tests {

    #[test]
    fn test_any() {
-        assert!(false  == any(~"", char::is_uppercase));
+        assert!(false == any(~"", char::is_uppercase));
        assert!(false == any(~"ymca", char::is_uppercase));
        assert!(true  == any(~"YMCA", char::is_uppercase));
-        assert!(true == any(~"yMCA", char::is_uppercase));
-        assert!(true == any(~"Ymcy", char::is_uppercase));
+        assert!(true  == any(~"yMCA", char::is_uppercase));
+        assert!(true  == any(~"Ymcy", char::is_uppercase));
    }

    #[test]