rustc: Remove &str indexing from the language.

Being able to index into the bytes of a string encourages poor UTF-8 hygiene. To get a view of `&[u8]` from either a `String` or `&str` slice, use the `as_bytes()` method. Closes #12710. [breaking-change]
2014-06-19 18:22:33 -07:00 · 2014-06-19 18:22:33 -07:00 · d21336ee0a
commit d21336ee0a
parent 44ec28cfac
26 changed files with 101 additions and 87 deletions
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@ -1743,7 +1743,7 @@ impl<'a> StrSlice<'a> for &'a str {
    fn lines_any(&self) -> AnyLines<'a> {
        self.lines().map(|line| {
            let l = line.len();
-            if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
+            if l > 0 && line.as_bytes()[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
            else { line }
        })
    }
@ -1867,26 +1867,26 @@ impl<'a> StrSlice<'a> for &'a str {
    fn is_char_boundary(&self, index: uint) -> bool {
        if index == self.len() { return true; }
        if index > self.len() { return false; }
-        let b = self[index];
+        let b = self.as_bytes()[index];
        return b < 128u8 || b >= 192u8;
    }

    #[inline]
    fn char_range_at(&self, i: uint) -> CharRange {
-        if self[i] < 128u8 {
-            return CharRange {ch: self[i] as char, next: i + 1 };
+        if self.as_bytes()[i] < 128u8 {
+            return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
        }

        // Multibyte case is a fn to allow char_range_at to inline cleanly
        fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
-            let mut val = s[i] as u32;
+            let mut val = s.as_bytes()[i] as u32;
            let w = UTF8_CHAR_WIDTH[val as uint] as uint;
            assert!((w != 0));

            val = utf8_first_byte!(val, w);
-            val = utf8_acc_cont_byte!(val, s[i + 1]);
-            if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
-            if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
+            val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
+            if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
+            if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }

            return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
        }
@ -1899,23 +1899,25 @@ impl<'a> StrSlice<'a> for &'a str {
        let mut prev = start;

        prev = prev.saturating_sub(1);
-        if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
+        if self.as_bytes()[prev] < 128 {
+            return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
+        }

        // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
        fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
            // while there is a previous byte == 10......
-            while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
+            while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
                i -= 1u;
            }

-            let mut val = s[i] as u32;
+            let mut val = s.as_bytes()[i] as u32;
            let w = UTF8_CHAR_WIDTH[val as uint] as uint;
            assert!((w != 0));

            val = utf8_first_byte!(val, w);
-            val = utf8_acc_cont_byte!(val, s[i + 1]);
-            if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
-            if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
+            val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
+            if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
+            if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }

            return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
        }