1
Fork 0

rustc: Remove &str indexing from the language.

Being able to index into the bytes of a string encourages
poor UTF-8 hygiene. To get a view of `&[u8]` from either
a `String` or `&str` slice, use the `as_bytes()` method.

Closes #12710.

[breaking-change]
This commit is contained in:
Brian Anderson 2014-06-19 18:22:33 -07:00
parent 44ec28cfac
commit d21336ee0a
26 changed files with 101 additions and 87 deletions

View file

@@ -1743,7 +1743,7 @@ impl<'a> StrSlice<'a> for &'a str {
// Maps every line yielded by `self.lines()` to the same line with a single
// trailing '\r' byte removed, when one is present; other lines pass through
// unchanged.
fn lines_any(&self) -> AnyLines<'a> {
self.lines().map(|line| {
let l = line.len();
// NOTE(review): the next two lines are the before/after pair of this diff
// hunk. The first indexes the `&str` directly; the second reads the same
// byte through `as_bytes()`. The `l > 0` guard keeps `l - 1` from
// underflowing on an empty line.
if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
if l > 0 && line.as_bytes()[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
else { line }
})
}
@@ -1867,26 +1867,26 @@ impl<'a> StrSlice<'a> for &'a str {
// Reports whether byte offset `index` lies on a character boundary.
// Returns true when `index == self.len()`, false when `index` is past the end,
// and otherwise decides from the value of the byte at `index`.
fn is_char_boundary(&self, index: uint) -> bool {
if index == self.len() { return true; }
if index > self.len() { return false; }
// NOTE(review): the next two lines are the before/after pair of this diff
// hunk (direct `&str` indexing vs. indexing through `as_bytes()`).
let b = self[index];
let b = self.as_bytes()[index];
// Bytes in 128..192 have the bit pattern 10xxxxxx, i.e. UTF-8 continuation
// bytes; any other byte value begins a character.
return b < 128u8 || b >= 192u8;
}
#[inline]
fn char_range_at(&self, i: uint) -> CharRange {
if self[i] < 128u8 {
return CharRange {ch: self[i] as char, next: i + 1 };
if self.as_bytes()[i] < 128u8 {
return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
}
// Multibyte case is a fn to allow char_range_at to inline cleanly
// Decodes the multi-byte character whose first byte is at offset `i` of `s`.
// The returned `CharRange` carries the decoded value in `ch` and `i + w` in
// `next`, where `w` is the width looked up in `UTF8_CHAR_WIDTH` for the first
// byte.
//
// NOTE(review): this is a diff rendering. Lines that index `s[...]` directly
// are the pre-change forms; the `s.as_bytes()[...]` lines are their
// replacements.
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
let mut val = s[i] as u32;
let mut val = s.as_bytes()[i] as u32;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
// A table width of 0 is rejected outright.
assert!((w != 0));
// Presumably strips the length-marker bits from the lead byte; the macro is
// defined outside this view, so confirm against its definition.
val = utf8_first_byte!(val, w);
val = utf8_acc_cont_byte!(val, s[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
// One following byte is always accumulated; a third and fourth only when the
// width says the sequence is that long.
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
// The accumulated u32 is transmuted with no validity check visible in this
// function. NOTE(review): appears to rely on `s` holding well-formed UTF-8.
return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
}
@@ -1899,23 +1899,25 @@ impl<'a> StrSlice<'a> for &'a str {
let mut prev = start;
prev = prev.saturating_sub(1);
if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
if self.as_bytes()[prev] < 128 {
return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
}
// Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
// Decodes the multi-byte character that contains byte offset `i` of `s`,
// scanning backwards from `i` to find its first byte. The returned
// `CharRange` carries the decoded value in `ch` and the offset of that first
// byte in `next`.
//
// NOTE(review): this is a diff rendering. Lines that index `s[...]` directly
// are the pre-change forms; the `s.as_bytes()[...]` lines are their
// replacements.
fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
// while there is a previous byte == 10......
// (masking with 192 keeps the top two bits, which are compared against
// TAG_CONT_U8; the loop also stops at offset 0)
while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
i -= 1u;
}
// `i` now indexes the byte the backward scan stopped on; decode forward
// from there.
let mut val = s[i] as u32;
let mut val = s.as_bytes()[i] as u32;
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
// A table width of 0 is rejected outright.
assert!((w != 0));
// Presumably strips the length-marker bits from the lead byte; the macro is
// defined outside this view, so confirm against its definition.
val = utf8_first_byte!(val, w);
val = utf8_acc_cont_byte!(val, s[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
// One following byte is always accumulated; a third and fourth only when the
// width says the sequence is that long.
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
// The accumulated u32 is transmuted with no validity check visible in this
// function. NOTE(review): appears to rely on `s` holding well-formed UTF-8.
return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
}