Deprecate str::from_utf8_lossy
Use `String::from_utf8_lossy` instead [breaking-change]
This commit is contained in:
parent
1900abdd9b
commit
c6b82c7566
11 changed files with 258 additions and 256 deletions
|
@ -402,131 +402,10 @@ macro_rules! utf8_acc_cont_byte(
|
||||||
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
|
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
|
||||||
)
|
)
|
||||||
|
|
||||||
static TAG_CONT_U8: u8 = 128u8;
|
/// Deprecated. Use `String::from_utf8_lossy`.
|
||||||
|
#[deprecated = "Replaced by String::from_utf8_lossy"]
|
||||||
/// Converts a vector of bytes to a new utf-8 string.
|
|
||||||
/// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
|
||||||
///
|
|
||||||
/// # Example
|
|
||||||
///
|
|
||||||
/// ```rust
|
|
||||||
/// let input = b"Hello \xF0\x90\x80World";
|
|
||||||
/// let output = std::str::from_utf8_lossy(input);
|
|
||||||
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
|
|
||||||
/// ```
|
|
||||||
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
|
||||||
if is_utf8(v) {
|
String::from_utf8_lossy(v)
|
||||||
return Slice(unsafe { mem::transmute(v) })
|
|
||||||
}
|
|
||||||
|
|
||||||
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
|
|
||||||
let mut i = 0;
|
|
||||||
let total = v.len();
|
|
||||||
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
|
|
||||||
unsafe { *xs.unsafe_ref(i) }
|
|
||||||
}
|
|
||||||
fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
|
|
||||||
if i >= total {
|
|
||||||
0
|
|
||||||
} else {
|
|
||||||
unsafe_get(xs, i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut res = String::with_capacity(total);
|
|
||||||
|
|
||||||
if i > 0 {
|
|
||||||
unsafe {
|
|
||||||
res.push_bytes(v.slice_to(i))
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
|
||||||
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
|
||||||
// them one by one.
|
|
||||||
let mut subseqidx = 0;
|
|
||||||
|
|
||||||
while i < total {
|
|
||||||
let i_ = i;
|
|
||||||
let byte = unsafe_get(v, i);
|
|
||||||
i += 1;
|
|
||||||
|
|
||||||
macro_rules! error(() => ({
|
|
||||||
unsafe {
|
|
||||||
if subseqidx != i_ {
|
|
||||||
res.push_bytes(v.slice(subseqidx, i_));
|
|
||||||
}
|
|
||||||
subseqidx = i;
|
|
||||||
res.push_bytes(REPLACEMENT);
|
|
||||||
}
|
|
||||||
}))
|
|
||||||
|
|
||||||
if byte < 128u8 {
|
|
||||||
// subseqidx handles this
|
|
||||||
} else {
|
|
||||||
let w = utf8_char_width(byte);
|
|
||||||
|
|
||||||
match w {
|
|
||||||
2 => {
|
|
||||||
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
3 => {
|
|
||||||
match (byte, safe_get(v, i, total)) {
|
|
||||||
(0xE0 , 0xA0 .. 0xBF) => (),
|
|
||||||
(0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
|
|
||||||
(0xED , 0x80 .. 0x9F) => (),
|
|
||||||
(0xEE .. 0xEF, 0x80 .. 0xBF) => (),
|
|
||||||
_ => {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
4 => {
|
|
||||||
match (byte, safe_get(v, i, total)) {
|
|
||||||
(0xF0 , 0x90 .. 0xBF) => (),
|
|
||||||
(0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
|
|
||||||
(0xF4 , 0x80 .. 0x8F) => (),
|
|
||||||
_ => {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
error!();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if subseqidx < total {
|
|
||||||
unsafe {
|
|
||||||
res.push_bytes(v.slice(subseqidx, total))
|
|
||||||
};
|
|
||||||
}
|
|
||||||
Owned(res.into_string())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2052,41 +1931,6 @@ String::from_str("\u1111\u1171\u11b6"));
|
||||||
assert_eq!(from_utf8(xs), None);
|
assert_eq!(from_utf8(xs), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_str_from_utf8_lossy() {
|
|
||||||
let xs = b"hello";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Slice("hello"));
|
|
||||||
|
|
||||||
let xs = "ศไทย中华Việt Nam".as_bytes();
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
|
|
||||||
|
|
||||||
let xs = b"Hello\xC2 There\xFF Goodbye";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Owned(String::from_str("Hello\uFFFD There\uFFFD Goodbye")));
|
|
||||||
|
|
||||||
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
|
||||||
assert_eq!(from_utf8_lossy(xs),
|
|
||||||
Owned(String::from_str("Hello\uFFFD\uFFFD There\uFFFD Goodbye")));
|
|
||||||
|
|
||||||
let xs = b"\xF5foo\xF5\x80bar";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Owned(String::from_str("\uFFFDfoo\uFFFD\uFFFDbar")));
|
|
||||||
|
|
||||||
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Owned(String::from_str("\uFFFDfoo\uFFFDbar\uFFFDbaz")));
|
|
||||||
|
|
||||||
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
|
|
||||||
assert_eq!(from_utf8_lossy(xs),
|
|
||||||
Owned(String::from_str("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz")));
|
|
||||||
|
|
||||||
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Owned(String::from_str("\uFFFD\uFFFD\uFFFD\uFFFD\
|
|
||||||
foo\U00010000bar")));
|
|
||||||
|
|
||||||
// surrogates
|
|
||||||
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
|
|
||||||
assert_eq!(from_utf8_lossy(xs), Owned(String::from_str("\uFFFD\uFFFD\uFFFDfoo\
|
|
||||||
\uFFFD\uFFFD\uFFFDbar")));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_maybe_owned_traits() {
|
fn test_maybe_owned_traits() {
|
||||||
let s = Slice("abcde");
|
let s = Slice("abcde");
|
||||||
|
@ -2296,42 +2140,6 @@ mod bench {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
|
|
||||||
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
|
|
||||||
Lorem ipsum dolor sit amet, consectetur. ";
|
|
||||||
|
|
||||||
assert_eq!(100, s.len());
|
|
||||||
b.iter(|| {
|
|
||||||
let _ = from_utf8_lossy(s);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
|
|
||||||
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
|
|
||||||
assert_eq!(100, s.len());
|
|
||||||
b.iter(|| {
|
|
||||||
let _ = from_utf8_lossy(s);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn from_utf8_lossy_invalid(b: &mut Bencher) {
|
|
||||||
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
|
||||||
b.iter(|| {
|
|
||||||
let _ = from_utf8_lossy(s);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
|
|
||||||
let s = Vec::from_elem(100, 0xF5u8);
|
|
||||||
b.iter(|| {
|
|
||||||
let _ = from_utf8_lossy(s.as_slice());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn bench_connect(b: &mut Bencher) {
|
fn bench_connect(b: &mut Bencher) {
|
||||||
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
||||||
|
|
|
@ -21,7 +21,7 @@ use core::raw::Slice;
|
||||||
use {Collection, Mutable};
|
use {Collection, Mutable};
|
||||||
use hash;
|
use hash;
|
||||||
use str;
|
use str;
|
||||||
use str::{CharRange, StrAllocating};
|
use str::{CharRange, StrAllocating, MaybeOwned, Owned, Slice};
|
||||||
use vec::Vec;
|
use vec::Vec;
|
||||||
|
|
||||||
/// A growable string stored as a UTF-8 encoded buffer.
|
/// A growable string stored as a UTF-8 encoded buffer.
|
||||||
|
@ -92,6 +92,132 @@ impl String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a vector of bytes to a new utf-8 string.
|
||||||
|
/// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// let input = b"Hello \xF0\x90\x80World";
|
||||||
|
/// let output = String::from_utf8_lossy(input);
|
||||||
|
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
|
||||||
|
/// ```
|
||||||
|
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
|
||||||
|
if str::is_utf8(v) {
|
||||||
|
return Slice(unsafe { mem::transmute(v) })
|
||||||
|
}
|
||||||
|
|
||||||
|
static TAG_CONT_U8: u8 = 128u8;
|
||||||
|
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
|
||||||
|
let mut i = 0;
|
||||||
|
let total = v.len();
|
||||||
|
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
|
||||||
|
unsafe { *xs.unsafe_ref(i) }
|
||||||
|
}
|
||||||
|
fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
|
||||||
|
if i >= total {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
unsafe_get(xs, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut res = String::with_capacity(total);
|
||||||
|
|
||||||
|
if i > 0 {
|
||||||
|
unsafe {
|
||||||
|
res.push_bytes(v.slice_to(i))
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
||||||
|
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
||||||
|
// them one by one.
|
||||||
|
let mut subseqidx = 0;
|
||||||
|
|
||||||
|
while i < total {
|
||||||
|
let i_ = i;
|
||||||
|
let byte = unsafe_get(v, i);
|
||||||
|
i += 1;
|
||||||
|
|
||||||
|
macro_rules! error(() => ({
|
||||||
|
unsafe {
|
||||||
|
if subseqidx != i_ {
|
||||||
|
res.push_bytes(v.slice(subseqidx, i_));
|
||||||
|
}
|
||||||
|
subseqidx = i;
|
||||||
|
res.push_bytes(REPLACEMENT);
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
|
||||||
|
if byte < 128u8 {
|
||||||
|
// subseqidx handles this
|
||||||
|
} else {
|
||||||
|
let w = str::utf8_char_width(byte);
|
||||||
|
|
||||||
|
match w {
|
||||||
|
2 => {
|
||||||
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
3 => {
|
||||||
|
match (byte, safe_get(v, i, total)) {
|
||||||
|
(0xE0 , 0xA0 .. 0xBF) => (),
|
||||||
|
(0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
|
||||||
|
(0xED , 0x80 .. 0x9F) => (),
|
||||||
|
(0xEE .. 0xEF, 0x80 .. 0xBF) => (),
|
||||||
|
_ => {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
4 => {
|
||||||
|
match (byte, safe_get(v, i, total)) {
|
||||||
|
(0xF0 , 0x90 .. 0xBF) => (),
|
||||||
|
(0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
|
||||||
|
(0xF4 , 0x80 .. 0x8F) => (),
|
||||||
|
_ => {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
error!();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if subseqidx < total {
|
||||||
|
unsafe {
|
||||||
|
res.push_bytes(v.slice(subseqidx, total))
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Owned(res.into_string())
|
||||||
|
}
|
||||||
|
|
||||||
/// Decode a UTF-16 encoded vector `v` into a string, returning `None`
|
/// Decode a UTF-16 encoded vector `v` into a string, returning `None`
|
||||||
/// if `v` contains any invalid data.
|
/// if `v` contains any invalid data.
|
||||||
///
|
///
|
||||||
|
@ -449,7 +575,7 @@ mod tests {
|
||||||
|
|
||||||
use Mutable;
|
use Mutable;
|
||||||
use str;
|
use str;
|
||||||
use str::{Str, StrSlice};
|
use str::{Str, StrSlice, MaybeOwned, Owned, Slice};
|
||||||
use super::String;
|
use super::String;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -458,6 +584,54 @@ mod tests {
|
||||||
assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
|
assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_from_utf8() {
|
||||||
|
let xs = Vec::from_slice(b"hello");
|
||||||
|
assert_eq!(String::from_utf8(xs), Ok("hello".to_string()));
|
||||||
|
|
||||||
|
let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
|
||||||
|
assert_eq!(String::from_utf8(xs), Ok("ศไทย中华Việt Nam".to_string()));
|
||||||
|
|
||||||
|
let xs = Vec::from_slice(b"hello\xFF");
|
||||||
|
assert_eq!(String::from_utf8(xs),
|
||||||
|
Err(Vec::from_slice(b"hello\xFF")));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_from_utf8_lossy() {
|
||||||
|
let xs = b"hello";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Slice("hello"));
|
||||||
|
|
||||||
|
let xs = "ศไทย中华Việt Nam".as_bytes();
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
|
||||||
|
|
||||||
|
let xs = b"Hello\xC2 There\xFF Goodbye";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Owned(String::from_str("Hello\uFFFD There\uFFFD Goodbye")));
|
||||||
|
|
||||||
|
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs),
|
||||||
|
Owned(String::from_str("Hello\uFFFD\uFFFD There\uFFFD Goodbye")));
|
||||||
|
|
||||||
|
let xs = b"\xF5foo\xF5\x80bar";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Owned(String::from_str("\uFFFDfoo\uFFFD\uFFFDbar")));
|
||||||
|
|
||||||
|
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Owned(String::from_str("\uFFFDfoo\uFFFDbar\uFFFDbaz")));
|
||||||
|
|
||||||
|
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs),
|
||||||
|
Owned(String::from_str("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz")));
|
||||||
|
|
||||||
|
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Owned(String::from_str("\uFFFD\uFFFD\uFFFD\uFFFD\
|
||||||
|
foo\U00010000bar")));
|
||||||
|
|
||||||
|
// surrogates
|
||||||
|
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
|
||||||
|
assert_eq!(String::from_utf8_lossy(xs), Owned(String::from_str("\uFFFD\uFFFD\uFFFDfoo\
|
||||||
|
\uFFFD\uFFFD\uFFFDbar")));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_from_utf16() {
|
fn test_from_utf16() {
|
||||||
let pairs =
|
let pairs =
|
||||||
|
@ -547,35 +721,6 @@ mod tests {
|
||||||
String::from_str("\uFFFD𐒋\uFFFD"));
|
String::from_str("\uFFFD𐒋\uFFFD"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_with_capacity(b: &mut Bencher) {
|
|
||||||
b.iter(|| {
|
|
||||||
String::with_capacity(100)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn bench_push_str(b: &mut Bencher) {
|
|
||||||
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
|
||||||
b.iter(|| {
|
|
||||||
let mut r = String::new();
|
|
||||||
r.push_str(s);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_str_from_utf8() {
|
|
||||||
let xs = Vec::from_slice(b"hello");
|
|
||||||
assert_eq!(String::from_utf8(xs), Ok("hello".to_string()));
|
|
||||||
|
|
||||||
let xs = Vec::from_slice("ศไทยä¸åŽViệt Nam".as_bytes());
|
|
||||||
assert_eq!(String::from_utf8(xs), Ok("ศไทยä¸åŽViệt Nam".to_string()));
|
|
||||||
|
|
||||||
let xs = Vec::from_slice(b"hello\xFF");
|
|
||||||
assert_eq!(String::from_utf8(xs),
|
|
||||||
Err(Vec::from_slice(b"hello\xFF")));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_push_bytes() {
|
fn test_push_bytes() {
|
||||||
let mut s = String::from_str("ABC");
|
let mut s = String::from_str("ABC");
|
||||||
|
@ -677,4 +822,56 @@ mod tests {
|
||||||
assert_eq!(b.len(), 7);
|
assert_eq!(b.len(), 7);
|
||||||
assert_eq!(b.as_slice(), "1234522");
|
assert_eq!(b.as_slice(), "1234522");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_with_capacity(b: &mut Bencher) {
|
||||||
|
b.iter(|| {
|
||||||
|
String::with_capacity(100)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_push_str(b: &mut Bencher) {
|
||||||
|
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
|
||||||
|
b.iter(|| {
|
||||||
|
let mut r = String::new();
|
||||||
|
r.push_str(s);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
|
||||||
|
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
|
||||||
|
Lorem ipsum dolor sit amet, consectetur. ";
|
||||||
|
|
||||||
|
assert_eq!(100, s.len());
|
||||||
|
b.iter(|| {
|
||||||
|
let _ = String::from_utf8_lossy(s);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
|
||||||
|
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
|
||||||
|
assert_eq!(100, s.len());
|
||||||
|
b.iter(|| {
|
||||||
|
let _ = String::from_utf8_lossy(s);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn from_utf8_lossy_invalid(b: &mut Bencher) {
|
||||||
|
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
|
||||||
|
b.iter(|| {
|
||||||
|
let _ = String::from_utf8_lossy(s);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
|
||||||
|
let s = Vec::from_elem(100, 0xF5u8);
|
||||||
|
b.iter(|| {
|
||||||
|
let _ = String::from_utf8_lossy(s.as_slice());
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ pub fn llvm_err(sess: &Session, msg: String) -> ! {
|
||||||
sess.fatal(msg.as_slice());
|
sess.fatal(msg.as_slice());
|
||||||
} else {
|
} else {
|
||||||
let err = CString::new(cstr, true);
|
let err = CString::new(cstr, true);
|
||||||
let err = str::from_utf8_lossy(err.as_bytes());
|
let err = String::from_utf8_lossy(err.as_bytes());
|
||||||
sess.fatal(format!("{}: {}",
|
sess.fatal(format!("{}: {}",
|
||||||
msg.as_slice(),
|
msg.as_slice(),
|
||||||
err.as_slice()).as_slice());
|
err.as_slice()).as_slice());
|
||||||
|
|
|
@ -37,7 +37,7 @@ pub fn highlight(src: &str, class: Option<&str>, id: Option<&str>) -> String {
|
||||||
class,
|
class,
|
||||||
id,
|
id,
|
||||||
&mut out).unwrap();
|
&mut out).unwrap();
|
||||||
str::from_utf8_lossy(out.unwrap().as_slice()).to_string()
|
String::from_utf8_lossy(out.unwrap().as_slice()).into_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Exhausts the `lexer` writing the output into `out`.
|
/// Exhausts the `lexer` writing the output into `out`.
|
||||||
|
|
|
@ -313,7 +313,6 @@ impl Command {
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use std::io::Command;
|
/// use std::io::Command;
|
||||||
/// use std::str;
|
|
||||||
///
|
///
|
||||||
/// let output = match Command::new("cat").arg("foot.txt").output() {
|
/// let output = match Command::new("cat").arg("foot.txt").output() {
|
||||||
/// Ok(output) => output,
|
/// Ok(output) => output,
|
||||||
|
@ -321,8 +320,8 @@ impl Command {
|
||||||
/// };
|
/// };
|
||||||
///
|
///
|
||||||
/// println!("status: {}", output.status);
|
/// println!("status: {}", output.status);
|
||||||
/// println!("stdout: {}", str::from_utf8_lossy(output.output.as_slice()));
|
/// println!("stdout: {}", String::from_utf8_lossy(output.output.as_slice()));
|
||||||
/// println!("stderr: {}", str::from_utf8_lossy(output.error.as_slice()));
|
/// println!("stderr: {}", String::from_utf8_lossy(output.error.as_slice()));
|
||||||
/// ```
|
/// ```
|
||||||
pub fn output(&self) -> IoResult<ProcessOutput> {
|
pub fn output(&self) -> IoResult<ProcessOutput> {
|
||||||
self.spawn().and_then(|p| p.wait_with_output())
|
self.spawn().and_then(|p| p.wait_with_output())
|
||||||
|
@ -353,9 +352,9 @@ impl fmt::Show for Command {
|
||||||
/// non-utf8 data is lossily converted using the utf8 replacement
|
/// non-utf8 data is lossily converted using the utf8 replacement
|
||||||
/// character.
|
/// character.
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
try!(write!(f, "{}", str::from_utf8_lossy(self.program.as_bytes_no_nul())));
|
try!(write!(f, "{}", String::from_utf8_lossy(self.program.as_bytes_no_nul())));
|
||||||
for arg in self.args.iter() {
|
for arg in self.args.iter() {
|
||||||
try!(write!(f, " '{}'", str::from_utf8_lossy(arg.as_bytes_no_nul())));
|
try!(write!(f, " '{}'", String::from_utf8_lossy(arg.as_bytes_no_nul())));
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -903,7 +902,7 @@ mod tests {
|
||||||
let new_env = vec![("RUN_TEST_NEW_ENV", "123")];
|
let new_env = vec![("RUN_TEST_NEW_ENV", "123")];
|
||||||
let prog = env_cmd().env_set_all(new_env.as_slice()).spawn().unwrap();
|
let prog = env_cmd().env_set_all(new_env.as_slice()).spawn().unwrap();
|
||||||
let result = prog.wait_with_output().unwrap();
|
let result = prog.wait_with_output().unwrap();
|
||||||
let output = str::from_utf8_lossy(result.output.as_slice()).into_string();
|
let output = String::from_utf8_lossy(result.output.as_slice()).into_string();
|
||||||
|
|
||||||
assert!(output.as_slice().contains("RUN_TEST_NEW_ENV=123"),
|
assert!(output.as_slice().contains("RUN_TEST_NEW_ENV=123"),
|
||||||
"didn't find RUN_TEST_NEW_ENV inside of:\n\n{}", output);
|
"didn't find RUN_TEST_NEW_ENV inside of:\n\n{}", output);
|
||||||
|
|
|
@ -208,7 +208,7 @@ fn with_env_lock<T>(f: || -> T) -> T {
|
||||||
/// Returns a vector of (variable, value) pairs, for all the environment
|
/// Returns a vector of (variable, value) pairs, for all the environment
|
||||||
/// variables of the current process.
|
/// variables of the current process.
|
||||||
///
|
///
|
||||||
/// Invalid UTF-8 bytes are replaced with \uFFFD. See `str::from_utf8_lossy()`
|
/// Invalid UTF-8 bytes are replaced with \uFFFD. See `String::from_utf8_lossy()`
|
||||||
/// for details.
|
/// for details.
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
|
@ -223,8 +223,8 @@ fn with_env_lock<T>(f: || -> T) -> T {
|
||||||
/// ```
|
/// ```
|
||||||
pub fn env() -> Vec<(String,String)> {
|
pub fn env() -> Vec<(String,String)> {
|
||||||
env_as_bytes().move_iter().map(|(k,v)| {
|
env_as_bytes().move_iter().map(|(k,v)| {
|
||||||
let k = String::from_str(str::from_utf8_lossy(k.as_slice()).as_slice());
|
let k = String::from_utf8_lossy(k.as_slice()).into_string();
|
||||||
let v = String::from_str(str::from_utf8_lossy(v.as_slice()).as_slice());
|
let v = String::from_utf8_lossy(v.as_slice()).into_string();
|
||||||
(k,v)
|
(k,v)
|
||||||
}).collect()
|
}).collect()
|
||||||
}
|
}
|
||||||
|
@ -316,7 +316,7 @@ pub fn env_as_bytes() -> Vec<(Vec<u8>,Vec<u8>)> {
|
||||||
/// None if the variable isn't set.
|
/// None if the variable isn't set.
|
||||||
///
|
///
|
||||||
/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See
|
/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See
|
||||||
/// `str::from_utf8_lossy()` for details.
|
/// `String::from_utf8_lossy()` for details.
|
||||||
///
|
///
|
||||||
/// # Failure
|
/// # Failure
|
||||||
///
|
///
|
||||||
|
@ -334,7 +334,7 @@ pub fn env_as_bytes() -> Vec<(Vec<u8>,Vec<u8>)> {
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
pub fn getenv(n: &str) -> Option<String> {
|
pub fn getenv(n: &str) -> Option<String> {
|
||||||
getenv_as_bytes(n).map(|v| String::from_str(str::from_utf8_lossy(v.as_slice()).as_slice()))
|
getenv_as_bytes(n).map(|v| String::from_utf8_lossy(v.as_slice()).into_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
@ -1186,7 +1186,7 @@ fn real_args_as_bytes() -> Vec<Vec<u8>> {
|
||||||
fn real_args() -> Vec<String> {
|
fn real_args() -> Vec<String> {
|
||||||
real_args_as_bytes().move_iter()
|
real_args_as_bytes().move_iter()
|
||||||
.map(|v| {
|
.map(|v| {
|
||||||
str::from_utf8_lossy(v.as_slice()).into_string()
|
String::from_utf8_lossy(v.as_slice()).into_string()
|
||||||
}).collect()
|
}).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1244,7 +1244,7 @@ extern "system" {
|
||||||
/// via the command line).
|
/// via the command line).
|
||||||
///
|
///
|
||||||
/// The arguments are interpreted as utf-8, with invalid bytes replaced with \uFFFD.
|
/// The arguments are interpreted as utf-8, with invalid bytes replaced with \uFFFD.
|
||||||
/// See `str::from_utf8_lossy` for details.
|
/// See `String::from_utf8_lossy` for details.
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
///
|
||||||
/// ```rust
|
/// ```rust
|
||||||
|
|
|
@ -72,7 +72,7 @@ use fmt;
|
||||||
use iter::Iterator;
|
use iter::Iterator;
|
||||||
use option::{Option, None, Some};
|
use option::{Option, None, Some};
|
||||||
use str;
|
use str;
|
||||||
use str::{MaybeOwned, Str, StrSlice, from_utf8_lossy};
|
use str::{MaybeOwned, Str, StrSlice};
|
||||||
use string::String;
|
use string::String;
|
||||||
use slice::Vector;
|
use slice::Vector;
|
||||||
use slice::{ImmutableEqVector, ImmutableVector};
|
use slice::{ImmutableEqVector, ImmutableVector};
|
||||||
|
@ -483,7 +483,7 @@ impl<'a, P: GenericPath> Display<'a, P> {
|
||||||
/// unicode replacement char. This involves allocation.
|
/// unicode replacement char. This involves allocation.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
|
pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
|
||||||
from_utf8_lossy(if self.filename {
|
String::from_utf8_lossy(if self.filename {
|
||||||
match self.path.filename() {
|
match self.path.filename() {
|
||||||
None => &[],
|
None => &[],
|
||||||
Some(v) => v
|
Some(v) => v
|
||||||
|
|
|
@ -60,7 +60,6 @@ use std::io::stdio::StdWriter;
|
||||||
use std::io::{File, ChanReader, ChanWriter};
|
use std::io::{File, ChanReader, ChanWriter};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::os;
|
use std::os;
|
||||||
use std::str;
|
|
||||||
use std::string::String;
|
use std::string::String;
|
||||||
use std::task::TaskBuilder;
|
use std::task::TaskBuilder;
|
||||||
|
|
||||||
|
@ -636,7 +635,7 @@ impl<T: Writer> ConsoleTestState<T> {
|
||||||
if stdout.len() > 0 {
|
if stdout.len() > 0 {
|
||||||
fail_out.push_str(format!("---- {} stdout ----\n\t",
|
fail_out.push_str(format!("---- {} stdout ----\n\t",
|
||||||
f.name.as_slice()).as_slice());
|
f.name.as_slice()).as_slice());
|
||||||
let output = str::from_utf8_lossy(stdout.as_slice());
|
let output = String::from_utf8_lossy(stdout.as_slice());
|
||||||
fail_out.push_str(output.as_slice()
|
fail_out.push_str(output.as_slice()
|
||||||
.replace("\n", "\n\t")
|
.replace("\n", "\n\t")
|
||||||
.as_slice());
|
.as_slice());
|
||||||
|
@ -873,7 +872,7 @@ fn should_sort_failures_before_printing_them() {
|
||||||
|
|
||||||
st.write_failures().unwrap();
|
st.write_failures().unwrap();
|
||||||
let s = match st.out {
|
let s = match st.out {
|
||||||
Raw(ref m) => str::from_utf8_lossy(m.get_ref()),
|
Raw(ref m) => String::from_utf8_lossy(m.get_ref()),
|
||||||
Pretty(_) => unreachable!()
|
Pretty(_) => unreachable!()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
use std::{char, os, str};
|
use std::{char, os};
|
||||||
use std::io::{File, Command};
|
use std::io::{File, Command};
|
||||||
use std::rand::{task_rng, Rng};
|
use std::rand::{task_rng, Rng};
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ fn main() {
|
||||||
main_file.as_str()
|
main_file.as_str()
|
||||||
.unwrap()).as_slice())
|
.unwrap()).as_slice())
|
||||||
.output().unwrap();
|
.output().unwrap();
|
||||||
let err = str::from_utf8_lossy(result.error.as_slice());
|
let err = String::from_utf8_lossy(result.error.as_slice());
|
||||||
|
|
||||||
// positive test so that this test will be updated when the
|
// positive test so that this test will be updated when the
|
||||||
// compiler changes.
|
// compiler changes.
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
use std::{char, os, str};
|
use std::{char, os};
|
||||||
use std::io::{File, Command};
|
use std::io::{File, Command};
|
||||||
use std::rand::{task_rng, Rng};
|
use std::rand::{task_rng, Rng};
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ fn main() {
|
||||||
.unwrap()).as_slice())
|
.unwrap()).as_slice())
|
||||||
.output().unwrap();
|
.output().unwrap();
|
||||||
|
|
||||||
let err = str::from_utf8_lossy(result.error.as_slice());
|
let err = String::from_utf8_lossy(result.error.as_slice());
|
||||||
|
|
||||||
// the span should end the line (e.g no extra ~'s)
|
// the span should end the line (e.g no extra ~'s)
|
||||||
let expected_span = format!("^{}\n", "~".repeat(n - 1));
|
let expected_span = format!("^{}\n", "~".repeat(n - 1));
|
||||||
|
|
|
@ -12,7 +12,6 @@
|
||||||
|
|
||||||
use std::io::process::Command;
|
use std::io::process::Command;
|
||||||
use std::os;
|
use std::os;
|
||||||
use std::str;
|
|
||||||
|
|
||||||
// lifted from the test module
|
// lifted from the test module
|
||||||
// Inlining to avoid llvm turning the recursive functions into tail calls,
|
// Inlining to avoid llvm turning the recursive functions into tail calls,
|
||||||
|
@ -42,12 +41,12 @@ fn main() {
|
||||||
} else {
|
} else {
|
||||||
let silent = Command::new(args[0].as_slice()).arg("silent").output().unwrap();
|
let silent = Command::new(args[0].as_slice()).arg("silent").output().unwrap();
|
||||||
assert!(!silent.status.success());
|
assert!(!silent.status.success());
|
||||||
let error = str::from_utf8_lossy(silent.error.as_slice());
|
let error = String::from_utf8_lossy(silent.error.as_slice());
|
||||||
assert!(error.as_slice().contains("has overflowed its stack"));
|
assert!(error.as_slice().contains("has overflowed its stack"));
|
||||||
|
|
||||||
let loud = Command::new(args[0].as_slice()).arg("loud").output().unwrap();
|
let loud = Command::new(args[0].as_slice()).arg("loud").output().unwrap();
|
||||||
assert!(!loud.status.success());
|
assert!(!loud.status.success());
|
||||||
let error = str::from_utf8_lossy(silent.error.as_slice());
|
let error = String::from_utf8_lossy(silent.error.as_slice());
|
||||||
assert!(error.as_slice().contains("has overflowed its stack"));
|
assert!(error.as_slice().contains("has overflowed its stack"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue