From 1900abdd9b5b5eef5d90b43555c1ae06743e50db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Thu, 10 Jul 2014 17:53:51 +0200 Subject: [PATCH] Deprecate `str::from_utf16_lossy` Use `String::from_utf16_lossy` instead. [breaking-change] --- src/libcollections/str.rs | 108 +---------------------------------- src/libcollections/string.rs | 107 ++++++++++++++++++++++++++++++++++ src/libstd/os.rs | 2 +- 3 files changed, 111 insertions(+), 106 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 8ed664e6c69..e7536469a7a 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -384,23 +384,10 @@ pub fn from_utf16(v: &[u16]) -> Option { String::from_utf16(v) } -/// Decode a UTF-16 encoded vector `v` into a string, replacing -/// invalid data with the replacement character (U+FFFD). -/// -/// # Example -/// ```rust -/// use std::str; -/// -/// // ๐„žmusic -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0xDD1E, 0x0069, 0x0063, -/// 0xD834]; -/// -/// assert_eq!(str::from_utf16_lossy(v), -/// "๐„žmus\uFFFDic\uFFFD".to_string()); -/// ``` +/// Deprecated. Use `String::from_utf16_lossy`. +#[deprecated = "Replaced by String::from_utf16_lossy"] pub fn from_utf16_lossy(v: &[u16]) -> String { - utf16_items(v).map(|c| c.to_char_lossy()).collect() + String::from_utf16_lossy(v) } // Return the initial codepoint accumulator for the first byte. @@ -1655,95 +1642,6 @@ mod tests { assert!(!"".contains_char('a')); } - #[test] - fn test_utf16() { - let pairs = - [(String::from_str("๐…๐Œฟ๐Œป๐†๐Œน๐Œป๐Œฐ\n"), - vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16, - 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16, - 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16, - 0xd800_u16, 0xdf30_u16, 0x000a_u16]), - - (String::from_str("๐’๐‘‰๐ฎ๐‘€๐ฒ๐‘‹ ๐๐ฒ๐‘\n"), - vec![0xd801_u16, 0xdc12_u16, 0xd801_u16, - 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16, - 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16, - 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16, - 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16, - 0x000a_u16]), - - (String::from_str("๐Œ€๐Œ–๐Œ‹๐Œ„๐Œ‘๐Œ‰ยท๐ŒŒ๐Œ„๐Œ•๐Œ„๐Œ‹๐Œ‰๐Œ‘\n"), - vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16, - 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16, - 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16, - 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16, - 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16, - 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16, - 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]), - - (String::from_str("๐’‹๐’˜๐’ˆ๐’‘๐’›๐’’ ๐’•๐’“ ๐’ˆ๐’š๐’ ๐’๐’œ๐’’๐’–๐’† ๐’•๐’†\n"), - vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16, - 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16, - 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16, - 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16, - 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16, - 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16, - 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16, - 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16, - 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16, - 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16, - 0x000a_u16 ]), - // Issue #12318, even-numbered non-BMP planes - (String::from_str("\U00020000"), - vec![0xD840, 0xDC00])]; - - for p in pairs.iter() { - let (s, u) = (*p).clone(); - let s_as_utf16 = s.as_slice().utf16_units().collect::>(); - let u_as_string = String::from_utf16(u.as_slice()).unwrap(); - - assert!(is_utf16(u.as_slice())); - assert_eq!(s_as_utf16, u); - - assert_eq!(u_as_string, s); - assert_eq!(from_utf16_lossy(u.as_slice()), s); - - assert_eq!(String::from_utf16(s_as_utf16.as_slice()).unwrap(), s); - assert_eq!(u_as_string.as_slice().utf16_units().collect::>(), u); - } - } - - #[test] - fn test_utf16_invalid() { - // completely positive cases tested above. - // lead + eof - assert_eq!(String::from_utf16([0xD800]), None); - // lead + lead - assert_eq!(String::from_utf16([0xD800, 0xD800]), None); - - // isolated trail - assert_eq!(String::from_utf16([0x0061, 0xDC00]), None); - - // general - assert_eq!(String::from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None); - } - - #[test] - fn test_utf16_lossy() { - // completely positive cases tested above. - // lead + eof - assert_eq!(from_utf16_lossy([0xD800]), String::from_str("\uFFFD")); - // lead + lead - assert_eq!(from_utf16_lossy([0xD800, 0xD800]), String::from_str("\uFFFD\uFFFD")); - - // isolated trail - assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), String::from_str("a\uFFFD")); - - // general - assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), - String::from_str("\uFFFD๐’‹\uFFFD")); - } - #[test] fn test_truncate_utf16_at_nul() { let v = []; diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 2928fd327ae..a5d66dd23ea 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -117,6 +117,23 @@ impl String { } Some(s) } + + /// Decode a UTF-16 encoded vector `v` into a string, replacing + /// invalid data with the replacement character (U+FFFD). + /// + /// # Example + /// ```rust + /// // รฐยโ€žลพmusic + /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0xDD1E, 0x0069, 0x0063, + /// 0xD834]; + /// + /// assert_eq!(String::from_utf16_lossy(v), + /// "รฐยโ€žลพmus\uFFFDic\uFFFD".to_string()); + /// ``` + pub fn from_utf16_lossy(v: &[u16]) -> String { + str::utf16_items(v).map(|c| c.to_char_lossy()).collect() + } /// Convert a vector of chars to a string /// @@ -431,6 +448,7 @@ mod tests { use test::Bencher; use Mutable; + use str; use str::{Str, StrSlice}; use super::String; @@ -439,6 +457,95 @@ mod tests { let owned: Option<::std::string::String> = from_str("string"); assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string")); } + + #[test] + fn test_from_utf16() { + let pairs = + [(String::from_str("รฐยยโ€ฆรฐยล’ยฟรฐยล’ยปรฐยยโ€ รฐยล’ยนรฐยล’ยปรฐยล’ยฐ\n"), + vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16, + 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16, + 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16, + 0xd800_u16, 0xdf30_u16, 0x000a_u16]), + + (String::from_str("รฐยยโ€™รฐยโ€˜โ€ฐรฐยยยฎรฐยโ€˜โ‚ฌรฐยยยฒรฐยโ€˜โ€น รฐยยยรฐยยยฒรฐยโ€˜ย\n"), + vec![0xd801_u16, 0xdc12_u16, 0xd801_u16, + 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16, + 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16, + 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16, + 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16, + 0x000a_u16]), + + (String::from_str("รฐยล’โ‚ฌรฐยล’โ€“รฐยล’โ€นรฐยล’โ€žรฐยล’โ€˜รฐยล’โ€ฐร‚ยทรฐยล’ล’รฐยล’โ€žรฐยล’โ€ขรฐยล’โ€žรฐยล’โ€นรฐยล’โ€ฐรฐยล’โ€˜\n"), + vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16, + 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16, + 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16, + 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16, + 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16, + 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16, + 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]), + + (String::from_str("รฐยโ€™โ€นรฐยโ€™หœรฐยโ€™ห†รฐยโ€™โ€˜รฐยโ€™โ€บรฐยโ€™โ€™ รฐยโ€™โ€ขรฐยโ€™โ€œ รฐยโ€™ห†รฐยโ€™ลกรฐยโ€™ย รฐยโ€™ยรฐยโ€™ล“รฐยโ€™โ€™รฐยโ€™โ€“รฐยโ€™โ€  รฐยโ€™โ€ขรฐยโ€™โ€ \n"), + vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16, + 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16, + 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16, + 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16, + 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16, + 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16, + 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16, + 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16, + 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16, + 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16, + 0x000a_u16 ]), + // Issue #12318, even-numbered non-BMP planes + (String::from_str("\U00020000"), + vec![0xD840, 0xDC00])]; + + for p in pairs.iter() { + let (s, u) = (*p).clone(); + let s_as_utf16 = s.as_slice().utf16_units().collect::>(); + let u_as_string = String::from_utf16(u.as_slice()).unwrap(); + + assert!(str::is_utf16(u.as_slice())); + assert_eq!(s_as_utf16, u); + + assert_eq!(u_as_string, s); + assert_eq!(String::from_utf16_lossy(u.as_slice()), s); + + assert_eq!(String::from_utf16(s_as_utf16.as_slice()).unwrap(), s); + assert_eq!(u_as_string.as_slice().utf16_units().collect::>(), u); + } + } + + #[test] + fn test_utf16_invalid() { + // completely positive cases tested above. + // lead + eof + assert_eq!(String::from_utf16([0xD800]), None); + // lead + lead + assert_eq!(String::from_utf16([0xD800, 0xD800]), None); + + // isolated trail + assert_eq!(String::from_utf16([0x0061, 0xDC00]), None); + + // general + assert_eq!(String::from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None); + } + + #[test] + fn test_from_utf16_lossy() { + // completely positive cases tested above. + // lead + eof + assert_eq!(String::from_utf16_lossy([0xD800]), String::from_str("\uFFFD")); + // lead + lead + assert_eq!(String::from_utf16_lossy([0xD800, 0xD800]), String::from_str("\uFFFD\uFFFD")); + + // isolated trail + assert_eq!(String::from_utf16_lossy([0x0061, 0xDC00]), String::from_str("a\uFFFD")); + + // general + assert_eq!(String::from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), + String::from_str("\uFFFDรฐยโ€™โ€น\uFFFD")); + } #[bench] fn bench_with_capacity(b: &mut Bencher) { diff --git a/src/libstd/os.rs b/src/libstd/os.rs index a221dd5b376..9537d5daca0 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -266,7 +266,7 @@ pub fn env_as_bytes() -> Vec<(Vec,Vec)> { let p = &*ch.offset(i); let len = ptr::position(p, |c| *c == 0); raw::buf_as_slice(p, len, |s| { - result.push(str::from_utf16_lossy(s).into_bytes()); + result.push(String::from_utf16_lossy(s).into_bytes()); }); i += len as int + 1; }