Make slice->str conversion and related functions const
This commit makes the following functions from `core::str` `const fn`:
- `from_utf8[_mut]` (`feature(const_str_from_utf8)`)
- `from_utf8_unchecked_mut` (`feature(const_str_from_utf8_unchecked_mut)`)
- `Utf8Error::{valid_up_to,error_len}` (`feature(const_str_from_utf8)`)
This commit is contained in:
parent
c9c4b5d727
commit
cf6f64a963
6 changed files with 106 additions and 24 deletions
|
@ -25,6 +25,7 @@
|
||||||
#![feature(const_btree_new)]
|
#![feature(const_btree_new)]
|
||||||
#![feature(const_default_impls)]
|
#![feature(const_default_impls)]
|
||||||
#![feature(const_trait_impl)]
|
#![feature(const_trait_impl)]
|
||||||
|
#![feature(const_str_from_utf8)]
|
||||||
|
|
||||||
use std::collections::hash_map::DefaultHasher;
|
use std::collections::hash_map::DefaultHasher;
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::assert_matches::assert_matches;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||||
use std::str::{from_utf8, from_utf8_unchecked};
|
use std::str::{from_utf8, from_utf8_unchecked};
|
||||||
|
@ -883,6 +884,33 @@ fn test_is_utf8() {
|
||||||
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
|
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_const_is_utf8() {
|
||||||
|
const _: () = {
|
||||||
|
// deny overlong encodings
|
||||||
|
assert!(from_utf8(&[0xc0, 0x80]).is_err());
|
||||||
|
assert!(from_utf8(&[0xc0, 0xae]).is_err());
|
||||||
|
assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
|
||||||
|
assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
|
||||||
|
assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
|
||||||
|
assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
|
||||||
|
assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
|
||||||
|
|
||||||
|
// deny surrogates
|
||||||
|
assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
|
||||||
|
assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
|
||||||
|
|
||||||
|
assert!(from_utf8(&[0xC2, 0x80]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
|
||||||
|
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_utf8_mostly_ascii() {
|
fn from_utf8_mostly_ascii() {
|
||||||
// deny invalid bytes embedded in long stretches of ascii
|
// deny invalid bytes embedded in long stretches of ascii
|
||||||
|
@ -895,13 +923,43 @@ fn from_utf8_mostly_ascii() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn const_from_utf8_mostly_ascii() {
|
||||||
|
const _: () = {
|
||||||
|
// deny invalid bytes embedded in long stretches of ascii
|
||||||
|
let mut i = 32;
|
||||||
|
while i < 64 {
|
||||||
|
let mut data = [0; 128];
|
||||||
|
data[i] = 0xC0;
|
||||||
|
assert!(from_utf8(&data).is_err());
|
||||||
|
data[i] = 0xC2;
|
||||||
|
assert!(from_utf8(&data).is_err());
|
||||||
|
|
||||||
|
i = i + 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_utf8_error() {
|
fn from_utf8_error() {
|
||||||
macro_rules! test {
|
macro_rules! test {
|
||||||
($input: expr, $expected_valid_up_to: expr, $expected_error_len: expr) => {
|
($input: expr, $expected_valid_up_to:pat, $expected_error_len:pat) => {
|
||||||
let error = from_utf8($input).unwrap_err();
|
let error = from_utf8($input).unwrap_err();
|
||||||
assert_eq!(error.valid_up_to(), $expected_valid_up_to);
|
assert_matches!(error.valid_up_to(), $expected_valid_up_to);
|
||||||
assert_eq!(error.error_len(), $expected_error_len);
|
assert_matches!(error.error_len(), $expected_error_len);
|
||||||
|
|
||||||
|
const _: () = {
|
||||||
|
match from_utf8($input) {
|
||||||
|
Err(error) => {
|
||||||
|
let valid_up_to = error.valid_up_to();
|
||||||
|
let error_len = error.error_len();
|
||||||
|
|
||||||
|
assert!(matches!(valid_up_to, $expected_valid_up_to));
|
||||||
|
assert!(matches!(error_len, $expected_error_len));
|
||||||
|
}
|
||||||
|
Ok(_) => unreachable!(),
|
||||||
|
}
|
||||||
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
test!(b"A\xC3\xA9 \xFF ", 4, Some(1));
|
test!(b"A\xC3\xA9 \xFF ", 4, Some(1));
|
||||||
|
|
|
@ -97,6 +97,7 @@
|
||||||
#![allow(explicit_outlives_requirements)]
|
#![allow(explicit_outlives_requirements)]
|
||||||
//
|
//
|
||||||
// Library features for const fns:
|
// Library features for const fns:
|
||||||
|
#![feature(const_align_offset)]
|
||||||
#![feature(const_align_of_val)]
|
#![feature(const_align_of_val)]
|
||||||
#![feature(const_alloc_layout)]
|
#![feature(const_alloc_layout)]
|
||||||
#![feature(const_arguments_as_str)]
|
#![feature(const_arguments_as_str)]
|
||||||
|
@ -130,6 +131,7 @@
|
||||||
#![feature(const_size_of_val)]
|
#![feature(const_size_of_val)]
|
||||||
#![feature(const_slice_from_raw_parts)]
|
#![feature(const_slice_from_raw_parts)]
|
||||||
#![feature(const_slice_ptr_len)]
|
#![feature(const_slice_ptr_len)]
|
||||||
|
#![feature(const_str_from_utf8_unchecked_mut)]
|
||||||
#![feature(const_swap)]
|
#![feature(const_swap)]
|
||||||
#![feature(const_trait_impl)]
|
#![feature(const_trait_impl)]
|
||||||
#![feature(const_type_id)]
|
#![feature(const_type_id)]
|
||||||
|
@ -138,6 +140,7 @@
|
||||||
#![feature(duration_consts_2)]
|
#![feature(duration_consts_2)]
|
||||||
#![feature(ptr_metadata)]
|
#![feature(ptr_metadata)]
|
||||||
#![feature(slice_ptr_get)]
|
#![feature(slice_ptr_get)]
|
||||||
|
#![feature(str_internals)]
|
||||||
#![feature(variant_count)]
|
#![feature(variant_count)]
|
||||||
#![feature(const_array_from_ref)]
|
#![feature(const_array_from_ref)]
|
||||||
#![feature(const_slice_from_ref)]
|
#![feature(const_slice_from_ref)]
|
||||||
|
|
|
@ -82,10 +82,16 @@ use super::Utf8Error;
|
||||||
/// assert_eq!("💖", sparkle_heart);
|
/// assert_eq!("💖", sparkle_heart);
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||||
run_utf8_validation(v)?;
|
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||||
// SAFETY: Just ran validation.
|
// This should use `?` again, once it's `const`
|
||||||
Ok(unsafe { from_utf8_unchecked(v) })
|
match run_utf8_validation(v) {
|
||||||
|
Ok(_) => {
|
||||||
|
// SAFETY: validation succeeded.
|
||||||
|
Ok(unsafe { from_utf8_unchecked(v) })
|
||||||
|
}
|
||||||
|
Err(err) => Err(err),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a mutable slice of bytes to a mutable string slice.
|
/// Converts a mutable slice of bytes to a mutable string slice.
|
||||||
|
@ -119,10 +125,16 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||||
/// See the docs for [`Utf8Error`] for more details on the kinds of
|
/// See the docs for [`Utf8Error`] for more details on the kinds of
|
||||||
/// errors that can be returned.
|
/// errors that can be returned.
|
||||||
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
||||||
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||||
run_utf8_validation(v)?;
|
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
||||||
// SAFETY: Just ran validation.
|
// This should use `?` again, once it's `const`
|
||||||
Ok(unsafe { from_utf8_unchecked_mut(v) })
|
match run_utf8_validation(v) {
|
||||||
|
Ok(_) => {
|
||||||
|
// SAFETY: validation succeeded.
|
||||||
|
Ok(unsafe { from_utf8_unchecked_mut(v) })
|
||||||
|
}
|
||||||
|
Err(err) => Err(err),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a slice of bytes to a string slice without checking
|
/// Converts a slice of bytes to a string slice without checking
|
||||||
|
@ -184,7 +196,8 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
|
||||||
#[inline]
|
#[inline]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
||||||
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
|
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "none")]
|
||||||
|
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
|
||||||
// SAFETY: the caller must guarantee that the bytes `v`
|
// SAFETY: the caller must guarantee that the bytes `v`
|
||||||
// are valid UTF-8, thus the cast to `*mut str` is safe.
|
// are valid UTF-8, thus the cast to `*mut str` is safe.
|
||||||
// Also, the pointer dereference is safe because that pointer
|
// Also, the pointer dereference is safe because that pointer
|
||||||
|
|
|
@ -72,9 +72,10 @@ impl Utf8Error {
|
||||||
/// assert_eq!(1, error.valid_up_to());
|
/// assert_eq!(1, error.valid_up_to());
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "utf8_error", since = "1.5.0")]
|
#[stable(feature = "utf8_error", since = "1.5.0")]
|
||||||
|
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn valid_up_to(&self) -> usize {
|
pub const fn valid_up_to(&self) -> usize {
|
||||||
self.valid_up_to
|
self.valid_up_to
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,10 +95,15 @@ impl Utf8Error {
|
||||||
///
|
///
|
||||||
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
|
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
|
||||||
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
|
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
|
||||||
|
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn error_len(&self) -> Option<usize> {
|
pub const fn error_len(&self) -> Option<usize> {
|
||||||
self.error_len.map(|len| len as usize)
|
// This should become `map` again, once it's `const`
|
||||||
|
match self.error_len {
|
||||||
|
Some(len) => Some(len as usize),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,25 +8,25 @@ use super::Utf8Error;
|
||||||
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
||||||
/// for width 3, and 3 bits for width 4.
|
/// for width 3, and 3 bits for width 4.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
|
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
|
||||||
(byte & (0x7F >> width)) as u32
|
(byte & (0x7F >> width)) as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the value of `ch` updated with continuation byte `byte`.
|
/// Returns the value of `ch` updated with continuation byte `byte`.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
|
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
|
||||||
(ch << 6) | (byte & CONT_MASK) as u32
|
(ch << 6) | (byte & CONT_MASK) as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
|
/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
|
||||||
/// bits `10`).
|
/// bits `10`).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(super) fn utf8_is_cont_byte(byte: u8) -> bool {
|
pub(super) const fn utf8_is_cont_byte(byte: u8) -> bool {
|
||||||
(byte as i8) < -64
|
(byte as i8) < -64
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
|
const fn unwrap_or_0(opt: Option<&u8>) -> u8 {
|
||||||
match opt {
|
match opt {
|
||||||
Some(&byte) => byte,
|
Some(&byte) => byte,
|
||||||
None => 0,
|
None => 0,
|
||||||
|
@ -105,14 +105,15 @@ const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
|
||||||
|
|
||||||
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
|
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
|
||||||
#[inline]
|
#[inline]
|
||||||
fn contains_nonascii(x: usize) -> bool {
|
const fn contains_nonascii(x: usize) -> bool {
|
||||||
(x & NONASCII_MASK) != 0
|
(x & NONASCII_MASK) != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Walks through `v` checking that it's a valid UTF-8 sequence,
|
/// Walks through `v` checking that it's a valid UTF-8 sequence,
|
||||||
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
|
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
#[rustc_const_unstable(feature = "str_internals", issue = "none")]
|
||||||
|
pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
let len = v.len();
|
let len = v.len();
|
||||||
|
|
||||||
|
@ -142,7 +143,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||||
|
|
||||||
let first = v[index];
|
let first = v[index];
|
||||||
if first >= 128 {
|
if first >= 128 {
|
||||||
let w = UTF8_CHAR_WIDTH[first as usize];
|
let w = utf8_char_width(first);
|
||||||
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
|
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
|
||||||
// first C2 80 last DF BF
|
// first C2 80 last DF BF
|
||||||
// 3-byte encoding is for codepoints \u{0800} to \u{ffff}
|
// 3-byte encoding is for codepoints \u{0800} to \u{ffff}
|
||||||
|
@ -230,7 +231,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://tools.ietf.org/html/rfc3629
|
// https://tools.ietf.org/html/rfc3629
|
||||||
static UTF8_CHAR_WIDTH: [u8; 256] = [
|
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
1, // 0x1F
|
1, // 0x1F
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
@ -253,7 +254,7 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
|
||||||
#[unstable(feature = "str_internals", issue = "none")]
|
#[unstable(feature = "str_internals", issue = "none")]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn utf8_char_width(b: u8) -> usize {
|
pub const fn utf8_char_width(b: u8) -> usize {
|
||||||
UTF8_CHAR_WIDTH[b as usize] as usize
|
UTF8_CHAR_WIDTH[b as usize] as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue