Rollup merge of #105076 - mina86:a, r=scottmcm
Refactor core::char::EscapeDefault and co. structures

Change the core::char::{EscapeUnicode, EscapeDefault and EscapeDebug} structures from using a state machine to computing the escaped sequence upfront and, during iteration, just going through the characters.

This is arguably simpler since it’s easier to think about having a buffer and a start..end range to iterate over rather than thinking about a state machine.

This also harmonises the implementation of the aforementioned iterators and the core::ascii::EscapeDefault struct. This is done by introducing a new helper EscapeIterInner struct which holds the buffer and offers simple methods for iterating over a range.

As a side effect, this probably optimises the Display implementation for those types since, rather than calling write_char repeatedly, write_str is invoked once. On 64-bit platforms, it also reduces the size of some of the structs:

| Struct                     | Before | After |
|----------------------------+--------+-------|
| core::char::EscapeUnicode  |     16 |    12 |
| core::char::EscapeDefault  |     16 |    12 |
| core::char::EscapeDebug    |     16 |    16 |

My ulterior motive and the reason why I started looking into this is the addition of an as_str method to the iterators. With this change this will become trivial. It’s also going to be trivial to implement DoubleEndedIterator if that’s ever desired.
This commit is contained in:
commit
f916c44aec
5 changed files with 278 additions and 211 deletions
|
@ -9,10 +9,10 @@
|
||||||
|
|
||||||
#![stable(feature = "core_ascii", since = "1.26.0")]
|
#![stable(feature = "core_ascii", since = "1.26.0")]
|
||||||
|
|
||||||
|
use crate::escape;
|
||||||
use crate::fmt;
|
use crate::fmt;
|
||||||
use crate::iter::FusedIterator;
|
use crate::iter::FusedIterator;
|
||||||
use crate::ops::Range;
|
use crate::num::NonZeroUsize;
|
||||||
use crate::str::from_utf8_unchecked;
|
|
||||||
|
|
||||||
/// An iterator over the escaped version of a byte.
|
/// An iterator over the escaped version of a byte.
|
||||||
///
|
///
|
||||||
|
@ -21,10 +21,7 @@ use crate::str::from_utf8_unchecked;
|
||||||
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
#[must_use = "iterators are lazy and do nothing unless consumed"]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct EscapeDefault {
|
pub struct EscapeDefault(escape::EscapeIterInner<4>);
|
||||||
range: Range<u8>,
|
|
||||||
data: [u8; 4],
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns an iterator that produces an escaped version of a `u8`.
|
/// Returns an iterator that produces an escaped version of a `u8`.
|
||||||
///
|
///
|
||||||
|
@ -90,21 +87,9 @@ pub struct EscapeDefault {
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn escape_default(c: u8) -> EscapeDefault {
|
pub fn escape_default(c: u8) -> EscapeDefault {
|
||||||
let (data, len) = match c {
|
let mut data = [0; 4];
|
||||||
b'\t' => ([b'\\', b't', 0, 0], 2),
|
let range = escape::escape_ascii_into(&mut data, c);
|
||||||
b'\r' => ([b'\\', b'r', 0, 0], 2),
|
EscapeDefault(escape::EscapeIterInner::new(data, range))
|
||||||
b'\n' => ([b'\\', b'n', 0, 0], 2),
|
|
||||||
b'\\' => ([b'\\', b'\\', 0, 0], 2),
|
|
||||||
b'\'' => ([b'\\', b'\'', 0, 0], 2),
|
|
||||||
b'"' => ([b'\\', b'"', 0, 0], 2),
|
|
||||||
b'\x20'..=b'\x7e' => ([c, 0, 0, 0], 1),
|
|
||||||
_ => {
|
|
||||||
let hex_digits: &[u8; 16] = b"0123456789abcdef";
|
|
||||||
([b'\\', b'x', hex_digits[(c >> 4) as usize], hex_digits[(c & 0xf) as usize]], 4)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return EscapeDefault { range: 0..len, data };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
@ -113,33 +98,59 @@ impl Iterator for EscapeDefault {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn next(&mut self) -> Option<u8> {
|
fn next(&mut self) -> Option<u8> {
|
||||||
self.range.next().map(|i| self.data[i as usize])
|
self.0.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
self.range.size_hint()
|
let n = self.0.len();
|
||||||
|
(n, Some(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn count(self) -> usize {
|
||||||
|
self.0.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn last(mut self) -> Option<u8> {
|
fn last(mut self) -> Option<u8> {
|
||||||
self.next_back()
|
self.0.next_back()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
|
self.0.advance_by(n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl DoubleEndedIterator for EscapeDefault {
|
impl DoubleEndedIterator for EscapeDefault {
|
||||||
|
#[inline]
|
||||||
fn next_back(&mut self) -> Option<u8> {
|
fn next_back(&mut self) -> Option<u8> {
|
||||||
self.range.next_back().map(|i| self.data[i as usize])
|
self.0.next_back()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
|
self.0.advance_back_by(n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl ExactSizeIterator for EscapeDefault {}
|
impl ExactSizeIterator for EscapeDefault {
|
||||||
|
#[inline]
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.0.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[stable(feature = "fused", since = "1.26.0")]
|
#[stable(feature = "fused", since = "1.26.0")]
|
||||||
impl FusedIterator for EscapeDefault {}
|
impl FusedIterator for EscapeDefault {}
|
||||||
|
|
||||||
#[stable(feature = "ascii_escape_display", since = "1.39.0")]
|
#[stable(feature = "ascii_escape_display", since = "1.39.0")]
|
||||||
impl fmt::Display for EscapeDefault {
|
impl fmt::Display for EscapeDefault {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
// SAFETY: ok because `escape_default` created only valid utf-8 data
|
f.write_str(self.0.as_str())
|
||||||
f.write_str(unsafe {
|
|
||||||
from_utf8_unchecked(&self.data[(self.range.start as usize)..(self.range.end as usize)])
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -380,20 +380,7 @@ impl char {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn escape_unicode(self) -> EscapeUnicode {
|
pub fn escape_unicode(self) -> EscapeUnicode {
|
||||||
let c = self as u32;
|
EscapeUnicode::new(self)
|
||||||
|
|
||||||
// or-ing 1 ensures that for c==0 the code computes that one
|
|
||||||
// digit should be printed and (which is the same) avoids the
|
|
||||||
// (31 - 32) underflow
|
|
||||||
let msb = 31 - (c | 1).leading_zeros();
|
|
||||||
|
|
||||||
// the index of the most significant hex digit
|
|
||||||
let ms_hex_digit = msb / 4;
|
|
||||||
EscapeUnicode {
|
|
||||||
c: self,
|
|
||||||
state: EscapeUnicodeState::Backslash,
|
|
||||||
hex_digit_idx: ms_hex_digit as usize,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An extended version of `escape_debug` that optionally permits escaping
|
/// An extended version of `escape_debug` that optionally permits escaping
|
||||||
|
@ -403,21 +390,20 @@ impl char {
|
||||||
/// characters, and double quotes in strings.
|
/// characters, and double quotes in strings.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
|
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
|
||||||
let init_state = match self {
|
match self {
|
||||||
'\0' => EscapeDefaultState::Backslash('0'),
|
'\0' => EscapeDebug::backslash(b'0'),
|
||||||
'\t' => EscapeDefaultState::Backslash('t'),
|
'\t' => EscapeDebug::backslash(b't'),
|
||||||
'\r' => EscapeDefaultState::Backslash('r'),
|
'\r' => EscapeDebug::backslash(b'r'),
|
||||||
'\n' => EscapeDefaultState::Backslash('n'),
|
'\n' => EscapeDebug::backslash(b'n'),
|
||||||
'\\' => EscapeDefaultState::Backslash(self),
|
'\\' => EscapeDebug::backslash(b'\\'),
|
||||||
'"' if args.escape_double_quote => EscapeDefaultState::Backslash(self),
|
'"' if args.escape_double_quote => EscapeDebug::backslash(b'"'),
|
||||||
'\'' if args.escape_single_quote => EscapeDefaultState::Backslash(self),
|
'\'' if args.escape_single_quote => EscapeDebug::backslash(b'\''),
|
||||||
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
|
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
|
||||||
EscapeDefaultState::Unicode(self.escape_unicode())
|
EscapeDebug::from_unicode(self.escape_unicode())
|
||||||
}
|
}
|
||||||
_ if is_printable(self) => EscapeDefaultState::Char(self),
|
_ if is_printable(self) => EscapeDebug::printable(self),
|
||||||
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
|
_ => EscapeDebug::from_unicode(self.escape_unicode()),
|
||||||
};
|
}
|
||||||
EscapeDebug(EscapeDefault { state: init_state })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator that yields the literal escape code of a character
|
/// Returns an iterator that yields the literal escape code of a character
|
||||||
|
@ -515,15 +501,14 @@ impl char {
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn escape_default(self) -> EscapeDefault {
|
pub fn escape_default(self) -> EscapeDefault {
|
||||||
let init_state = match self {
|
match self {
|
||||||
'\t' => EscapeDefaultState::Backslash('t'),
|
'\t' => EscapeDefault::backslash(b't'),
|
||||||
'\r' => EscapeDefaultState::Backslash('r'),
|
'\r' => EscapeDefault::backslash(b'r'),
|
||||||
'\n' => EscapeDefaultState::Backslash('n'),
|
'\n' => EscapeDefault::backslash(b'n'),
|
||||||
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
|
'\\' | '\'' | '"' => EscapeDefault::backslash(self as u8),
|
||||||
'\x20'..='\x7e' => EscapeDefaultState::Char(self),
|
'\x20'..='\x7e' => EscapeDefault::printable(self as u8),
|
||||||
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
|
_ => EscapeDefault::from_unicode(self.escape_unicode()),
|
||||||
};
|
}
|
||||||
EscapeDefault { state: init_state }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of bytes this `char` would need if encoded in UTF-8.
|
/// Returns the number of bytes this `char` would need if encoded in UTF-8.
|
||||||
|
|
|
@ -39,8 +39,10 @@ pub use self::methods::encode_utf16_raw;
|
||||||
pub use self::methods::encode_utf8_raw;
|
pub use self::methods::encode_utf8_raw;
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
|
use crate::escape;
|
||||||
use crate::fmt::{self, Write};
|
use crate::fmt::{self, Write};
|
||||||
use crate::iter::FusedIterator;
|
use crate::iter::FusedIterator;
|
||||||
|
use crate::num::NonZeroUsize;
|
||||||
|
|
||||||
pub(crate) use self::methods::EscapeDebugExtArgs;
|
pub(crate) use self::methods::EscapeDebugExtArgs;
|
||||||
|
|
||||||
|
@ -146,86 +148,44 @@ pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
|
||||||
/// [`escape_unicode`]: char::escape_unicode
|
/// [`escape_unicode`]: char::escape_unicode
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub struct EscapeUnicode {
|
pub struct EscapeUnicode(escape::EscapeIterInner<10>);
|
||||||
c: char,
|
|
||||||
state: EscapeUnicodeState,
|
|
||||||
|
|
||||||
// The index of the next hex digit to be printed (0 if none),
|
impl EscapeUnicode {
|
||||||
// i.e., the number of remaining hex digits to be printed;
|
fn new(chr: char) -> Self {
|
||||||
// increasing from the least significant digit: 0x543210
|
let mut data = [0; 10];
|
||||||
hex_digit_idx: usize,
|
let range = escape::escape_unicode_into(&mut data, chr);
|
||||||
}
|
Self(escape::EscapeIterInner::new(data, range))
|
||||||
|
}
|
||||||
// The enum values are ordered so that their representation is the
|
|
||||||
// same as the remaining length (besides the hexadecimal digits). This
|
|
||||||
// likely makes `len()` a single load from memory) and inline-worth.
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
enum EscapeUnicodeState {
|
|
||||||
Done,
|
|
||||||
RightBrace,
|
|
||||||
Value,
|
|
||||||
LeftBrace,
|
|
||||||
Type,
|
|
||||||
Backslash,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl Iterator for EscapeUnicode {
|
impl Iterator for EscapeUnicode {
|
||||||
type Item = char;
|
type Item = char;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn next(&mut self) -> Option<char> {
|
fn next(&mut self) -> Option<char> {
|
||||||
match self.state {
|
self.0.next().map(char::from)
|
||||||
EscapeUnicodeState::Backslash => {
|
|
||||||
self.state = EscapeUnicodeState::Type;
|
|
||||||
Some('\\')
|
|
||||||
}
|
|
||||||
EscapeUnicodeState::Type => {
|
|
||||||
self.state = EscapeUnicodeState::LeftBrace;
|
|
||||||
Some('u')
|
|
||||||
}
|
|
||||||
EscapeUnicodeState::LeftBrace => {
|
|
||||||
self.state = EscapeUnicodeState::Value;
|
|
||||||
Some('{')
|
|
||||||
}
|
|
||||||
EscapeUnicodeState::Value => {
|
|
||||||
let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
|
|
||||||
let c = char::from_digit(hex_digit, 16).unwrap();
|
|
||||||
if self.hex_digit_idx == 0 {
|
|
||||||
self.state = EscapeUnicodeState::RightBrace;
|
|
||||||
} else {
|
|
||||||
self.hex_digit_idx -= 1;
|
|
||||||
}
|
|
||||||
Some(c)
|
|
||||||
}
|
|
||||||
EscapeUnicodeState::RightBrace => {
|
|
||||||
self.state = EscapeUnicodeState::Done;
|
|
||||||
Some('}')
|
|
||||||
}
|
|
||||||
EscapeUnicodeState::Done => None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
let n = self.len();
|
let n = self.0.len();
|
||||||
(n, Some(n))
|
(n, Some(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn count(self) -> usize {
|
fn count(self) -> usize {
|
||||||
self.len()
|
self.0.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn last(self) -> Option<char> {
|
#[inline]
|
||||||
match self.state {
|
fn last(mut self) -> Option<char> {
|
||||||
EscapeUnicodeState::Done => None,
|
self.0.next_back().map(char::from)
|
||||||
|
}
|
||||||
|
|
||||||
EscapeUnicodeState::RightBrace
|
#[inline]
|
||||||
| EscapeUnicodeState::Value
|
fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
| EscapeUnicodeState::LeftBrace
|
self.0.advance_by(n)
|
||||||
| EscapeUnicodeState::Type
|
|
||||||
| EscapeUnicodeState::Backslash => Some('}'),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,16 +193,7 @@ impl Iterator for EscapeUnicode {
|
||||||
impl ExactSizeIterator for EscapeUnicode {
|
impl ExactSizeIterator for EscapeUnicode {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
// The match is a single memory access with no branching
|
self.0.len()
|
||||||
self.hex_digit_idx
|
|
||||||
+ match self.state {
|
|
||||||
EscapeUnicodeState::Done => 0,
|
|
||||||
EscapeUnicodeState::RightBrace => 1,
|
|
||||||
EscapeUnicodeState::Value => 2,
|
|
||||||
EscapeUnicodeState::LeftBrace => 3,
|
|
||||||
EscapeUnicodeState::Type => 4,
|
|
||||||
EscapeUnicodeState::Backslash => 5,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,10 +203,7 @@ impl FusedIterator for EscapeUnicode {}
|
||||||
#[stable(feature = "char_struct_display", since = "1.16.0")]
|
#[stable(feature = "char_struct_display", since = "1.16.0")]
|
||||||
impl fmt::Display for EscapeUnicode {
|
impl fmt::Display for EscapeUnicode {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
for c in self.clone() {
|
f.write_str(self.0.as_str())
|
||||||
f.write_char(c)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,90 +215,60 @@ impl fmt::Display for EscapeUnicode {
|
||||||
/// [`escape_default`]: char::escape_default
|
/// [`escape_default`]: char::escape_default
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub struct EscapeDefault {
|
pub struct EscapeDefault(escape::EscapeIterInner<10>);
|
||||||
state: EscapeDefaultState,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
impl EscapeDefault {
|
||||||
enum EscapeDefaultState {
|
fn printable(chr: u8) -> Self {
|
||||||
Done,
|
let data = [chr, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||||
Char(char),
|
Self(escape::EscapeIterInner::new(data, 0..1))
|
||||||
Backslash(char),
|
}
|
||||||
Unicode(EscapeUnicode),
|
|
||||||
|
fn backslash(chr: u8) -> Self {
|
||||||
|
let data = [b'\\', chr, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||||
|
Self(escape::EscapeIterInner::new(data, 0..2))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_unicode(esc: EscapeUnicode) -> Self {
|
||||||
|
Self(esc.0)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
impl Iterator for EscapeDefault {
|
impl Iterator for EscapeDefault {
|
||||||
type Item = char;
|
type Item = char;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn next(&mut self) -> Option<char> {
|
fn next(&mut self) -> Option<char> {
|
||||||
match self.state {
|
self.0.next().map(char::from)
|
||||||
EscapeDefaultState::Backslash(c) => {
|
|
||||||
self.state = EscapeDefaultState::Char(c);
|
|
||||||
Some('\\')
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Char(c) => {
|
|
||||||
self.state = EscapeDefaultState::Done;
|
|
||||||
Some(c)
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Done => None,
|
|
||||||
EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
let n = self.len();
|
let n = self.0.len();
|
||||||
(n, Some(n))
|
(n, Some(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn count(self) -> usize {
|
fn count(self) -> usize {
|
||||||
self.len()
|
self.0.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn nth(&mut self, n: usize) -> Option<char> {
|
#[inline]
|
||||||
match self.state {
|
fn last(mut self) -> Option<char> {
|
||||||
EscapeDefaultState::Backslash(c) if n == 0 => {
|
self.0.next_back().map(char::from)
|
||||||
self.state = EscapeDefaultState::Char(c);
|
|
||||||
Some('\\')
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Backslash(c) if n == 1 => {
|
|
||||||
self.state = EscapeDefaultState::Done;
|
|
||||||
Some(c)
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Backslash(_) => {
|
|
||||||
self.state = EscapeDefaultState::Done;
|
|
||||||
None
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Char(c) => {
|
|
||||||
self.state = EscapeDefaultState::Done;
|
|
||||||
|
|
||||||
if n == 0 { Some(c) } else { None }
|
|
||||||
}
|
|
||||||
EscapeDefaultState::Done => None,
|
|
||||||
EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn last(self) -> Option<char> {
|
#[inline]
|
||||||
match self.state {
|
fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
EscapeDefaultState::Unicode(iter) => iter.last(),
|
self.0.advance_by(n)
|
||||||
EscapeDefaultState::Done => None,
|
|
||||||
EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "exact_size_escape", since = "1.11.0")]
|
#[stable(feature = "exact_size_escape", since = "1.11.0")]
|
||||||
impl ExactSizeIterator for EscapeDefault {
|
impl ExactSizeIterator for EscapeDefault {
|
||||||
|
#[inline]
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
match self.state {
|
self.0.len()
|
||||||
EscapeDefaultState::Done => 0,
|
|
||||||
EscapeDefaultState::Char(_) => 1,
|
|
||||||
EscapeDefaultState::Backslash(_) => 2,
|
|
||||||
EscapeDefaultState::Unicode(ref iter) => iter.len(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -360,10 +278,7 @@ impl FusedIterator for EscapeDefault {}
|
||||||
#[stable(feature = "char_struct_display", since = "1.16.0")]
|
#[stable(feature = "char_struct_display", since = "1.16.0")]
|
||||||
impl fmt::Display for EscapeDefault {
|
impl fmt::Display for EscapeDefault {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
for c in self.clone() {
|
f.write_str(self.0.as_str())
|
||||||
f.write_char(c)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -375,21 +290,74 @@ impl fmt::Display for EscapeDefault {
|
||||||
/// [`escape_debug`]: char::escape_debug
|
/// [`escape_debug`]: char::escape_debug
|
||||||
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct EscapeDebug(EscapeDefault);
|
pub struct EscapeDebug(EscapeDebugInner);
|
||||||
|
|
||||||
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
#[derive(Clone, Debug)]
|
||||||
impl Iterator for EscapeDebug {
|
// Note: It’s possible to manually encode the EscapeDebugInner inside of
|
||||||
type Item = char;
|
// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
|
||||||
fn next(&mut self) -> Option<char> {
|
// a char) which would likely result in more optimised code. For now we use
|
||||||
self.0.next()
|
// the option easier to implement.
|
||||||
|
enum EscapeDebugInner {
|
||||||
|
Bytes(escape::EscapeIterInner<10>),
|
||||||
|
Char(char),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EscapeDebug {
|
||||||
|
fn printable(chr: char) -> Self {
|
||||||
|
Self(EscapeDebugInner::Char(chr))
|
||||||
}
|
}
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
||||||
self.0.size_hint()
|
fn backslash(chr: u8) -> Self {
|
||||||
|
let data = [b'\\', chr, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||||
|
let iter = escape::EscapeIterInner::new(data, 0..2);
|
||||||
|
Self(EscapeDebugInner::Bytes(iter))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_unicode(esc: EscapeUnicode) -> Self {
|
||||||
|
Self(EscapeDebugInner::Bytes(esc.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clear(&mut self) {
|
||||||
|
let bytes = escape::EscapeIterInner::new([0; 10], 0..0);
|
||||||
|
self.0 = EscapeDebugInner::Bytes(bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
||||||
impl ExactSizeIterator for EscapeDebug {}
|
impl Iterator for EscapeDebug {
|
||||||
|
type Item = char;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next(&mut self) -> Option<char> {
|
||||||
|
match self.0 {
|
||||||
|
EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
|
||||||
|
EscapeDebugInner::Char(chr) => {
|
||||||
|
self.clear();
|
||||||
|
Some(chr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
let n = self.len();
|
||||||
|
(n, Some(n))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn count(self) -> usize {
|
||||||
|
self.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
||||||
|
impl ExactSizeIterator for EscapeDebug {
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
match &self.0 {
|
||||||
|
EscapeDebugInner::Bytes(bytes) => bytes.len(),
|
||||||
|
EscapeDebugInner::Char(_) => 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[stable(feature = "fused", since = "1.26.0")]
|
#[stable(feature = "fused", since = "1.26.0")]
|
||||||
impl FusedIterator for EscapeDebug {}
|
impl FusedIterator for EscapeDebug {}
|
||||||
|
@ -397,7 +365,10 @@ impl FusedIterator for EscapeDebug {}
|
||||||
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
||||||
impl fmt::Display for EscapeDebug {
|
impl fmt::Display for EscapeDebug {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
fmt::Display::fmt(&self.0, f)
|
match &self.0 {
|
||||||
|
EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()),
|
||||||
|
EscapeDebugInner::Char(chr) => f.write_char(*chr),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
99
library/core/src/escape.rs
Normal file
99
library/core/src/escape.rs
Normal file
|
@ -0,0 +1,99 @@
|
||||||
|
//! Helper code for character escaping.
|
||||||
|
|
||||||
|
use crate::num::NonZeroUsize;
|
||||||
|
use crate::ops::Range;
|
||||||
|
|
||||||
|
const HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
|
||||||
|
|
||||||
|
/// Escapes a byte into provided buffer; returns the range of the buffer
|
||||||
|
/// which holds the escaped representation.
|
||||||
|
pub(crate) fn escape_ascii_into(output: &mut [u8; 4], byte: u8) -> Range<u8> {
|
||||||
|
let (data, len) = match byte {
|
||||||
|
b'\t' => ([b'\\', b't', 0, 0], 2),
|
||||||
|
b'\r' => ([b'\\', b'r', 0, 0], 2),
|
||||||
|
b'\n' => ([b'\\', b'n', 0, 0], 2),
|
||||||
|
b'\\' => ([b'\\', b'\\', 0, 0], 2),
|
||||||
|
b'\'' => ([b'\\', b'\'', 0, 0], 2),
|
||||||
|
b'"' => ([b'\\', b'"', 0, 0], 2),
|
||||||
|
b'\x20'..=b'\x7e' => ([byte, 0, 0, 0], 1),
|
||||||
|
_ => {
|
||||||
|
let hi = HEX_DIGITS[usize::from(byte >> 4)];
|
||||||
|
let lo = HEX_DIGITS[usize::from(byte & 0xf)];
|
||||||
|
([b'\\', b'x', hi, lo], 4)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
*output = data;
|
||||||
|
0..(len as u8)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escapes a character into provided buffer using `\u{NNNN}` representation.
|
||||||
|
pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8> {
|
||||||
|
output[9] = b'}';
|
||||||
|
|
||||||
|
let ch = ch as u32;
|
||||||
|
output[3] = HEX_DIGITS[((ch >> 20) & 15) as usize];
|
||||||
|
output[4] = HEX_DIGITS[((ch >> 16) & 15) as usize];
|
||||||
|
output[5] = HEX_DIGITS[((ch >> 12) & 15) as usize];
|
||||||
|
output[6] = HEX_DIGITS[((ch >> 8) & 15) as usize];
|
||||||
|
output[7] = HEX_DIGITS[((ch >> 4) & 15) as usize];
|
||||||
|
output[8] = HEX_DIGITS[((ch >> 0) & 15) as usize];
|
||||||
|
|
||||||
|
// or-ing 1 ensures that for ch==0 the code computes that one digit should
|
||||||
|
// be printed.
|
||||||
|
let start = (ch | 1).leading_zeros() as usize / 4 - 2;
|
||||||
|
output[start..start + 3].copy_from_slice(b"\\u{");
|
||||||
|
|
||||||
|
(start as u8)..10
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over a fixed-size array.
|
||||||
|
///
|
||||||
|
/// This is essentially equivalent to array’s IntoIter except that indexes are
|
||||||
|
/// limited to u8 to reduce size of the structure.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct EscapeIterInner<const N: usize> {
|
||||||
|
// Invariant: data[alive] is all ASCII.
|
||||||
|
pub(crate) data: [u8; N],
|
||||||
|
|
||||||
|
// Invariant: alive.start <= alive.end <= N.
|
||||||
|
pub(crate) alive: Range<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const N: usize> EscapeIterInner<N> {
|
||||||
|
pub fn new(data: [u8; N], alive: Range<u8>) -> Self {
|
||||||
|
const { assert!(N < 256) };
|
||||||
|
debug_assert!(alive.start <= alive.end && usize::from(alive.end) <= N, "{alive:?}");
|
||||||
|
let this = Self { data, alive };
|
||||||
|
debug_assert!(this.as_bytes().is_ascii(), "Expected ASCII, got {:?}", this.as_bytes());
|
||||||
|
this
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_bytes(&self) -> &[u8] {
|
||||||
|
&self.data[usize::from(self.alive.start)..usize::from(self.alive.end)]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_str(&self) -> &str {
|
||||||
|
// SAFETY: self.data[self.alive] is all ASCII characters.
|
||||||
|
unsafe { crate::str::from_utf8_unchecked(self.as_bytes()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
usize::from(self.alive.end - self.alive.start)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next(&mut self) -> Option<u8> {
|
||||||
|
self.alive.next().map(|i| self.data[usize::from(i)])
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next_back(&mut self) -> Option<u8> {
|
||||||
|
self.alive.next_back().map(|i| self.data[usize::from(i)])
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
|
self.alive.advance_by(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
|
self.alive.advance_back_by(n)
|
||||||
|
}
|
||||||
|
}
|
|
@ -376,6 +376,7 @@ pub mod alloc;
|
||||||
|
|
||||||
// note: does not need to be public
|
// note: does not need to be public
|
||||||
mod bool;
|
mod bool;
|
||||||
|
mod escape;
|
||||||
mod tuple;
|
mod tuple;
|
||||||
mod unit;
|
mod unit;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue