Rollup merge of #94316 - nnethercote:improve-string-literal-unescaping, r=petrochenkov
Improve string literal unescaping. Some easy wins that affect a few popular crates. r? ```@matklad```
This commit is contained in:
commit
ec44d48ae3
3 changed files with 45 additions and 40 deletions
|
@ -16,6 +16,7 @@
|
||||||
#![feature(min_specialization)]
|
#![feature(min_specialization)]
|
||||||
#![recursion_limit = "256"]
|
#![recursion_limit = "256"]
|
||||||
#![feature(slice_internals)]
|
#![feature(slice_internals)]
|
||||||
|
#![feature(stmt_expr_attributes)]
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate rustc_macros;
|
extern crate rustc_macros;
|
||||||
|
|
|
@ -56,25 +56,30 @@ impl LitKind {
|
||||||
// new symbol because the string in the LitKind is different to the
|
// new symbol because the string in the LitKind is different to the
|
||||||
// string in the token.
|
// string in the token.
|
||||||
let s = symbol.as_str();
|
let s = symbol.as_str();
|
||||||
let symbol =
|
let symbol = if s.contains(&['\\', '\r']) {
|
||||||
if s.contains(&['\\', '\r']) {
|
let mut buf = String::with_capacity(s.len());
|
||||||
let mut buf = String::with_capacity(s.len());
|
let mut error = Ok(());
|
||||||
let mut error = Ok(());
|
// Force-inlining here is aggressive but the closure is
|
||||||
unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
|
// called on every char in the string, so it can be
|
||||||
match unescaped_char {
|
// hot in programs with many long strings.
|
||||||
Ok(c) => buf.push(c),
|
unescape_literal(
|
||||||
Err(err) => {
|
&s,
|
||||||
if err.is_fatal() {
|
Mode::Str,
|
||||||
error = Err(LitError::LexerError);
|
&mut #[inline(always)]
|
||||||
}
|
|_, unescaped_char| match unescaped_char {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(err) => {
|
||||||
|
if err.is_fatal() {
|
||||||
|
error = Err(LitError::LexerError);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
},
|
||||||
error?;
|
);
|
||||||
Symbol::intern(&buf)
|
error?;
|
||||||
} else {
|
Symbol::intern(&buf)
|
||||||
symbol
|
} else {
|
||||||
};
|
symbol
|
||||||
|
};
|
||||||
LitKind::Str(symbol, ast::StrStyle::Cooked)
|
LitKind::Str(symbol, ast::StrStyle::Cooked)
|
||||||
}
|
}
|
||||||
token::StrRaw(n) => {
|
token::StrRaw(n) => {
|
||||||
|
|
|
@ -159,26 +159,8 @@ impl Mode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||||
if first_char != '\\' {
|
// Previous character was '\\', unescape what follows.
|
||||||
// Previous character was not a slash, and we don't expect it to be
|
|
||||||
// an escape-only character.
|
|
||||||
return match first_char {
|
|
||||||
'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
|
|
||||||
'\r' => Err(EscapeError::BareCarriageReturn),
|
|
||||||
'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
|
|
||||||
'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
|
|
||||||
_ => {
|
|
||||||
if mode.is_bytes() && !first_char.is_ascii() {
|
|
||||||
// Byte literal can't be a non-ascii character.
|
|
||||||
return Err(EscapeError::NonAsciiCharInByte);
|
|
||||||
}
|
|
||||||
Ok(first_char)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Previous character is '\\', try to unescape it.
|
|
||||||
|
|
||||||
let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
|
let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
|
||||||
|
|
||||||
|
@ -270,9 +252,24 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
|
||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
|
||||||
|
if mode.is_bytes() && !first_char.is_ascii() {
|
||||||
|
// Byte literal can't be a non-ascii character.
|
||||||
|
Err(EscapeError::NonAsciiCharInByte)
|
||||||
|
} else {
|
||||||
|
Ok(first_char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||||
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
|
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
|
||||||
let res = scan_escape(first_char, chars, mode)?;
|
let res = match first_char {
|
||||||
|
'\\' => scan_escape(chars, mode),
|
||||||
|
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
|
||||||
|
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||||
|
_ => ascii_check(first_char, mode),
|
||||||
|
}?;
|
||||||
if chars.next().is_some() {
|
if chars.next().is_some() {
|
||||||
return Err(EscapeError::MoreThanOneChar);
|
return Err(EscapeError::MoreThanOneChar);
|
||||||
}
|
}
|
||||||
|
@ -303,12 +300,14 @@ where
|
||||||
skip_ascii_whitespace(&mut chars, start, callback);
|
skip_ascii_whitespace(&mut chars, start, callback);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
_ => scan_escape(first_char, &mut chars, mode),
|
_ => scan_escape(&mut chars, mode),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'\n' => Ok('\n'),
|
'\n' => Ok('\n'),
|
||||||
'\t' => Ok('\t'),
|
'\t' => Ok('\t'),
|
||||||
_ => scan_escape(first_char, &mut chars, mode),
|
'"' => Err(EscapeError::EscapeOnlyChar),
|
||||||
|
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||||
|
_ => ascii_check(first_char, mode),
|
||||||
};
|
};
|
||||||
let end = initial_len - chars.as_str().len();
|
let end = initial_len - chars.as_str().len();
|
||||||
callback(start..end, unescaped_char);
|
callback(start..end, unescaped_char);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue