1
Fork 0

Avoid useless checking in `from_token_lit`.

The parser already does a check-only unescaping pass (in the lexer's
`cook_lexer_literal`) which catches all errors. So the error checking
done in `from_token_lit` never triggers.

However, literals that cause warnings (i.e. non-fatal unescaping errors)
can still reach `from_token_lit`. So this commit changes `str-escape.rs`
to use byte string literals and C string literals as well, to give better
coverage and ensure the new assertions in `from_token_lit` are correct.
This commit is contained in:
Nicholas Nethercote 2024-01-22 16:58:39 +11:00
parent 0011fac90d
commit 314dbc7f22
3 changed files with 33 additions and 73 deletions

View file

@ -48,6 +48,9 @@ impl LitKind {
return Err(LitError::InvalidSuffix); return Err(LitError::InvalidSuffix);
} }
// For byte/char/string literals, chars and escapes have already been
// checked in the lexer (in `cook_lexer_literal`). So we can assume all
// chars and escapes are valid here.
Ok(match kind { Ok(match kind {
token::Bool => { token::Bool => {
assert!(symbol.is_bool_lit()); assert!(symbol.is_bool_lit());
@ -56,12 +59,12 @@ impl LitKind {
token::Byte => { token::Byte => {
return unescape_byte(symbol.as_str()) return unescape_byte(symbol.as_str())
.map(LitKind::Byte) .map(LitKind::Byte)
.map_err(|_| LitError::LexerError); .map_err(|_| panic!("failed to unescape byte literal"));
} }
token::Char => { token::Char => {
return unescape_char(symbol.as_str()) return unescape_char(symbol.as_str())
.map(LitKind::Char) .map(LitKind::Char)
.map_err(|_| LitError::LexerError); .map_err(|_| panic!("failed to unescape char literal"));
} }
// There are some valid suffixes for integer and float literals, // There are some valid suffixes for integer and float literals,
@ -77,26 +80,22 @@ impl LitKind {
let s = symbol.as_str(); let s = symbol.as_str();
// Vanilla strings are so common we optimize for the common case where no chars // Vanilla strings are so common we optimize for the common case where no chars
// requiring special behaviour are present. // requiring special behaviour are present.
let symbol = if s.contains(['\\', '\r']) { let symbol = if s.contains('\\') {
let mut buf = String::with_capacity(s.len()); let mut buf = String::with_capacity(s.len());
let mut error = Ok(());
// Force-inlining here is aggressive but the closure is // Force-inlining here is aggressive but the closure is
// called on every char in the string, so it can be // called on every char in the string, so it can be hot in
// hot in programs with many long strings. // programs with many long strings containing escapes.
unescape_literal( unescape_literal(
s, s,
Mode::Str, Mode::Str,
&mut #[inline(always)] &mut #[inline(always)]
|_, unescaped_char| match unescaped_char { |_, c| match c {
Ok(c) => buf.push(c), Ok(c) => buf.push(c),
Err(err) => { Err(err) => {
if err.is_fatal() { assert!(!err.is_fatal(), "failed to unescape string literal")
error = Err(LitError::LexerError);
}
} }
}, },
); );
error?;
Symbol::intern(&buf) Symbol::intern(&buf)
} else { } else {
symbol symbol
@ -104,86 +103,46 @@ impl LitKind {
LitKind::Str(symbol, ast::StrStyle::Cooked) LitKind::Str(symbol, ast::StrStyle::Cooked)
} }
token::StrRaw(n) => { token::StrRaw(n) => {
// Raw strings have no escapes, so we only need to check for invalid chars, and we // Raw strings have no escapes so no work is needed here.
// can reuse the symbol on success.
let mut error = Ok(());
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
match unescaped_char {
Ok(_) => {}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
}
});
error?;
LitKind::Str(symbol, ast::StrStyle::Raw(n)) LitKind::Str(symbol, ast::StrStyle::Raw(n))
} }
token::ByteStr => { token::ByteStr => {
let s = symbol.as_str(); let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len()); let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)), Ok(c) => buf.push(byte_from_char(c)),
Err(err) => { Err(err) => {
if err.is_fatal() { assert!(!err.is_fatal(), "failed to unescape string literal")
error = Err(LitError::LexerError);
}
} }
}); });
error?;
LitKind::ByteStr(buf.into(), StrStyle::Cooked) LitKind::ByteStr(buf.into(), StrStyle::Cooked)
} }
token::ByteStrRaw(n) => { token::ByteStrRaw(n) => {
// Raw strings have no escapes, so we only need to check for invalid chars, and we // Raw strings have no escapes so we can convert the symbol
// can convert the symbol directly to a `Lrc<u8>` on success. // directly to a `Lrc<u8>`.
let s = symbol.as_str(); let buf = symbol.as_str().to_owned().into_bytes();
let mut error = Ok(()); LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
Ok(_) => {}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
} }
token::CStr => { token::CStr => {
let s = symbol.as_str(); let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len()); let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) => { Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
} }
Err(err) => { Err(err) => {
if err.is_fatal() { assert!(!err.is_fatal(), "failed to unescape C string literal")
error = Err(LitError::LexerError);
}
} }
}); });
error?;
buf.push(0); buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked) LitKind::CStr(buf.into(), StrStyle::Cooked)
} }
token::CStrRaw(n) => { token::CStrRaw(n) => {
// Raw strings have no escapes, so we only need to check for invalid chars, and we // Raw strings have no escapes so we can convert the symbol
// can convert the symbol directly to a `Lrc<u8>` on success. // directly to a `Lrc<u8>` after appending the terminating NUL
let s = symbol.as_str(); // char.
let mut error = Ok(()); let mut buf = symbol.as_str().to_owned().into_bytes();
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
Ok(_) => {}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
let mut buf = s.to_owned().into_bytes();
buf.push(0); buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n)) LitKind::CStr(buf.into(), StrStyle::Raw(n))
} }

View file

@ -1,5 +1,6 @@
// check-pass // check-pass
// ignore-tidy-tab // ignore-tidy-tab
// edition: 2021
fn main() { fn main() {
let s = "\ let s = "\
@ -8,11 +9,11 @@ fn main() {
//~^^^ WARNING multiple lines skipped by escaped newline //~^^^ WARNING multiple lines skipped by escaped newline
assert_eq!(s, ""); assert_eq!(s, "");
let s = "foo\ let s = c"foo\
  bar   bar
"; ";
//~^^^ WARNING whitespace symbol '\u{a0}' is not skipped //~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
assert_eq!(s, "foo  bar\n "); assert_eq!(s, c"foo  bar\n ");
let s = "a\ let s = "a\
b"; b";
@ -22,10 +23,10 @@ fn main() {
b"; b";
assert_eq!(s, "ab"); assert_eq!(s, "ab");
let s = "a\ let s = b"a\
b"; b";
//~^^ WARNING whitespace symbol '\u{c}' is not skipped //~^^ WARNING whitespace symbol '\u{c}' is not skipped
// '\x0c' is ASCII whitespace, but it may not need skipped // '\x0c' is ASCII whitespace, but it may not need skipped
// discussion: https://github.com/rust-lang/rust/pull/108403 // discussion: https://github.com/rust-lang/rust/pull/108403
assert_eq!(s, "a\x0cb"); assert_eq!(s, b"a\x0cb");
} }

View file

@ -1,5 +1,5 @@
warning: multiple lines skipped by escaped newline warning: multiple lines skipped by escaped newline
--> $DIR/str-escape.rs:5:14 --> $DIR/str-escape.rs:6:14
| |
LL | let s = "\ LL | let s = "\
| ______________^ | ______________^
@ -8,20 +8,20 @@ LL | | ";
| |_____________^ skipping everything up to and including this point | |_____________^ skipping everything up to and including this point
warning: whitespace symbol '\u{a0}' is not skipped warning: whitespace symbol '\u{a0}' is not skipped
--> $DIR/str-escape.rs:11:17 --> $DIR/str-escape.rs:12:18
| |
LL | let s = "foo\ LL | let s = c"foo\
| _________________^ | __________________^
LL | |   bar LL | |   bar
| | ^ whitespace symbol '\u{a0}' is not skipped | | ^ whitespace symbol '\u{a0}' is not skipped
| |___| | |___|
| |
warning: whitespace symbol '\u{c}' is not skipped warning: whitespace symbol '\u{c}' is not skipped
--> $DIR/str-escape.rs:25:15 --> $DIR/str-escape.rs:26:16
| |
LL | let s = "a\ LL | let s = b"a\
| _______________^ | ________________^
LL | | b"; LL | | b";
| | ^- whitespace symbol '\u{c}' is not skipped | | ^- whitespace symbol '\u{c}' is not skipped
| |____| | |____|