Avoid useless checking in from_token_lit
.
The parser already does a check-only unescaping pass that catches all errors, so the error checking done in `from_token_lit` is never triggered. Literals that cause warnings can still reach `from_token_lit`, however. This commit therefore also changes `str-escape.rs` to use byte string literals and C string literals, which gives better coverage and ensures the new assertions in `from_token_lit` are correct.
This commit is contained in:
parent
0011fac90d
commit
314dbc7f22
3 changed files with 33 additions and 73 deletions
|
@ -48,6 +48,9 @@ impl LitKind {
|
||||||
return Err(LitError::InvalidSuffix);
|
return Err(LitError::InvalidSuffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For byte/char/string literals, chars and escapes have already been
|
||||||
|
// checked in the lexer (in `cook_lexer_literal`). So we can assume all
|
||||||
|
// chars and escapes are valid here.
|
||||||
Ok(match kind {
|
Ok(match kind {
|
||||||
token::Bool => {
|
token::Bool => {
|
||||||
assert!(symbol.is_bool_lit());
|
assert!(symbol.is_bool_lit());
|
||||||
|
@ -56,12 +59,12 @@ impl LitKind {
|
||||||
token::Byte => {
|
token::Byte => {
|
||||||
return unescape_byte(symbol.as_str())
|
return unescape_byte(symbol.as_str())
|
||||||
.map(LitKind::Byte)
|
.map(LitKind::Byte)
|
||||||
.map_err(|_| LitError::LexerError);
|
.map_err(|_| panic!("failed to unescape byte literal"));
|
||||||
}
|
}
|
||||||
token::Char => {
|
token::Char => {
|
||||||
return unescape_char(symbol.as_str())
|
return unescape_char(symbol.as_str())
|
||||||
.map(LitKind::Char)
|
.map(LitKind::Char)
|
||||||
.map_err(|_| LitError::LexerError);
|
.map_err(|_| panic!("failed to unescape char literal"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// There are some valid suffixes for integer and float literals,
|
// There are some valid suffixes for integer and float literals,
|
||||||
|
@ -77,26 +80,22 @@ impl LitKind {
|
||||||
let s = symbol.as_str();
|
let s = symbol.as_str();
|
||||||
// Vanilla strings are so common we optimize for the common case where no chars
|
// Vanilla strings are so common we optimize for the common case where no chars
|
||||||
// requiring special behaviour are present.
|
// requiring special behaviour are present.
|
||||||
let symbol = if s.contains(['\\', '\r']) {
|
let symbol = if s.contains('\\') {
|
||||||
let mut buf = String::with_capacity(s.len());
|
let mut buf = String::with_capacity(s.len());
|
||||||
let mut error = Ok(());
|
|
||||||
// Force-inlining here is aggressive but the closure is
|
// Force-inlining here is aggressive but the closure is
|
||||||
// called on every char in the string, so it can be
|
// called on every char in the string, so it can be hot in
|
||||||
// hot in programs with many long strings.
|
// programs with many long strings containing escapes.
|
||||||
unescape_literal(
|
unescape_literal(
|
||||||
s,
|
s,
|
||||||
Mode::Str,
|
Mode::Str,
|
||||||
&mut #[inline(always)]
|
&mut #[inline(always)]
|
||||||
|_, unescaped_char| match unescaped_char {
|
|_, c| match c {
|
||||||
Ok(c) => buf.push(c),
|
Ok(c) => buf.push(c),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if err.is_fatal() {
|
assert!(!err.is_fatal(), "failed to unescape string literal")
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
error?;
|
|
||||||
Symbol::intern(&buf)
|
Symbol::intern(&buf)
|
||||||
} else {
|
} else {
|
||||||
symbol
|
symbol
|
||||||
|
@ -104,86 +103,46 @@ impl LitKind {
|
||||||
LitKind::Str(symbol, ast::StrStyle::Cooked)
|
LitKind::Str(symbol, ast::StrStyle::Cooked)
|
||||||
}
|
}
|
||||||
token::StrRaw(n) => {
|
token::StrRaw(n) => {
|
||||||
// Raw strings have no escapes, so we only need to check for invalid chars, and we
|
// Raw strings have no escapes so no work is needed here.
|
||||||
// can reuse the symbol on success.
|
|
||||||
let mut error = Ok(());
|
|
||||||
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
|
|
||||||
match unescaped_char {
|
|
||||||
Ok(_) => {}
|
|
||||||
Err(err) => {
|
|
||||||
if err.is_fatal() {
|
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
error?;
|
|
||||||
LitKind::Str(symbol, ast::StrStyle::Raw(n))
|
LitKind::Str(symbol, ast::StrStyle::Raw(n))
|
||||||
}
|
}
|
||||||
token::ByteStr => {
|
token::ByteStr => {
|
||||||
let s = symbol.as_str();
|
let s = symbol.as_str();
|
||||||
let mut buf = Vec::with_capacity(s.len());
|
let mut buf = Vec::with_capacity(s.len());
|
||||||
let mut error = Ok(());
|
|
||||||
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
|
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
|
||||||
Ok(c) => buf.push(byte_from_char(c)),
|
Ok(c) => buf.push(byte_from_char(c)),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if err.is_fatal() {
|
assert!(!err.is_fatal(), "failed to unescape string literal")
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
error?;
|
|
||||||
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
|
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
|
||||||
}
|
}
|
||||||
token::ByteStrRaw(n) => {
|
token::ByteStrRaw(n) => {
|
||||||
// Raw strings have no escapes, so we only need to check for invalid chars, and we
|
// Raw strings have no escapes so we can convert the symbol
|
||||||
// can convert the symbol directly to a `Lrc<u8>` on success.
|
// directly to a `Lrc<u8>`.
|
||||||
let s = symbol.as_str();
|
let buf = symbol.as_str().to_owned().into_bytes();
|
||||||
let mut error = Ok(());
|
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
|
||||||
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
|
|
||||||
Ok(_) => {}
|
|
||||||
Err(err) => {
|
|
||||||
if err.is_fatal() {
|
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
|
|
||||||
}
|
}
|
||||||
token::CStr => {
|
token::CStr => {
|
||||||
let s = symbol.as_str();
|
let s = symbol.as_str();
|
||||||
let mut buf = Vec::with_capacity(s.len());
|
let mut buf = Vec::with_capacity(s.len());
|
||||||
let mut error = Ok(());
|
|
||||||
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
|
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
|
||||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||||
Ok(CStrUnit::Char(c)) => {
|
Ok(CStrUnit::Char(c)) => {
|
||||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if err.is_fatal() {
|
assert!(!err.is_fatal(), "failed to unescape C string literal")
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
error?;
|
|
||||||
buf.push(0);
|
buf.push(0);
|
||||||
LitKind::CStr(buf.into(), StrStyle::Cooked)
|
LitKind::CStr(buf.into(), StrStyle::Cooked)
|
||||||
}
|
}
|
||||||
token::CStrRaw(n) => {
|
token::CStrRaw(n) => {
|
||||||
// Raw strings have no escapes, so we only need to check for invalid chars, and we
|
// Raw strings have no escapes so we can convert the symbol
|
||||||
// can convert the symbol directly to a `Lrc<u8>` on success.
|
// directly to a `Lrc<u8>` after appending the terminating NUL
|
||||||
let s = symbol.as_str();
|
// char.
|
||||||
let mut error = Ok(());
|
let mut buf = symbol.as_str().to_owned().into_bytes();
|
||||||
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
|
|
||||||
Ok(_) => {}
|
|
||||||
Err(err) => {
|
|
||||||
if err.is_fatal() {
|
|
||||||
error = Err(LitError::LexerError);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
error?;
|
|
||||||
let mut buf = s.to_owned().into_bytes();
|
|
||||||
buf.push(0);
|
buf.push(0);
|
||||||
LitKind::CStr(buf.into(), StrStyle::Raw(n))
|
LitKind::CStr(buf.into(), StrStyle::Raw(n))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
// check-pass
|
// check-pass
|
||||||
// ignore-tidy-tab
|
// ignore-tidy-tab
|
||||||
|
// edition: 2021
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let s = "\
|
let s = "\
|
||||||
|
@ -8,11 +9,11 @@ fn main() {
|
||||||
//~^^^ WARNING multiple lines skipped by escaped newline
|
//~^^^ WARNING multiple lines skipped by escaped newline
|
||||||
assert_eq!(s, "");
|
assert_eq!(s, "");
|
||||||
|
|
||||||
let s = "foo\
|
let s = c"foo\
|
||||||
bar
|
bar
|
||||||
";
|
";
|
||||||
//~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
|
//~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
|
||||||
assert_eq!(s, "foo bar\n ");
|
assert_eq!(s, c"foo bar\n ");
|
||||||
|
|
||||||
let s = "a\
|
let s = "a\
|
||||||
b";
|
b";
|
||||||
|
@ -22,10 +23,10 @@ fn main() {
|
||||||
b";
|
b";
|
||||||
assert_eq!(s, "ab");
|
assert_eq!(s, "ab");
|
||||||
|
|
||||||
let s = "a\
|
let s = b"a\
|
||||||
b";
|
b";
|
||||||
//~^^ WARNING whitespace symbol '\u{c}' is not skipped
|
//~^^ WARNING whitespace symbol '\u{c}' is not skipped
|
||||||
// '\x0c' is ASCII whitespace, but it may not need skipped
|
// '\x0c' is ASCII whitespace, but it may not need skipped
|
||||||
// discussion: https://github.com/rust-lang/rust/pull/108403
|
// discussion: https://github.com/rust-lang/rust/pull/108403
|
||||||
assert_eq!(s, "a\x0cb");
|
assert_eq!(s, b"a\x0cb");
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
warning: multiple lines skipped by escaped newline
|
warning: multiple lines skipped by escaped newline
|
||||||
--> $DIR/str-escape.rs:5:14
|
--> $DIR/str-escape.rs:6:14
|
||||||
|
|
|
|
||||||
LL | let s = "\
|
LL | let s = "\
|
||||||
| ______________^
|
| ______________^
|
||||||
|
@ -8,20 +8,20 @@ LL | | ";
|
||||||
| |_____________^ skipping everything up to and including this point
|
| |_____________^ skipping everything up to and including this point
|
||||||
|
|
||||||
warning: whitespace symbol '\u{a0}' is not skipped
|
warning: whitespace symbol '\u{a0}' is not skipped
|
||||||
--> $DIR/str-escape.rs:11:17
|
--> $DIR/str-escape.rs:12:18
|
||||||
|
|
|
|
||||||
LL | let s = "foo\
|
LL | let s = c"foo\
|
||||||
| _________________^
|
| __________________^
|
||||||
LL | | bar
|
LL | | bar
|
||||||
| | ^ whitespace symbol '\u{a0}' is not skipped
|
| | ^ whitespace symbol '\u{a0}' is not skipped
|
||||||
| |___|
|
| |___|
|
||||||
|
|
|
|
||||||
|
|
||||||
warning: whitespace symbol '\u{c}' is not skipped
|
warning: whitespace symbol '\u{c}' is not skipped
|
||||||
--> $DIR/str-escape.rs:25:15
|
--> $DIR/str-escape.rs:26:16
|
||||||
|
|
|
|
||||||
LL | let s = "a\
|
LL | let s = b"a\
|
||||||
| _______________^
|
| ________________^
|
||||||
LL | | b";
|
LL | | b";
|
||||||
| | ^- whitespace symbol '\u{c}' is not skipped
|
| | ^- whitespace symbol '\u{c}' is not skipped
|
||||||
| |____|
|
| |____|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue