Avoid useless checking in `from_token_lit`.

The parser already does a check-only unescaping which catches all errors. So the checking done in `from_token_lit` never hits. But literals causing warnings can still occur in `from_token_lit`. So the commit changes `str-escape.rs` to use byte string literals and C string literals as well, to give better coverage and ensure the new assertions in `from_token_lit` are correct.
2024-01-22 16:58:39 +11:00 · 2024-01-22 16:58:39 +11:00 · 314dbc7f22
commit 314dbc7f22
parent 0011fac90d
3 changed files with 33 additions and 73 deletions
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -48,6 +48,9 @@ impl LitKind {
            return Err(LitError::InvalidSuffix);
        }
+        // For byte/char/string literals, chars and escapes have already been
+        // checked in the lexer (in `cook_lexer_literal`). So we can assume all
+        // chars and escapes are valid here.
        Ok(match kind {
            token::Bool => {
                assert!(symbol.is_bool_lit());
@@ -56,12 +59,12 @@ impl LitKind {
            token::Byte => {
                return unescape_byte(symbol.as_str())
                    .map(LitKind::Byte)
-                    .map_err(|_| LitError::LexerError);
+                    .map_err(|_| panic!("failed to unescape byte literal"));
            }
            token::Char => {
                return unescape_char(symbol.as_str())
                    .map(LitKind::Char)
-                    .map_err(|_| LitError::LexerError);
+                    .map_err(|_| panic!("failed to unescape char literal"));
            }
            // There are some valid suffixes for integer and float literals,
@@ -77,26 +80,22 @@ impl LitKind {
                let s = symbol.as_str();
                // Vanilla strings are so common we optimize for the common case where no chars
                // requiring special behaviour are present.
-                let symbol = if s.contains(['\\', '\r']) {
+                let symbol = if s.contains('\\') {
                    let mut buf = String::with_capacity(s.len());
-                    let mut error = Ok(());
                    // Force-inlining here is aggressive but the closure is
-                    // called on every char in the string, so it can be
+                    // called on every char in the string, so it can be hot in
-                    // hot in programs with many long strings.
+                    // programs with many long strings containing escapes.
                    unescape_literal(
                        s,
                        Mode::Str,
                        &mut #[inline(always)]
-                        |_, unescaped_char| match unescaped_char {
+                        |_, c| match c {
                            Ok(c) => buf.push(c),
                            Err(err) => {
-                                if err.is_fatal() {
+                                assert!(!err.is_fatal(), "failed to unescape string literal")
-                                    error = Err(LitError::LexerError);
-                                }
                            }
                        },
                    );
-                    error?;
                    Symbol::intern(&buf)
                } else {
                    symbol
@@ -104,86 +103,46 @@ impl LitKind {
                LitKind::Str(symbol, ast::StrStyle::Cooked)
            }
            token::StrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
+                // Raw strings have no escapes so no work is needed here.
-                // can reuse the symbol on success.
-                let mut error = Ok(());
-                unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
-                    match unescaped_char {
-                        Ok(_) => {}
-                        Err(err) => {
-                            if err.is_fatal() {
-                                error = Err(LitError::LexerError);
-                            }
-                        }
-                    }
-                });
-                error?;
                LitKind::Str(symbol, ast::StrStyle::Raw(n))
            }
            token::ByteStr => {
                let s = symbol.as_str();
                let mut buf = Vec::with_capacity(s.len());
-                let mut error = Ok(());
                unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
                    Ok(c) => buf.push(byte_from_char(c)),
                    Err(err) => {
-                        if err.is_fatal() {
+                        assert!(!err.is_fatal(), "failed to unescape string literal")
-                            error = Err(LitError::LexerError);
-                        }
                    }
                });
-                error?;
                LitKind::ByteStr(buf.into(), StrStyle::Cooked)
            }
            token::ByteStrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
+                // Raw strings have no escapes so we can convert the symbol
-                // can convert the symbol directly to a `Lrc<u8>` on success.
+                // directly to a `Lrc<u8>`.
-                let s = symbol.as_str();
+                let buf = symbol.as_str().to_owned().into_bytes();
-                let mut error = Ok(());
+                LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
-                unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
-                    Ok(_) => {}
-                    Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
-                    }
-                });
-                LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
            }
            token::CStr => {
                let s = symbol.as_str();
                let mut buf = Vec::with_capacity(s.len());
-                let mut error = Ok(());
                unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
                    Ok(CStrUnit::Byte(b)) => buf.push(b),
                    Ok(CStrUnit::Char(c)) => {
                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
                    }
                    Err(err) => {
-                        if err.is_fatal() {
+                        assert!(!err.is_fatal(), "failed to unescape C string literal")
-                            error = Err(LitError::LexerError);
-                        }
                    }
                });
-                error?;
                buf.push(0);
                LitKind::CStr(buf.into(), StrStyle::Cooked)
            }
            token::CStrRaw(n) => {
-                // Raw strings have no escapes, so we only need to check for invalid chars, and we
+                // Raw strings have no escapes so we can convert the symbol
-                // can convert the symbol directly to a `Lrc<u8>` on success.
+                // directly to a `Lrc<u8>` after appending the terminating NUL
-                let s = symbol.as_str();
+                // char.
-                let mut error = Ok(());
+                let mut buf = symbol.as_str().to_owned().into_bytes();
-                unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
-                    Ok(_) => {}
-                    Err(err) => {
-                        if err.is_fatal() {
-                            error = Err(LitError::LexerError);
-                        }
-                    }
-                });
-                error?;
-                let mut buf = s.to_owned().into_bytes();
                buf.push(0);
                LitKind::CStr(buf.into(), StrStyle::Raw(n))
            }
--- a/tests/ui/str/str-escape.rs
+++ b/tests/ui/str/str-escape.rs
@@ -1,5 +1,6 @@
 // check-pass
 // ignore-tidy-tab
+// edition: 2021
 fn main() {
    let s = "\
@@ -8,11 +9,11 @@ fn main() {
    //~^^^ WARNING multiple lines skipped by escaped newline
    assert_eq!(s, "");
-    let s = "foo\
+    let s = c"foo\
             bar
             ";
    //~^^^ WARNING whitespace symbol '\u{a0}' is not skipped
-    assert_eq!(s, "foo           bar\n             ");
+    assert_eq!(s, c"foo           bar\n             ");
    let s = "a\
 b";
@@ -22,10 +23,10 @@ fn main() {
 	b";
    assert_eq!(s, "ab");
-    let s = "a\
+    let s = b"a\
    b";
    //~^^ WARNING whitespace symbol '\u{c}' is not skipped
    // '\x0c' is ASCII whitespace, but it may not need skipped
    // discussion: https://github.com/rust-lang/rust/pull/108403
-    assert_eq!(s, "a\x0cb");
+    assert_eq!(s, b"a\x0cb");
 }
--- a/tests/ui/str/str-escape.stderr
+++ b/tests/ui/str/str-escape.stderr
@@ -1,5 +1,5 @@
 warning: multiple lines skipped by escaped newline
-  --> $DIR/str-escape.rs:5:14
+  --> $DIR/str-escape.rs:6:14
   |
 LL |       let s = "\
   |  ______________^
@@ -8,20 +8,20 @@ LL | |              ";
   | |_____________^ skipping everything up to and including this point
 warning: whitespace symbol '\u{a0}' is not skipped
-  --> $DIR/str-escape.rs:11:17
+  --> $DIR/str-escape.rs:12:18
   |
-LL |       let s = "foo\
+LL |       let s = c"foo\
-   |  _________________^
+   |  __________________^
 LL | |              bar
   | |   ^ whitespace symbol '\u{a0}' is not skipped
   | |___|
   | 
 warning: whitespace symbol '\u{c}' is not skipped
-  --> $DIR/str-escape.rs:25:15
+  --> $DIR/str-escape.rs:26:16
   |
-LL |       let s = "a\
+LL |       let s = b"a\
-   |  _______________^
+   |  ________________^
 LL | |     b";
   | |    ^- whitespace symbol '\u{c}' is not skipped
   | |____|