Rollup merge of #123752 - estebank:emoji-prefix, r=wesleywiser

Properly handle emojis as literal prefix in macros

Do not accept the following:

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro pre-expansion of literal prefixes.

Fix #123696.
2024-04-18 21:38:55 -07:00 · 2024-04-18 21:38:55 -07:00 · 0a0a5a956c
commit 0a0a5a956c
parent 3831cbb28f 19821ad234
6 changed files with 36 additions and 6 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -204,6 +204,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                    self.ident(start)
                }
                rustc_lexer::TokenKind::InvalidIdent
+                | rustc_lexer::TokenKind::InvalidPrefix
                    // Do not recover an identifier with emoji if the codepoint is a confusable
                    // with a recoverable substitution token, like `➖`.
                    if !UNICODE_ARRAY
@@ -301,7 +302,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
                rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),

-                rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
+                rustc_lexer::TokenKind::Unknown
+                | rustc_lexer::TokenKind::InvalidIdent
+                | rustc_lexer::TokenKind::InvalidPrefix => {
                    // Don't emit diagnostics for sequences of the same invalid token
                    if swallow_next_invalid > 0 {
                        swallow_next_invalid -= 1;