Rollup merge of #123752 - estebank:emoji-prefix, r=wesleywiser

Properly handle emojis as literal prefix in macros

Do not accept the following

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro pre-expansion of literal prefixes.

Fix #123696.
This commit is contained in:
Jubilee 2024-04-18 21:38:55 -07:00 committed by GitHub
commit 0a0a5a956c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 36 additions and 6 deletions

View file

@ -204,6 +204,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.ident(start)
}
rustc_lexer::TokenKind::InvalidIdent
| rustc_lexer::TokenKind::InvalidPrefix
// Do not recover an identifier with emoji if the codepoint is a confusable
// with a recoverable substitution token, like ``.
if !UNICODE_ARRAY
@ -301,7 +302,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
rustc_lexer::TokenKind::Unknown
| rustc_lexer::TokenKind::InvalidIdent
| rustc_lexer::TokenKind::InvalidPrefix => {
// Don't emit diagnostics for sequences of the same invalid token
if swallow_next_invalid > 0 {
swallow_next_invalid -= 1;