Tokenize emoji as if they were valid identifiers

In the lexer, consider emojis to be valid identifiers and reject them later to avoid knock-on (cascading) parse errors.
2021-08-29 08:34:23 +00:00 · 2021-08-29 08:34:23 +00:00 · 5a68abb094
commit 5a68abb094
parent 311fa1f14d
8 changed files with 180 additions and 2 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -222,6 +222,12 @@ impl<'a> StringReader<'a> {
                }
                token::Ident(sym, is_raw_ident)
            }
+            rustc_lexer::TokenKind::InvalidIdent => {
+                let sym = nfc_normalize(self.str_from(start));
+                let span = self.mk_sp(start, self.pos);
+                self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
+                token::Ident(sym, false)
+            }
            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                let suffix_start = start + BytePos(suffix_start as u32);
                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);