Tokenize emoji as if they were valid identifiers

In the lexer, consider emojis to be valid identifiers and reject
them later to avoid knock-down (cascading) parse errors.
This commit is contained in:
Esteban Kuber 2021-08-29 08:34:23 +00:00
parent 311fa1f14d
commit 5a68abb094
8 changed files with 180 additions and 2 deletions

View file

@ -222,6 +222,12 @@ impl<'a> StringReader<'a> {
}
token::Ident(sym, is_raw_ident)
}
rustc_lexer::TokenKind::InvalidIdent => {
let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
token::Ident(sym, false)
}
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start as u32);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);