Tokenize emoji as if they were valid identifiers
In the lexer, consider emojis to be valid identifiers and reject them later to avoid knock-on parse errors.
This commit is contained in:
parent
311fa1f14d
commit
5a68abb094
8 changed files with 180 additions and 2 deletions
|
@ -64,6 +64,8 @@ pub enum TokenKind {
|
|||
/// "ident" or "continue"
|
||||
/// At this step keywords are also considered identifiers.
|
||||
Ident,
|
||||
/// Like the above, but containing invalid unicode codepoints.
|
||||
InvalidIdent,
|
||||
/// "r#ident"
|
||||
RawIdent,
|
||||
/// An unknown prefix like `foo#`, `foo'`, `foo"`. Note that only the
|
||||
|
@ -411,6 +413,11 @@ impl Cursor<'_> {
|
|||
let kind = Str { terminated };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
// Identifier (this should be checked after other variant that can
|
||||
// start as identifier).
|
||||
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
|
||||
self.fake_ident_or_unknown_prefix()
|
||||
}
|
||||
_ => Unknown,
|
||||
};
|
||||
Token::new(token_kind, self.len_consumed())
|
||||
|
@ -492,10 +499,28 @@ impl Cursor<'_> {
|
|||
// we see a prefix here, it is definitely an unknown prefix.
|
||||
match self.first() {
|
||||
'#' | '"' | '\'' => UnknownPrefix,
|
||||
c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
|
||||
self.fake_ident_or_unknown_prefix()
|
||||
}
|
||||
_ => Ident,
|
||||
}
|
||||
}
|
||||
|
||||
fn fake_ident_or_unknown_prefix(&mut self) -> TokenKind {
|
||||
// Start is already eaten, eat the rest of identifier.
|
||||
self.eat_while(|c| {
|
||||
unicode_xid::UnicodeXID::is_xid_continue(c)
|
||||
|| (!c.is_ascii() && unic_emoji_char::is_emoji(c))
|
||||
|| c == '\u{200d}'
|
||||
});
|
||||
// Known prefixes must have been handled earlier. So if
|
||||
// we see a prefix here, it is definitely an unknown prefix.
|
||||
match self.first() {
|
||||
'#' | '"' | '\'' => UnknownPrefix,
|
||||
_ => InvalidIdent,
|
||||
}
|
||||
}
|
||||
|
||||
fn number(&mut self, first_digit: char) -> LiteralKind {
|
||||
debug_assert!('0' <= self.prev() && self.prev() <= '9');
|
||||
let mut base = Base::Decimal;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue