Don't recover lifetimes/labels containing emojis as character literals

Note that at the time of this commit, `unic-emoji-char` seems to have
data tables only up to Unicode 5.0, but Unicode is already newer than
this.

A newer emoji such as `🥺` will not be recognized as an emoji
but older emojis such as `🐱` will.
This commit is contained in:
许杰友 Jieyou Xu (Joe) 2023-02-14 17:31:58 +08:00
parent c3c6d73b04
commit 380fa26413
No known key found for this signature in database
GPG key ID: C5FD5D32014FDB47
5 changed files with 173 additions and 12 deletions

View file

@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
};
token::Literal(token::Lit { kind, symbol, suffix })
}
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
if starts_with_number {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeIsChar);
} else if contains_emoji {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeContainsEmoji);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)