Account for confusable codepoints when recovering emoji identifiers
This commit is contained in:
parent
4489aeb907
commit
21224e6ee0
4 changed files with 30 additions and 7 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
use crate::lexer::unicode_chars::UNICODE_ARRAY;
|
||||||
use rustc_ast::ast::{self, AttrStyle};
|
use rustc_ast::ast::{self, AttrStyle};
|
||||||
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
|
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
|
||||||
use rustc_ast::tokenstream::{Spacing, TokenStream};
|
use rustc_ast::tokenstream::{Spacing, TokenStream};
|
||||||
|
@ -222,7 +223,17 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
token::Ident(sym, is_raw_ident)
|
token::Ident(sym, is_raw_ident)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::InvalidIdent => {
|
rustc_lexer::TokenKind::InvalidIdent
|
||||||
|
// Do not recover an identifier with emojis if the codepoint is a confusable
|
||||||
|
// with a recoverable substitution token, like `➖`.
|
||||||
|
if UNICODE_ARRAY
|
||||||
|
.iter()
|
||||||
|
.find(|&&(c, _, _)| {
|
||||||
|
let sym = self.str_from(start);
|
||||||
|
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
|
||||||
|
})
|
||||||
|
.is_none() =>
|
||||||
|
{
|
||||||
let sym = nfc_normalize(self.str_from(start));
|
let sym = nfc_normalize(self.str_from(start));
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
|
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
|
||||||
|
@ -299,7 +310,7 @@ impl<'a> StringReader<'a> {
|
||||||
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
|
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
|
||||||
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
|
||||||
|
|
||||||
rustc_lexer::TokenKind::Unknown => {
|
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
|
||||||
let c = self.str_from(start).chars().next().unwrap();
|
let c = self.str_from(start).chars().next().unwrap();
|
||||||
let mut err =
|
let mut err =
|
||||||
self.struct_fatal_span_char(start, self.pos, "unknown start of token", c);
|
self.struct_fatal_span_char(start, self.pos, "unknown start of token", c);
|
||||||
|
|
|
@ -7,7 +7,7 @@ use rustc_errors::{Applicability, DiagnosticBuilder};
|
||||||
use rustc_span::{symbol::kw, BytePos, Pos, Span};
|
use rustc_span::{symbol::kw, BytePos, Pos, Span};
|
||||||
|
|
||||||
#[rustfmt::skip] // for line breaks
|
#[rustfmt::skip] // for line breaks
|
||||||
const UNICODE_ARRAY: &[(char, &str, char)] = &[
|
pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[
|
||||||
('\u{2028}', "Line Separator", ' '),
|
('\u{2028}', "Line Separator", ' '),
|
||||||
('\u{2029}', "Paragraph Separator", ' '),
|
('\u{2029}', "Paragraph Separator", ' '),
|
||||||
(' ', "Ogham Space mark", ' '),
|
(' ', "Ogham Space mark", ' '),
|
||||||
|
|
|
@ -10,6 +10,7 @@ fn i_like_to_😅_a_lot() -> 👀 { //~ ERROR identifiers cannot contain emojis
|
||||||
//~^ ERROR identifiers cannot contain emojis
|
//~^ ERROR identifiers cannot contain emojis
|
||||||
}
|
}
|
||||||
fn main() {
|
fn main() {
|
||||||
let _ = i_like_to_😄_a_lot(); //~ ERROR cannot find function `i_like_to_😄_a_lot` in this scope
|
let _ = i_like_to_😄_a_lot() ➖ 4; //~ ERROR cannot find function `i_like_to_😄_a_lot` in this scope
|
||||||
//~^ ERROR identifiers cannot contain emojis
|
//~^ ERROR identifiers cannot contain emojis
|
||||||
|
//~| ERROR unknown start of token: \u{2796}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,27 @@
|
||||||
|
error: unknown start of token: \u{2796}
|
||||||
|
--> $DIR/emoji-identifiers.rs:13:33
|
||||||
|
|
|
||||||
|
LL | let _ = i_like_to_😄_a_lot() ➖ 4;
|
||||||
|
| ^^
|
||||||
|
|
|
||||||
|
help: Unicode character '➖' (Heavy Minus Sign) looks like '-' (Minus/Hyphen), but it is not
|
||||||
|
|
|
||||||
|
LL | let _ = i_like_to_😄_a_lot() - 4;
|
||||||
|
| ~
|
||||||
|
|
||||||
error[E0425]: cannot find function `i_like_to_😄_a_lot` in this scope
|
error[E0425]: cannot find function `i_like_to_😄_a_lot` in this scope
|
||||||
--> $DIR/emoji-identifiers.rs:13:13
|
--> $DIR/emoji-identifiers.rs:13:13
|
||||||
|
|
|
|
||||||
LL | fn i_like_to_😅_a_lot() -> 👀 {
|
LL | fn i_like_to_😅_a_lot() -> 👀 {
|
||||||
| ----------------------------- similarly named function `i_like_to_😅_a_lot` defined here
|
| ----------------------------- similarly named function `i_like_to_😅_a_lot` defined here
|
||||||
...
|
...
|
||||||
LL | let _ = i_like_to_😄_a_lot();
|
LL | let _ = i_like_to_😄_a_lot() ➖ 4;
|
||||||
| ^^^^^^^^^^^^^^^^^^ help: a function with a similar name exists: `i_like_to_😅_a_lot`
|
| ^^^^^^^^^^^^^^^^^^ help: a function with a similar name exists: `i_like_to_😅_a_lot`
|
||||||
|
|
||||||
error: identifiers cannot contain emojis: `i_like_to_😄_a_lot`
|
error: identifiers cannot contain emojis: `i_like_to_😄_a_lot`
|
||||||
--> $DIR/emoji-identifiers.rs:13:13
|
--> $DIR/emoji-identifiers.rs:13:13
|
||||||
|
|
|
|
||||||
LL | let _ = i_like_to_😄_a_lot();
|
LL | let _ = i_like_to_😄_a_lot() ➖ 4;
|
||||||
| ^^^^^^^^^^^^^^^^^^
|
| ^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
error: identifiers cannot contain emojis: `full_of_✨`
|
error: identifiers cannot contain emojis: `full_of_✨`
|
||||||
|
@ -66,7 +77,7 @@ LL | 👀::full_of✨()
|
||||||
| function or associated item not found in `👀`
|
| function or associated item not found in `👀`
|
||||||
| help: there is an associated function with a similar name: `full_of_✨`
|
| help: there is an associated function with a similar name: `full_of_✨`
|
||||||
|
|
||||||
error: aborting due to 8 previous errors
|
error: aborting due to 9 previous errors
|
||||||
|
|
||||||
Some errors have detailed explanations: E0425, E0599.
|
Some errors have detailed explanations: E0425, E0599.
|
||||||
For more information about an error, try `rustc --explain E0425`.
|
For more information about an error, try `rustc --explain E0425`.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue