Rollup merge of #107058 - clubby789:eqeq-homoglyph, r=wesleywiser
Recognise double-equals homoglyph. Recognise `⩵` as a homoglyph for `==`. The first commit switches the ASCII substitution type from `char` to `&str` (all previous homoglyphs corresponded to a single ASCII character, so a `char` had sufficed), while the second implements the fix. `@rustbot` label +A-diagnostics +A-parser
This commit is contained in:
commit
3693399ffc
3 changed files with 313 additions and 297 deletions
|
@ -7,329 +7,331 @@ use rustc_errors::{Applicability, Diagnostic};
|
||||||
use rustc_span::{symbol::kw, BytePos, Pos, Span};
|
use rustc_span::{symbol::kw, BytePos, Pos, Span};
|
||||||
|
|
||||||
#[rustfmt::skip] // for line breaks
|
#[rustfmt::skip] // for line breaks
|
||||||
pub(crate) const UNICODE_ARRAY: &[(char, &str, char)] = &[
|
pub(crate) const UNICODE_ARRAY: &[(char, &str, &str)] = &[
|
||||||
('
', "Line Separator", ' '),
|
('
', "Line Separator", " "),
|
||||||
('
', "Paragraph Separator", ' '),
|
('
', "Paragraph Separator", " "),
|
||||||
(' ', "Ogham Space mark", ' '),
|
(' ', "Ogham Space mark", " "),
|
||||||
(' ', "En Quad", ' '),
|
(' ', "En Quad", " "),
|
||||||
(' ', "Em Quad", ' '),
|
(' ', "Em Quad", " "),
|
||||||
(' ', "En Space", ' '),
|
(' ', "En Space", " "),
|
||||||
(' ', "Em Space", ' '),
|
(' ', "Em Space", " "),
|
||||||
(' ', "Three-Per-Em Space", ' '),
|
(' ', "Three-Per-Em Space", " "),
|
||||||
(' ', "Four-Per-Em Space", ' '),
|
(' ', "Four-Per-Em Space", " "),
|
||||||
(' ', "Six-Per-Em Space", ' '),
|
(' ', "Six-Per-Em Space", " "),
|
||||||
(' ', "Punctuation Space", ' '),
|
(' ', "Punctuation Space", " "),
|
||||||
(' ', "Thin Space", ' '),
|
(' ', "Thin Space", " "),
|
||||||
(' ', "Hair Space", ' '),
|
(' ', "Hair Space", " "),
|
||||||
(' ', "Medium Mathematical Space", ' '),
|
(' ', "Medium Mathematical Space", " "),
|
||||||
(' ', "No-Break Space", ' '),
|
(' ', "No-Break Space", " "),
|
||||||
(' ', "Figure Space", ' '),
|
(' ', "Figure Space", " "),
|
||||||
(' ', "Narrow No-Break Space", ' '),
|
(' ', "Narrow No-Break Space", " "),
|
||||||
(' ', "Ideographic Space", ' '),
|
(' ', "Ideographic Space", " "),
|
||||||
|
|
||||||
('ߺ', "Nko Lajanyalan", '_'),
|
('ߺ', "Nko Lajanyalan", "_"),
|
||||||
('﹍', "Dashed Low Line", '_'),
|
('﹍', "Dashed Low Line", "_"),
|
||||||
('﹎', "Centreline Low Line", '_'),
|
('﹎', "Centreline Low Line", "_"),
|
||||||
('﹏', "Wavy Low Line", '_'),
|
('﹏', "Wavy Low Line", "_"),
|
||||||
('_', "Fullwidth Low Line", '_'),
|
('_', "Fullwidth Low Line", "_"),
|
||||||
|
|
||||||
('‐', "Hyphen", '-'),
|
('‐', "Hyphen", "-"),
|
||||||
('‑', "Non-Breaking Hyphen", '-'),
|
('‑', "Non-Breaking Hyphen", "-"),
|
||||||
('‒', "Figure Dash", '-'),
|
('‒', "Figure Dash", "-"),
|
||||||
('–', "En Dash", '-'),
|
('–', "En Dash", "-"),
|
||||||
('—', "Em Dash", '-'),
|
('—', "Em Dash", "-"),
|
||||||
('﹘', "Small Em Dash", '-'),
|
('﹘', "Small Em Dash", "-"),
|
||||||
('۔', "Arabic Full Stop", '-'),
|
('۔', "Arabic Full Stop", "-"),
|
||||||
('⁃', "Hyphen Bullet", '-'),
|
('⁃', "Hyphen Bullet", "-"),
|
||||||
('˗', "Modifier Letter Minus Sign", '-'),
|
('˗', "Modifier Letter Minus Sign", "-"),
|
||||||
('−', "Minus Sign", '-'),
|
('−', "Minus Sign", "-"),
|
||||||
('➖', "Heavy Minus Sign", '-'),
|
('➖', "Heavy Minus Sign", "-"),
|
||||||
('Ⲻ', "Coptic Letter Dialect-P Ni", '-'),
|
('Ⲻ', "Coptic Letter Dialect-P Ni", "-"),
|
||||||
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
|
('ー', "Katakana-Hiragana Prolonged Sound Mark", "-"),
|
||||||
('-', "Fullwidth Hyphen-Minus", '-'),
|
('-', "Fullwidth Hyphen-Minus", "-"),
|
||||||
('―', "Horizontal Bar", '-'),
|
('―', "Horizontal Bar", "-"),
|
||||||
('─', "Box Drawings Light Horizontal", '-'),
|
('─', "Box Drawings Light Horizontal", "-"),
|
||||||
('━', "Box Drawings Heavy Horizontal", '-'),
|
('━', "Box Drawings Heavy Horizontal", "-"),
|
||||||
('㇐', "CJK Stroke H", '-'),
|
('㇐', "CJK Stroke H", "-"),
|
||||||
('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
|
('ꟷ', "Latin Epigraphic Letter Sideways I", "-"),
|
||||||
('ᅳ', "Hangul Jungseong Eu", '-'),
|
('ᅳ', "Hangul Jungseong Eu", "-"),
|
||||||
('ㅡ', "Hangul Letter Eu", '-'),
|
('ㅡ', "Hangul Letter Eu", "-"),
|
||||||
('一', "CJK Unified Ideograph-4E00", '-'),
|
('一', "CJK Unified Ideograph-4E00", "-"),
|
||||||
('⼀', "Kangxi Radical One", '-'),
|
('⼀', "Kangxi Radical One", "-"),
|
||||||
|
|
||||||
('؍', "Arabic Date Separator", ','),
|
('؍', "Arabic Date Separator", ","),
|
||||||
('٫', "Arabic Decimal Separator", ','),
|
('٫', "Arabic Decimal Separator", ","),
|
||||||
('‚', "Single Low-9 Quotation Mark", ','),
|
('‚', "Single Low-9 Quotation Mark", ","),
|
||||||
('¸', "Cedilla", ','),
|
('¸', "Cedilla", ","),
|
||||||
('ꓹ', "Lisu Letter Tone Na Po", ','),
|
('ꓹ', "Lisu Letter Tone Na Po", ","),
|
||||||
(',', "Fullwidth Comma", ','),
|
(',', "Fullwidth Comma", ","),
|
||||||
|
|
||||||
(';', "Greek Question Mark", ';'),
|
(';', "Greek Question Mark", ";"),
|
||||||
(';', "Fullwidth Semicolon", ';'),
|
(';', "Fullwidth Semicolon", ";"),
|
||||||
('︔', "Presentation Form For Vertical Semicolon", ';'),
|
('︔', "Presentation Form For Vertical Semicolon", ";"),
|
||||||
|
|
||||||
('ः', "Devanagari Sign Visarga", ':'),
|
('ः', "Devanagari Sign Visarga", ":"),
|
||||||
('ઃ', "Gujarati Sign Visarga", ':'),
|
('ઃ', "Gujarati Sign Visarga", ":"),
|
||||||
(':', "Fullwidth Colon", ':'),
|
(':', "Fullwidth Colon", ":"),
|
||||||
('։', "Armenian Full Stop", ':'),
|
('։', "Armenian Full Stop", ":"),
|
||||||
('܃', "Syriac Supralinear Colon", ':'),
|
('܃', "Syriac Supralinear Colon", ":"),
|
||||||
('܄', "Syriac Sublinear Colon", ':'),
|
('܄', "Syriac Sublinear Colon", ":"),
|
||||||
('᛬', "Runic Multiple Punctuation", ':'),
|
('᛬', "Runic Multiple Punctuation", ":"),
|
||||||
('︰', "Presentation Form For Vertical Two Dot Leader", ':'),
|
('︰', "Presentation Form For Vertical Two Dot Leader", ":"),
|
||||||
('᠃', "Mongolian Full Stop", ':'),
|
('᠃', "Mongolian Full Stop", ":"),
|
||||||
('᠉', "Mongolian Manchu Full Stop", ':'),
|
('᠉', "Mongolian Manchu Full Stop", ":"),
|
||||||
('⁚', "Two Dot Punctuation", ':'),
|
('⁚', "Two Dot Punctuation", ":"),
|
||||||
('׃', "Hebrew Punctuation Sof Pasuq", ':'),
|
('׃', "Hebrew Punctuation Sof Pasuq", ":"),
|
||||||
('˸', "Modifier Letter Raised Colon", ':'),
|
('˸', "Modifier Letter Raised Colon", ":"),
|
||||||
('꞉', "Modifier Letter Colon", ':'),
|
('꞉', "Modifier Letter Colon", ":"),
|
||||||
('∶', "Ratio", ':'),
|
('∶', "Ratio", ":"),
|
||||||
('ː', "Modifier Letter Triangular Colon", ':'),
|
('ː', "Modifier Letter Triangular Colon", ":"),
|
||||||
('ꓽ', "Lisu Letter Tone Mya Jeu", ':'),
|
('ꓽ', "Lisu Letter Tone Mya Jeu", ":"),
|
||||||
('︓', "Presentation Form For Vertical Colon", ':'),
|
('︓', "Presentation Form For Vertical Colon", ":"),
|
||||||
|
|
||||||
('!', "Fullwidth Exclamation Mark", '!'),
|
('!', "Fullwidth Exclamation Mark", "!"),
|
||||||
('ǃ', "Latin Letter Retroflex Click", '!'),
|
('ǃ', "Latin Letter Retroflex Click", "!"),
|
||||||
('ⵑ', "Tifinagh Letter Tuareg Yang", '!'),
|
('ⵑ', "Tifinagh Letter Tuareg Yang", "!"),
|
||||||
('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
|
('︕', "Presentation Form For Vertical Exclamation Mark", "!"),
|
||||||
|
|
||||||
('ʔ', "Latin Letter Glottal Stop", '?'),
|
('ʔ', "Latin Letter Glottal Stop", "?"),
|
||||||
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
|
('Ɂ', "Latin Capital Letter Glottal Stop", "?"),
|
||||||
('ॽ', "Devanagari Letter Glottal Stop", '?'),
|
('ॽ', "Devanagari Letter Glottal Stop", "?"),
|
||||||
('Ꭾ', "Cherokee Letter He", '?'),
|
('Ꭾ', "Cherokee Letter He", "?"),
|
||||||
('ꛫ', "Bamum Letter Ntuu", '?'),
|
('ꛫ', "Bamum Letter Ntuu", "?"),
|
||||||
('?', "Fullwidth Question Mark", '?'),
|
('?', "Fullwidth Question Mark", "?"),
|
||||||
('︖', "Presentation Form For Vertical Question Mark", '?'),
|
('︖', "Presentation Form For Vertical Question Mark", "?"),
|
||||||
|
|
||||||
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
|
('𝅭', "Musical Symbol Combining Augmentation Dot", "."),
|
||||||
('․', "One Dot Leader", '.'),
|
('․', "One Dot Leader", "."),
|
||||||
('܁', "Syriac Supralinear Full Stop", '.'),
|
('܁', "Syriac Supralinear Full Stop", "."),
|
||||||
('܂', "Syriac Sublinear Full Stop", '.'),
|
('܂', "Syriac Sublinear Full Stop", "."),
|
||||||
('꘎', "Vai Full Stop", '.'),
|
('꘎', "Vai Full Stop", "."),
|
||||||
('𐩐', "Kharoshthi Punctuation Dot", '.'),
|
('𐩐', "Kharoshthi Punctuation Dot", "."),
|
||||||
('٠', "Arabic-Indic Digit Zero", '.'),
|
('٠', "Arabic-Indic Digit Zero", "."),
|
||||||
('۰', "Extended Arabic-Indic Digit Zero", '.'),
|
('۰', "Extended Arabic-Indic Digit Zero", "."),
|
||||||
('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
|
('ꓸ', "Lisu Letter Tone Mya Ti", "."),
|
||||||
('·', "Middle Dot", '.'),
|
('·', "Middle Dot", "."),
|
||||||
('・', "Katakana Middle Dot", '.'),
|
('・', "Katakana Middle Dot", "."),
|
||||||
('・', "Halfwidth Katakana Middle Dot", '.'),
|
('・', "Halfwidth Katakana Middle Dot", "."),
|
||||||
('᛫', "Runic Single Punctuation", '.'),
|
('᛫', "Runic Single Punctuation", "."),
|
||||||
('·', "Greek Ano Teleia", '.'),
|
('·', "Greek Ano Teleia", "."),
|
||||||
('⸱', "Word Separator Middle Dot", '.'),
|
('⸱', "Word Separator Middle Dot", "."),
|
||||||
('𐄁', "Aegean Word Separator Dot", '.'),
|
('𐄁', "Aegean Word Separator Dot", "."),
|
||||||
('•', "Bullet", '.'),
|
('•', "Bullet", "."),
|
||||||
('‧', "Hyphenation Point", '.'),
|
('‧', "Hyphenation Point", "."),
|
||||||
('∙', "Bullet Operator", '.'),
|
('∙', "Bullet Operator", "."),
|
||||||
('⋅', "Dot Operator", '.'),
|
('⋅', "Dot Operator", "."),
|
||||||
('ꞏ', "Latin Letter Sinological Dot", '.'),
|
('ꞏ', "Latin Letter Sinological Dot", "."),
|
||||||
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
|
('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
|
||||||
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
|
('ᐧ', "Canadian Syllabics Final Middle Dot", "."),
|
||||||
('.', "Fullwidth Full Stop", '.'),
|
('.', "Fullwidth Full Stop", "."),
|
||||||
('。', "Ideographic Full Stop", '.'),
|
('。', "Ideographic Full Stop", "."),
|
||||||
('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
|
('︒', "Presentation Form For Vertical Ideographic Full Stop", "."),
|
||||||
|
|
||||||
('՝', "Armenian Comma", '\''),
|
('՝', "Armenian Comma", "\'"),
|
||||||
(''', "Fullwidth Apostrophe", '\''),
|
(''', "Fullwidth Apostrophe", "\'"),
|
||||||
('‘', "Left Single Quotation Mark", '\''),
|
('‘', "Left Single Quotation Mark", "\'"),
|
||||||
('’', "Right Single Quotation Mark", '\''),
|
('’', "Right Single Quotation Mark", "\'"),
|
||||||
('‛', "Single High-Reversed-9 Quotation Mark", '\''),
|
('‛', "Single High-Reversed-9 Quotation Mark", "\'"),
|
||||||
('′', "Prime", '\''),
|
('′', "Prime", "\'"),
|
||||||
('‵', "Reversed Prime", '\''),
|
('‵', "Reversed Prime", "\'"),
|
||||||
('՚', "Armenian Apostrophe", '\''),
|
('՚', "Armenian Apostrophe", "\'"),
|
||||||
('׳', "Hebrew Punctuation Geresh", '\''),
|
('׳', "Hebrew Punctuation Geresh", "\'"),
|
||||||
('`', "Grave Accent", '\''),
|
('`', "Grave Accent", "\'"),
|
||||||
('`', "Greek Varia", '\''),
|
('`', "Greek Varia", "\'"),
|
||||||
('`', "Fullwidth Grave Accent", '\''),
|
('`', "Fullwidth Grave Accent", "\'"),
|
||||||
('´', "Acute Accent", '\''),
|
('´', "Acute Accent", "\'"),
|
||||||
('΄', "Greek Tonos", '\''),
|
('΄', "Greek Tonos", "\'"),
|
||||||
('´', "Greek Oxia", '\''),
|
('´', "Greek Oxia", "\'"),
|
||||||
('᾽', "Greek Koronis", '\''),
|
('᾽', "Greek Koronis", "\'"),
|
||||||
('᾿', "Greek Psili", '\''),
|
('᾿', "Greek Psili", "\'"),
|
||||||
('῾', "Greek Dasia", '\''),
|
('῾', "Greek Dasia", "\'"),
|
||||||
('ʹ', "Modifier Letter Prime", '\''),
|
('ʹ', "Modifier Letter Prime", "\'"),
|
||||||
('ʹ', "Greek Numeral Sign", '\''),
|
('ʹ', "Greek Numeral Sign", "\'"),
|
||||||
('ˈ', "Modifier Letter Vertical Line", '\''),
|
('ˈ', "Modifier Letter Vertical Line", "\'"),
|
||||||
('ˊ', "Modifier Letter Acute Accent", '\''),
|
('ˊ', "Modifier Letter Acute Accent", "\'"),
|
||||||
('ˋ', "Modifier Letter Grave Accent", '\''),
|
('ˋ', "Modifier Letter Grave Accent", "\'"),
|
||||||
('˴', "Modifier Letter Middle Grave Accent", '\''),
|
('˴', "Modifier Letter Middle Grave Accent", "\'"),
|
||||||
('ʻ', "Modifier Letter Turned Comma", '\''),
|
('ʻ', "Modifier Letter Turned Comma", "\'"),
|
||||||
('ʽ', "Modifier Letter Reversed Comma", '\''),
|
('ʽ', "Modifier Letter Reversed Comma", "\'"),
|
||||||
('ʼ', "Modifier Letter Apostrophe", '\''),
|
('ʼ', "Modifier Letter Apostrophe", "\'"),
|
||||||
('ʾ', "Modifier Letter Right Half Ring", '\''),
|
('ʾ', "Modifier Letter Right Half Ring", "\'"),
|
||||||
('ꞌ', "Latin Small Letter Saltillo", '\''),
|
('ꞌ', "Latin Small Letter Saltillo", "\'"),
|
||||||
('י', "Hebrew Letter Yod", '\''),
|
('י', "Hebrew Letter Yod", "\'"),
|
||||||
('ߴ', "Nko High Tone Apostrophe", '\''),
|
('ߴ', "Nko High Tone Apostrophe", "\'"),
|
||||||
('ߵ', "Nko Low Tone Apostrophe", '\''),
|
('ߵ', "Nko Low Tone Apostrophe", "\'"),
|
||||||
('ᑊ', "Canadian Syllabics West-Cree P", '\''),
|
('ᑊ', "Canadian Syllabics West-Cree P", "\'"),
|
||||||
('ᛌ', "Runic Letter Short-Twig-Sol S", '\''),
|
('ᛌ', "Runic Letter Short-Twig-Sol S", "\'"),
|
||||||
('𖽑', "Miao Sign Aspiration", '\''),
|
('𖽑', "Miao Sign Aspiration", "\'"),
|
||||||
('𖽒', "Miao Sign Reformed Voicing", '\''),
|
('𖽒', "Miao Sign Reformed Voicing", "\'"),
|
||||||
|
|
||||||
('᳓', "Vedic Sign Nihshvasa", '"'),
|
('᳓', "Vedic Sign Nihshvasa", "\""),
|
||||||
('"', "Fullwidth Quotation Mark", '"'),
|
('"', "Fullwidth Quotation Mark", "\""),
|
||||||
('“', "Left Double Quotation Mark", '"'),
|
('“', "Left Double Quotation Mark", "\""),
|
||||||
('”', "Right Double Quotation Mark", '"'),
|
('”', "Right Double Quotation Mark", "\""),
|
||||||
('‟', "Double High-Reversed-9 Quotation Mark", '"'),
|
('‟', "Double High-Reversed-9 Quotation Mark", "\""),
|
||||||
('″', "Double Prime", '"'),
|
('″', "Double Prime", "\""),
|
||||||
('‶', "Reversed Double Prime", '"'),
|
('‶', "Reversed Double Prime", "\""),
|
||||||
('〃', "Ditto Mark", '"'),
|
('〃', "Ditto Mark", "\""),
|
||||||
('״', "Hebrew Punctuation Gershayim", '"'),
|
('״', "Hebrew Punctuation Gershayim", "\""),
|
||||||
('˝', "Double Acute Accent", '"'),
|
('˝', "Double Acute Accent", "\""),
|
||||||
('ʺ', "Modifier Letter Double Prime", '"'),
|
('ʺ', "Modifier Letter Double Prime", "\""),
|
||||||
('˶', "Modifier Letter Middle Double Acute Accent", '"'),
|
('˶', "Modifier Letter Middle Double Acute Accent", "\""),
|
||||||
('˵', "Modifier Letter Middle Double Grave Accent", '"'),
|
('˵', "Modifier Letter Middle Double Grave Accent", "\""),
|
||||||
('ˮ', "Modifier Letter Double Apostrophe", '"'),
|
('ˮ', "Modifier Letter Double Apostrophe", "\""),
|
||||||
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
|
('ײ', "Hebrew Ligature Yiddish Double Yod", "\""),
|
||||||
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
|
('❞', "Heavy Double Comma Quotation Mark Ornament", "\""),
|
||||||
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
|
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", "\""),
|
||||||
|
|
||||||
('(', "Fullwidth Left Parenthesis", '('),
|
('(', "Fullwidth Left Parenthesis", "("),
|
||||||
('❨', "Medium Left Parenthesis Ornament", '('),
|
('❨', "Medium Left Parenthesis Ornament", "("),
|
||||||
('﴾', "Ornate Left Parenthesis", '('),
|
('﴾', "Ornate Left Parenthesis", "("),
|
||||||
|
|
||||||
(')', "Fullwidth Right Parenthesis", ')'),
|
(')', "Fullwidth Right Parenthesis", ")"),
|
||||||
('❩', "Medium Right Parenthesis Ornament", ')'),
|
('❩', "Medium Right Parenthesis Ornament", ")"),
|
||||||
('﴿', "Ornate Right Parenthesis", ')'),
|
('﴿', "Ornate Right Parenthesis", ")"),
|
||||||
|
|
||||||
('[', "Fullwidth Left Square Bracket", '['),
|
('[', "Fullwidth Left Square Bracket", "["),
|
||||||
('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
|
('❲', "Light Left Tortoise Shell Bracket Ornament", "["),
|
||||||
('「', "Left Corner Bracket", '['),
|
('「', "Left Corner Bracket", "["),
|
||||||
('『', "Left White Corner Bracket", '['),
|
('『', "Left White Corner Bracket", "["),
|
||||||
('【', "Left Black Lenticular Bracket", '['),
|
('【', "Left Black Lenticular Bracket", "["),
|
||||||
('〔', "Left Tortoise Shell Bracket", '['),
|
('〔', "Left Tortoise Shell Bracket", "["),
|
||||||
('〖', "Left White Lenticular Bracket", '['),
|
('〖', "Left White Lenticular Bracket", "["),
|
||||||
('〘', "Left White Tortoise Shell Bracket", '['),
|
('〘', "Left White Tortoise Shell Bracket", "["),
|
||||||
('〚', "Left White Square Bracket", '['),
|
('〚', "Left White Square Bracket", "["),
|
||||||
|
|
||||||
(']', "Fullwidth Right Square Bracket", ']'),
|
(']', "Fullwidth Right Square Bracket", "]"),
|
||||||
('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
|
('❳', "Light Right Tortoise Shell Bracket Ornament", "]"),
|
||||||
('」', "Right Corner Bracket", ']'),
|
('」', "Right Corner Bracket", "]"),
|
||||||
('』', "Right White Corner Bracket", ']'),
|
('』', "Right White Corner Bracket", "]"),
|
||||||
('】', "Right Black Lenticular Bracket", ']'),
|
('】', "Right Black Lenticular Bracket", "]"),
|
||||||
('〕', "Right Tortoise Shell Bracket", ']'),
|
('〕', "Right Tortoise Shell Bracket", "]"),
|
||||||
('〗', "Right White Lenticular Bracket", ']'),
|
('〗', "Right White Lenticular Bracket", "]"),
|
||||||
('〙', "Right White Tortoise Shell Bracket", ']'),
|
('〙', "Right White Tortoise Shell Bracket", "]"),
|
||||||
('〛', "Right White Square Bracket", ']'),
|
('〛', "Right White Square Bracket", "]"),
|
||||||
|
|
||||||
('❴', "Medium Left Curly Bracket Ornament", '{'),
|
('❴', "Medium Left Curly Bracket Ornament", "{"),
|
||||||
('𝄔', "Musical Symbol Brace", '{'),
|
('𝄔', "Musical Symbol Brace", "{"),
|
||||||
('{', "Fullwidth Left Curly Bracket", '{'),
|
('{', "Fullwidth Left Curly Bracket", "{"),
|
||||||
|
|
||||||
('❵', "Medium Right Curly Bracket Ornament", '}'),
|
('❵', "Medium Right Curly Bracket Ornament", "}"),
|
||||||
('}', "Fullwidth Right Curly Bracket", '}'),
|
('}', "Fullwidth Right Curly Bracket", "}"),
|
||||||
|
|
||||||
('⁎', "Low Asterisk", '*'),
|
('⁎', "Low Asterisk", "*"),
|
||||||
('٭', "Arabic Five Pointed Star", '*'),
|
('٭', "Arabic Five Pointed Star", "*"),
|
||||||
('∗', "Asterisk Operator", '*'),
|
('∗', "Asterisk Operator", "*"),
|
||||||
('𐌟', "Old Italic Letter Ess", '*'),
|
('𐌟', "Old Italic Letter Ess", "*"),
|
||||||
('*', "Fullwidth Asterisk", '*'),
|
('*', "Fullwidth Asterisk", "*"),
|
||||||
|
|
||||||
('᜵', "Philippine Single Punctuation", '/'),
|
('᜵', "Philippine Single Punctuation", "/"),
|
||||||
('⁁', "Caret Insertion Point", '/'),
|
('⁁', "Caret Insertion Point", "/"),
|
||||||
('∕', "Division Slash", '/'),
|
('∕', "Division Slash", "/"),
|
||||||
('⁄', "Fraction Slash", '/'),
|
('⁄', "Fraction Slash", "/"),
|
||||||
('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
|
('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", "/"),
|
||||||
('⟋', "Mathematical Rising Diagonal", '/'),
|
('⟋', "Mathematical Rising Diagonal", "/"),
|
||||||
('⧸', "Big Solidus", '/'),
|
('⧸', "Big Solidus", "/"),
|
||||||
('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
|
('𝈺', "Greek Instrumental Notation Symbol-47", "/"),
|
||||||
('㇓', "CJK Stroke Sp", '/'),
|
('㇓', "CJK Stroke Sp", "/"),
|
||||||
('〳', "Vertical Kana Repeat Mark Upper Half", '/'),
|
('〳', "Vertical Kana Repeat Mark Upper Half", "/"),
|
||||||
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'),
|
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", "/"),
|
||||||
('ノ', "Katakana Letter No", '/'),
|
('ノ', "Katakana Letter No", "/"),
|
||||||
('丿', "CJK Unified Ideograph-4E3F", '/'),
|
('丿', "CJK Unified Ideograph-4E3F", "/"),
|
||||||
('⼃', "Kangxi Radical Slash", '/'),
|
('⼃', "Kangxi Radical Slash", "/"),
|
||||||
('/', "Fullwidth Solidus", '/'),
|
('/', "Fullwidth Solidus", "/"),
|
||||||
|
|
||||||
('\', "Fullwidth Reverse Solidus", '\\'),
|
('\', "Fullwidth Reverse Solidus", "\\"),
|
||||||
('﹨', "Small Reverse Solidus", '\\'),
|
('﹨', "Small Reverse Solidus", "\\"),
|
||||||
('∖', "Set Minus", '\\'),
|
('∖', "Set Minus", "\\"),
|
||||||
('⟍', "Mathematical Falling Diagonal", '\\'),
|
('⟍', "Mathematical Falling Diagonal", "\\"),
|
||||||
('⧵', "Reverse Solidus Operator", '\\'),
|
('⧵', "Reverse Solidus Operator", "\\"),
|
||||||
('⧹', "Big Reverse Solidus", '\\'),
|
('⧹', "Big Reverse Solidus", "\\"),
|
||||||
('⧹', "Greek Vocal Notation Symbol-16", '\\'),
|
('⧹', "Greek Vocal Notation Symbol-16", "\\"),
|
||||||
('⧹', "Greek Instrumental Symbol-48", '\\'),
|
('⧹', "Greek Instrumental Symbol-48", "\\"),
|
||||||
('㇔', "CJK Stroke D", '\\'),
|
('㇔', "CJK Stroke D", "\\"),
|
||||||
('丶', "CJK Unified Ideograph-4E36", '\\'),
|
('丶', "CJK Unified Ideograph-4E36", "\\"),
|
||||||
('⼂', "Kangxi Radical Dot", '\\'),
|
('⼂', "Kangxi Radical Dot", "\\"),
|
||||||
('、', "Ideographic Comma", '\\'),
|
('、', "Ideographic Comma", "\\"),
|
||||||
('ヽ', "Katakana Iteration Mark", '\\'),
|
('ヽ', "Katakana Iteration Mark", "\\"),
|
||||||
|
|
||||||
('ꝸ', "Latin Small Letter Um", '&'),
|
('ꝸ', "Latin Small Letter Um", "&"),
|
||||||
('&', "Fullwidth Ampersand", '&'),
|
('&', "Fullwidth Ampersand", "&"),
|
||||||
|
|
||||||
('᛭', "Runic Cross Punctuation", '+'),
|
('᛭', "Runic Cross Punctuation", "+"),
|
||||||
('➕', "Heavy Plus Sign", '+'),
|
('➕', "Heavy Plus Sign", "+"),
|
||||||
('𐊛', "Lycian Letter H", '+'),
|
('𐊛', "Lycian Letter H", "+"),
|
||||||
('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
|
('﬩', "Hebrew Letter Alternative Plus Sign", "+"),
|
||||||
('+', "Fullwidth Plus Sign", '+'),
|
('+', "Fullwidth Plus Sign", "+"),
|
||||||
|
|
||||||
('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
|
('‹', "Single Left-Pointing Angle Quotation Mark", "<"),
|
||||||
('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
|
('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", "<"),
|
||||||
('˂', "Modifier Letter Left Arrowhead", '<'),
|
('˂', "Modifier Letter Left Arrowhead", "<"),
|
||||||
('𝈶', "Greek Instrumental Symbol-40", '<'),
|
('𝈶', "Greek Instrumental Symbol-40", "<"),
|
||||||
('ᐸ', "Canadian Syllabics Pa", '<'),
|
('ᐸ', "Canadian Syllabics Pa", "<"),
|
||||||
('ᚲ', "Runic Letter Kauna", '<'),
|
('ᚲ', "Runic Letter Kauna", "<"),
|
||||||
('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
|
('❬', "Medium Left-Pointing Angle Bracket Ornament", "<"),
|
||||||
('⟨', "Mathematical Left Angle Bracket", '<'),
|
('⟨', "Mathematical Left Angle Bracket", "<"),
|
||||||
('〈', "Left-Pointing Angle Bracket", '<'),
|
('〈', "Left-Pointing Angle Bracket", "<"),
|
||||||
('〈', "Left Angle Bracket", '<'),
|
('〈', "Left Angle Bracket", "<"),
|
||||||
('㇛', "CJK Stroke Pd", '<'),
|
('㇛', "CJK Stroke Pd", "<"),
|
||||||
('く', "Hiragana Letter Ku", '<'),
|
('く', "Hiragana Letter Ku", "<"),
|
||||||
('𡿨', "CJK Unified Ideograph-21FE8", '<'),
|
('𡿨', "CJK Unified Ideograph-21FE8", "<"),
|
||||||
('《', "Left Double Angle Bracket", '<'),
|
('《', "Left Double Angle Bracket", "<"),
|
||||||
('<', "Fullwidth Less-Than Sign", '<'),
|
('<', "Fullwidth Less-Than Sign", "<"),
|
||||||
|
|
||||||
('᐀', "Canadian Syllabics Hyphen", '='),
|
('᐀', "Canadian Syllabics Hyphen", "="),
|
||||||
('⹀', "Double Hyphen", '='),
|
('⹀', "Double Hyphen", "="),
|
||||||
('゠', "Katakana-Hiragana Double Hyphen", '='),
|
('゠', "Katakana-Hiragana Double Hyphen", "="),
|
||||||
('꓿', "Lisu Punctuation Full Stop", '='),
|
('꓿', "Lisu Punctuation Full Stop", "="),
|
||||||
('=', "Fullwidth Equals Sign", '='),
|
('=', "Fullwidth Equals Sign", "="),
|
||||||
|
|
||||||
('›', "Single Right-Pointing Angle Quotation Mark", '>'),
|
('›', "Single Right-Pointing Angle Quotation Mark", ">"),
|
||||||
('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
|
('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", ">"),
|
||||||
('˃', "Modifier Letter Right Arrowhead", '>'),
|
('˃', "Modifier Letter Right Arrowhead", ">"),
|
||||||
('𝈷', "Greek Instrumental Symbol-42", '>'),
|
('𝈷', "Greek Instrumental Symbol-42", ">"),
|
||||||
('ᐳ', "Canadian Syllabics Po", '>'),
|
('ᐳ', "Canadian Syllabics Po", ">"),
|
||||||
('𖼿', "Miao Letter Archaic Zza", '>'),
|
('𖼿', "Miao Letter Archaic Zza", ">"),
|
||||||
('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
|
('❭', "Medium Right-Pointing Angle Bracket Ornament", ">"),
|
||||||
('⟩', "Mathematical Right Angle Bracket", '>'),
|
('⟩', "Mathematical Right Angle Bracket", ">"),
|
||||||
('〉', "Right-Pointing Angle Bracket", '>'),
|
('〉', "Right-Pointing Angle Bracket", ">"),
|
||||||
('〉', "Right Angle Bracket", '>'),
|
('〉', "Right Angle Bracket", ">"),
|
||||||
('》', "Right Double Angle Bracket", '>'),
|
('》', "Right Double Angle Bracket", ">"),
|
||||||
('>', "Fullwidth Greater-Than Sign", '>'),
|
('>', "Fullwidth Greater-Than Sign", ">"),
|
||||||
|
('⩵', "Two Consecutive Equals Signs", "==")
|
||||||
];
|
];
|
||||||
|
|
||||||
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
|
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
|
||||||
// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
|
// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
|
||||||
// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
|
// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
|
||||||
// fancier error recovery to it, as there will be less overall work to do this way.
|
// fancier error recovery to it, as there will be less overall work to do this way.
|
||||||
const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
|
const ASCII_ARRAY: &[(&str, &str, Option<token::TokenKind>)] = &[
|
||||||
(' ', "Space", None),
|
(" ", "Space", None),
|
||||||
('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
|
("_", "Underscore", Some(token::Ident(kw::Underscore, false))),
|
||||||
('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
|
("-", "Minus/Hyphen", Some(token::BinOp(token::Minus))),
|
||||||
(',', "Comma", Some(token::Comma)),
|
(",", "Comma", Some(token::Comma)),
|
||||||
(';', "Semicolon", Some(token::Semi)),
|
(";", "Semicolon", Some(token::Semi)),
|
||||||
(':', "Colon", Some(token::Colon)),
|
(":", "Colon", Some(token::Colon)),
|
||||||
('!', "Exclamation Mark", Some(token::Not)),
|
("!", "Exclamation Mark", Some(token::Not)),
|
||||||
('?', "Question Mark", Some(token::Question)),
|
("?", "Question Mark", Some(token::Question)),
|
||||||
('.', "Period", Some(token::Dot)),
|
(".", "Period", Some(token::Dot)),
|
||||||
('(', "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
|
("(", "Left Parenthesis", Some(token::OpenDelim(Delimiter::Parenthesis))),
|
||||||
(')', "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
|
(")", "Right Parenthesis", Some(token::CloseDelim(Delimiter::Parenthesis))),
|
||||||
('[', "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
|
("[", "Left Square Bracket", Some(token::OpenDelim(Delimiter::Bracket))),
|
||||||
(']', "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
|
("]", "Right Square Bracket", Some(token::CloseDelim(Delimiter::Bracket))),
|
||||||
('{', "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
|
("{", "Left Curly Brace", Some(token::OpenDelim(Delimiter::Brace))),
|
||||||
('}', "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
|
("}", "Right Curly Brace", Some(token::CloseDelim(Delimiter::Brace))),
|
||||||
('*', "Asterisk", Some(token::BinOp(token::Star))),
|
("*", "Asterisk", Some(token::BinOp(token::Star))),
|
||||||
('/', "Slash", Some(token::BinOp(token::Slash))),
|
("/", "Slash", Some(token::BinOp(token::Slash))),
|
||||||
('\\', "Backslash", None),
|
("\\", "Backslash", None),
|
||||||
('&', "Ampersand", Some(token::BinOp(token::And))),
|
("&", "Ampersand", Some(token::BinOp(token::And))),
|
||||||
('+', "Plus Sign", Some(token::BinOp(token::Plus))),
|
("+", "Plus Sign", Some(token::BinOp(token::Plus))),
|
||||||
('<', "Less-Than Sign", Some(token::Lt)),
|
("<", "Less-Than Sign", Some(token::Lt)),
|
||||||
('=', "Equals Sign", Some(token::Eq)),
|
("=", "Equals Sign", Some(token::Eq)),
|
||||||
('>', "Greater-Than Sign", Some(token::Gt)),
|
("==", "Double Equals Sign", Some(token::EqEq)),
|
||||||
|
(">", "Greater-Than Sign", Some(token::Gt)),
|
||||||
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
|
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
|
||||||
// spitting the correct token out.
|
// spitting the correct token out.
|
||||||
('\'', "Single Quote", None),
|
("\'", "Single Quote", None),
|
||||||
('"', "Quotation Mark", None),
|
("\"", "Quotation Mark", None),
|
||||||
];
|
];
|
||||||
|
|
||||||
pub(super) fn check_for_substitution<'a>(
|
pub(super) fn check_for_substitution<'a>(
|
||||||
|
@ -339,11 +341,11 @@ pub(super) fn check_for_substitution<'a>(
|
||||||
err: &mut Diagnostic,
|
err: &mut Diagnostic,
|
||||||
count: usize,
|
count: usize,
|
||||||
) -> Option<token::TokenKind> {
|
) -> Option<token::TokenKind> {
|
||||||
let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
|
let &(_, u_name, ascii_str) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
|
||||||
|
|
||||||
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
|
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count));
|
||||||
|
|
||||||
let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else {
|
let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
|
||||||
let msg = format!("substitution character not found for '{}'", ch);
|
let msg = format!("substitution character not found for '{}'", ch);
|
||||||
reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
|
reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
|
||||||
return None;
|
return None;
|
||||||
|
@ -354,7 +356,7 @@ pub(super) fn check_for_substitution<'a>(
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
"Unicode characters '“' (Left Double Quotation Mark) and \
|
"Unicode characters '“' (Left Double Quotation Mark) and \
|
||||||
'”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
|
'”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
|
||||||
ascii_char, ascii_name
|
ascii_str, ascii_name
|
||||||
);
|
);
|
||||||
err.span_suggestion(
|
err.span_suggestion(
|
||||||
Span::with_root_ctxt(
|
Span::with_root_ctxt(
|
||||||
|
@ -368,12 +370,12 @@ pub(super) fn check_for_substitution<'a>(
|
||||||
} else {
|
} else {
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
"Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
|
"Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
|
||||||
ch, u_name, ascii_char, ascii_name
|
ch, u_name, ascii_str, ascii_name
|
||||||
);
|
);
|
||||||
err.span_suggestion(
|
err.span_suggestion(
|
||||||
span,
|
span,
|
||||||
&msg,
|
&msg,
|
||||||
ascii_char.to_string().repeat(count),
|
ascii_str.to_string().repeat(count),
|
||||||
Applicability::MaybeIncorrect,
|
Applicability::MaybeIncorrect,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,4 +6,7 @@ fn main() {
|
||||||
//~^ ERROR unknown start of token: \u{a0}
|
//~^ ERROR unknown start of token: \u{a0}
|
||||||
//~^^ NOTE character appears 3 more times
|
//~^^ NOTE character appears 3 more times
|
||||||
//~^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
|
//~^^^ HELP Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is not
|
||||||
|
let _ = 1 ⩵ 2;
|
||||||
|
//~^ ERROR unknown start of token
|
||||||
|
//~^^ HELP Unicode character '⩵' (Two Consecutive Equals Signs) looks like '==' (Double Equals Sign), but it is not
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,5 +21,16 @@ help: Unicode character ' ' (No-Break Space) looks like ' ' (Space), but it is
|
||||||
LL | let x = 0;
|
LL | let x = 0;
|
||||||
| ++++
|
| ++++
|
||||||
|
|
||||||
error: aborting due to 2 previous errors
|
error: unknown start of token: \u{2a75}
|
||||||
|
--> $DIR/unicode-chars.rs:9:15
|
||||||
|
|
|
||||||
|
LL | let _ = 1 ⩵ 2;
|
||||||
|
| ^
|
||||||
|
|
|
||||||
|
help: Unicode character '⩵' (Two Consecutive Equals Signs) looks like '==' (Double Equals Sign), but it is not
|
||||||
|
|
|
||||||
|
LL | let _ = 1 == 2;
|
||||||
|
| ~~
|
||||||
|
|
||||||
|
error: aborting due to 3 previous errors
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue