1
Fork 0

Rollup merge of #134253 - nnethercote:overhaul-keywords, r=petrochenkov

Overhaul keyword handling

The compiler's list of keywords has some problems.
- It contains several items that aren't keywords.
- The order isn't quite right in a couple of places.
- Some of the names of predicates relating to keywords are confusing.
- rustdoc and rustfmt have their own (incorrect) versions of the keyword list.
- `AllKeywords` is unnecessarily complex.

r? ```@jieyouxu```
This commit is contained in:
许杰友 Jieyou Xu (Joe) 2024-12-18 22:56:53 +08:00 committed by GitHub
commit 0a2d708c31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 67 additions and 121 deletions

View file

@ -903,7 +903,8 @@ impl Token {
self.is_non_raw_ident_where(|id| id.name == kw) self.is_non_raw_ident_where(|id| id.name == kw)
} }
/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case. /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
/// token is an identifier equal to `kw` ignoring the case.
pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool { pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
self.is_keyword(kw) self.is_keyword(kw)
|| (case == Case::Insensitive || (case == Case::Insensitive
@ -916,6 +917,11 @@ impl Token {
self.is_non_raw_ident_where(Ident::is_path_segment_keyword) self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
} }
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(&self) -> bool {
self.is_non_raw_ident_where(Ident::is_any_keyword)
}
/// Returns true for reserved identifiers used internally for elided lifetimes, /// Returns true for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc. /// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special_ident(&self) -> bool { pub fn is_special_ident(&self) -> bool {

View file

@ -22,7 +22,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded; use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::edit_distance::find_best_match_for_name; use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::Spanned; use rustc_span::source_map::Spanned;
use rustc_span::symbol::AllKeywords; use rustc_span::symbol::used_keywords;
use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym}; use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec}; use thin_vec::{ThinVec, thin_vec};
use tracing::{debug, trace}; use tracing::{debug, trace};
@ -811,12 +811,12 @@ impl<'a> Parser<'a> {
// so that it gets generated only when the diagnostic needs it. // so that it gets generated only when the diagnostic needs it.
// Also, it is unlikely that this list is generated multiple times because the // Also, it is unlikely that this list is generated multiple times because the
// parser halts after execution hits this path. // parser halts after execution hits this path.
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition()); let all_keywords = used_keywords(|| prev_ident.span.edition());
// Otherwise, check the previous token with all the keywords as possible candidates. // Otherwise, check the previous token with all the keywords as possible candidates.
// This handles code like `Struct Human;` and `While a < b {}`. // This handles code like `Struct Human;` and `While a < b {}`.
// We check the previous token only when the current token is an identifier to avoid false // We check the previous token only when the current token is an identifier to avoid
// positives like suggesting keyword `for` for `extern crate foo {}`. // false positives like suggesting keyword `for` for `extern crate foo {}`.
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) { if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
err.subdiagnostic(misspelled_kw); err.subdiagnostic(misspelled_kw);
// We don't want other suggestions to be added as they are most likely meaningless // We don't want other suggestions to be added as they are most likely meaningless

View file

@ -20,18 +20,26 @@ mod tests;
// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
symbols! { symbols! {
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword` // This list includes things that are definitely keywords (e.g. `if`),
// and `AllKeywords`. // a few things that are definitely not keywords (e.g. the empty symbol,
// `{{root}}`) and things where there is disagreement between people and/or
// documents (such as the Rust Reference) about whether it is a keyword
// (e.g. `_`).
//
// If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and
// `used_keywords`.
// But this should rarely be necessary if the keywords are kept in alphabetic order. // But this should rarely be necessary if the keywords are kept in alphabetic order.
Keywords { Keywords {
// Special reserved identifiers used internally for elided lifetimes, // Special reserved identifiers used internally for elided lifetimes,
// unnamed method parameters, crate root module, error recovery etc. // unnamed method parameters, crate root module, error recovery etc.
// Matching predicates: `is_special`/`is_reserved`
// (`Symbol::is_any_keyword` covers `As..=Yeet` only, so it does not match these)
Empty: "", Empty: "",
PathRoot: "{{root}}", PathRoot: "{{root}}",
DollarCrate: "$crate", DollarCrate: "$crate",
Underscore: "_", Underscore: "_",
// Keywords that are used in stable Rust. // Keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved`
As: "as", As: "as",
Break: "break", Break: "break",
Const: "const", Const: "const",
@ -69,6 +77,7 @@ symbols! {
While: "while", While: "while",
// Keywords that are used in unstable Rust or reserved for future use. // Keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved`
Abstract: "abstract", Abstract: "abstract",
Become: "become", Become: "become",
Box: "box", Box: "box",
@ -83,23 +92,29 @@ symbols! {
Yield: "yield", Yield: "yield",
// Edition-specific keywords that are used in stable Rust. // Edition-specific keywords that are used in stable Rust.
// Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Async: "async", // >= 2018 Edition only Async: "async", // >= 2018 Edition only
Await: "await", // >= 2018 Edition only Await: "await", // >= 2018 Edition only
Dyn: "dyn", // >= 2018 Edition only Dyn: "dyn", // >= 2018 Edition only
// Edition-specific keywords that are used in unstable Rust or reserved for future use. // Edition-specific keywords that are used in unstable Rust or reserved for future use.
// Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if
// the edition suffices)
Gen: "gen", // >= 2024 Edition only
Try: "try", // >= 2018 Edition only Try: "try", // >= 2018 Edition only
// Special lifetime names // "Lifetime keywords": regular keywords with a leading `'`.
// Matching predicates: `is_any_keyword`
UnderscoreLifetime: "'_", UnderscoreLifetime: "'_",
StaticLifetime: "'static", StaticLifetime: "'static",
// Weak keywords, have special meaning only in specific contexts. // Weak keywords, have special meaning only in specific contexts.
// Matching predicates: `is_any_keyword`
Auto: "auto", Auto: "auto",
Builtin: "builtin", Builtin: "builtin",
Catch: "catch", Catch: "catch",
Default: "default", Default: "default",
Gen: "gen",
MacroRules: "macro_rules", MacroRules: "macro_rules",
Raw: "raw", Raw: "raw",
Reuse: "reuse", Reuse: "reuse",
@ -2589,6 +2604,11 @@ pub mod sym {
} }
impl Symbol { impl Symbol {
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
self >= kw::As && self <= kw::Yeet
}
fn is_special(self) -> bool { fn is_special(self) -> bool {
self <= kw::Underscore self <= kw::Underscore
} }
@ -2606,8 +2626,8 @@ impl Symbol {
} }
fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool { fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
self == kw::Try && edition().at_least_rust_2018() self == kw::Gen && edition().at_least_rust_2024()
|| self == kw::Gen && edition().at_least_rust_2024() || self == kw::Try && edition().at_least_rust_2018()
} }
pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool { pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
@ -2645,6 +2665,11 @@ impl Symbol {
} }
impl Ident { impl Ident {
/// Don't use this unless you're doing something very loose and heuristic-y.
pub fn is_any_keyword(self) -> bool {
self.name.is_any_keyword()
}
/// Returns `true` for reserved identifiers used internally for elided lifetimes, /// Returns `true` for reserved identifiers used internally for elided lifetimes,
/// unnamed method parameters, crate root module, error recovery etc. /// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special(self) -> bool { pub fn is_special(self) -> bool {
@ -2683,41 +2708,19 @@ impl Ident {
} }
} }
/// An iterator over all the keywords in Rust. /// Collect all the keywords in a given edition into a vector.
#[derive(Copy, Clone)]
pub struct AllKeywords {
curr_idx: u32,
end_idx: u32,
}
impl AllKeywords {
/// Initialize a new iterator over all the keywords.
/// ///
/// *Note:* Please update this if a new keyword is added beyond the current /// *Note:* Please update this if a new keyword is added beyond the current
/// range. /// range.
pub fn new() -> Self { pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() } (kw::Empty.as_u32()..kw::Yeet.as_u32())
} .filter_map(|kw| {
let kw = Symbol::new(kw);
/// Collect all the keywords in a given edition into a vector. if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) {
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> { Some(kw)
self.filter(|&keyword| {
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
})
.collect()
}
}
impl Iterator for AllKeywords {
type Item = Symbol;
fn next(&mut self) -> Option<Self::Item> {
if self.curr_idx <= self.end_idx {
let keyword = Symbol::new(self.curr_idx);
self.curr_idx += 1;
Some(keyword)
} else { } else {
None None
} }
} })
.collect()
} }

View file

@ -4,8 +4,7 @@ use rustc_ast::{ast, ptr};
use rustc_parse::MACRO_ARGUMENTS; use rustc_parse::MACRO_ARGUMENTS;
use rustc_parse::parser::{ForceCollect, Parser, Recovery}; use rustc_parse::parser::{ForceCollect, Parser, Recovery};
use rustc_session::parse::ParseSess; use rustc_session::parse::ParseSess;
use rustc_span::Symbol; use rustc_span::symbol;
use rustc_span::symbol::{self, kw};
use crate::macros::MacroArg; use crate::macros::MacroArg;
use crate::rewrite::RewriteContext; use crate::rewrite::RewriteContext;
@ -82,19 +81,19 @@ pub(crate) struct ParsedMacroArgs {
} }
fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option<MacroArg> { fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option<MacroArg> {
for &keyword in RUST_KW.iter() { if parser.token.is_any_keyword()
if parser.token.is_keyword(keyword)
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma) && parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
{ {
let keyword = parser.token.ident().unwrap().0.name;
parser.bump(); parser.bump();
return Some(MacroArg::Keyword( Some(MacroArg::Keyword(
symbol::Ident::with_dummy_span(keyword), symbol::Ident::with_dummy_span(keyword),
parser.prev_token.span, parser.prev_token.span,
)); ))
} } else {
}
None None
} }
}
pub(crate) fn parse_macro_args( pub(crate) fn parse_macro_args(
context: &RewriteContext<'_>, context: &RewriteContext<'_>,
@ -169,65 +168,3 @@ pub(crate) fn parse_expr(
let mut parser = build_parser(context, tokens); let mut parser = build_parser(context, tokens);
parser.parse_expr().ok() parser.parse_expr().ok()
} }
const RUST_KW: [Symbol; 59] = [
kw::PathRoot,
kw::DollarCrate,
kw::Underscore,
kw::As,
kw::Box,
kw::Break,
kw::Const,
kw::Continue,
kw::Crate,
kw::Else,
kw::Enum,
kw::Extern,
kw::False,
kw::Fn,
kw::For,
kw::If,
kw::Impl,
kw::In,
kw::Let,
kw::Loop,
kw::Match,
kw::Mod,
kw::Move,
kw::Mut,
kw::Pub,
kw::Ref,
kw::Return,
kw::SelfLower,
kw::SelfUpper,
kw::Static,
kw::Struct,
kw::Super,
kw::Trait,
kw::True,
kw::Type,
kw::Unsafe,
kw::Use,
kw::Where,
kw::While,
kw::Abstract,
kw::Become,
kw::Do,
kw::Final,
kw::Macro,
kw::Override,
kw::Priv,
kw::Typeof,
kw::Unsized,
kw::Virtual,
kw::Yield,
kw::Dyn,
kw::Async,
kw::Try,
kw::UnderscoreLifetime,
kw::StaticLifetime,
kw::Auto,
kw::Catch,
kw::Default,
kw::Union,
];