1
Fork 0

Add Suggestions for Misspelled Keywords

This PR detects misspelled keywords using two heuristics:

1. Lowercasing the unexpected identifier.
2. Using edit distance to find a keyword similar to the unexpected identifier.

However, to minimize false positives and ambiguities, it deliberately
does not try to detect every misspelled keyword. More details about the
implementation can be found in the code comments.
This commit is contained in:
Veera 2024-09-02 12:43:35 -04:00
parent 265cd14cd4
commit 14e86eb7d9
32 changed files with 298 additions and 17 deletions

View file

@ -381,6 +381,7 @@ parse_invalid_char_in_escape_msg = invalid character in {$is_hex ->
*[false] unicode
} escape
parse_invalid_comparison_operator = invalid comparison operator `{$invalid}`
.use_instead = `{$invalid}` is not a valid comparison operator, use `{$correct}`
.spaceship_operator_invalid = `<=>` is not a valid comparison operator, use `std::cmp::Ordering`
@ -581,6 +582,11 @@ parse_missing_trait_in_trait_impl = missing trait in a trait impl
.suggestion_add_trait = add a trait here
.suggestion_remove_for = for an inherent impl, drop this `for`
parse_misspelled_kw = {$is_incorrect_case ->
[true] write keyword `{$similar_kw}` in lowercase
*[false] there is a keyword `{$similar_kw}` with a similar name
}
parse_modifier_lifetime = `{$modifier}` may only modify trait bounds, not lifetime bounds
.suggestion = remove the `{$modifier}`

View file

@ -19,8 +19,9 @@ use rustc_errors::{
Subdiagnostic,
};
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::symbol::{kw, sym, AllKeywords, Ident};
use rustc_span::{BytePos, Span, SpanSnippetError, Symbol, DUMMY_SP};
use thin_vec::{thin_vec, ThinVec};
use tracing::{debug, trace};
@ -203,6 +204,37 @@ impl std::fmt::Display for UnaryFixity {
}
}
/// Suggestion subdiagnostic that proposes replacing an identifier with the keyword it
/// closely resembles.
///
/// The message text comes from the `parse_misspelled_kw` Fluent entry, and the suggested
/// replacement code is the value of `similar_kw`.
#[derive(Debug, rustc_macros::Subdiagnostic)]
#[suggestion(
parse_misspelled_kw,
applicability = "machine-applicable",
code = "{similar_kw}",
style = "verbose"
)]
struct MisspelledKw {
/// The keyword that is suggested in place of the identifier.
similar_kw: String,
/// Span of the identifier that looks like a misspelled keyword.
#[primary_span]
span: Span,
/// `true` when the lowercased identifier is itself a candidate keyword; this selects
/// the "write keyword ... in lowercase" variant of the message.
is_incorrect_case: bool,
}
/// Looks for a keyword in `candidates` that the `lookup` identifier resembles.
///
/// Two heuristics are tried in order:
/// 1. the lowercased identifier is itself one of the candidates (wrong letter case);
/// 2. `find_best_match_for_name` picks a candidate that is close to the identifier.
///
/// Returns the suggestion to attach to the diagnostic, or `None` if neither heuristic
/// finds a keyword.
fn find_similar_kw(lookup: Ident, candidates: &[Symbol]) -> Option<MisspelledKw> {
    let span = lookup.span;
    let lowered = lookup.name.as_str().to_lowercase();

    // Heuristic 1: the identifier is a keyword written with the wrong case.
    if candidates.contains(&Symbol::intern(&lowered)) {
        return Some(MisspelledKw { similar_kw: lowered, span, is_incorrect_case: true });
    }

    // Heuristic 2: fall back to the closest candidate by name, if there is one.
    find_best_match_for_name(candidates, lookup.name, None).map(|best_match| MisspelledKw {
        similar_kw: best_match.to_string(),
        span,
        is_incorrect_case: false,
    })
}
struct MultiSugg {
msg: String,
patches: Vec<(Span, String)>,
@ -638,9 +670,9 @@ impl<'a> Parser<'a> {
let concat = Symbol::intern(&format!("{prev}{cur}"));
let ident = Ident::new(concat, DUMMY_SP);
if ident.is_used_keyword() || ident.is_reserved() || ident.is_raw_guess() {
let span = self.prev_token.span.to(self.token.span);
let concat_span = self.prev_token.span.to(self.token.span);
err.span_suggestion_verbose(
span,
concat_span,
format!("consider removing the space to spell keyword `{concat}`"),
concat,
Applicability::MachineApplicable,
@ -741,9 +773,55 @@ impl<'a> Parser<'a> {
err.span_label(sp, label_exp);
err.span_label(self.token.span, "unexpected token");
}
// Check for misspelled keywords if there are no suggestions added to the diagnostic.
if err.suggestions.as_ref().is_ok_and(|code_suggestions| code_suggestions.is_empty()) {
self.check_for_misspelled_kw(&mut err, &expected);
}
Err(err)
}
/// Checks whether the current token or the previous token is a misspelled keyword and,
/// if so, attaches a suggestion to `err`.
///
/// Nothing is done unless the current token is an identifier. The current token is
/// compared against the keywords found among the last ten `expected` tokens; if that
/// yields no suggestion, the previous token is compared against every used keyword.
fn check_for_misspelled_kw(&self, err: &mut Diag<'_>, expected: &[TokenType]) {
    let Some((curr_ident, _)) = self.token.ident() else {
        return;
    };

    // Only the last ten expected tokens (or all of them, if there are fewer) are considered.
    let recent_start = expected.len().saturating_sub(10);
    let expected_keywords: Vec<Symbol> = expected[recent_start..]
        .iter()
        .filter_map(|token| match token {
            TokenType::Keyword(keyword) => Some(*keyword),
            _ => None,
        })
        .collect();

    // When there are a few keywords in the last ten elements of `self.expected_tokens` and
    // the current token is an identifier, it's probably a misspelled keyword.
    // This handles code like `async Move {}`, misspelled `if` in match guard, misspelled
    // `else` in `if`-`else` and misspelled `where` in a where clause.
    let current_match = if expected_keywords.is_empty() || curr_ident.is_used_keyword() {
        None
    } else {
        find_similar_kw(curr_ident, &expected_keywords)
    };
    if let Some(misspelled_kw) = current_match {
        err.subdiagnostic(misspelled_kw);
        return;
    }

    // Otherwise, check the previous token with all the keywords as possible candidates.
    // This handles code like `Struct Human;` and `While a < b {}`.
    // We check the previous token only when the current token is an identifier to avoid
    // false positives like suggesting keyword `for` for `extern crate foo {}`.
    let Some((prev_ident, _)) = self.prev_token.ident() else {
        return;
    };
    if prev_ident.is_used_keyword() {
        return;
    }

    // We generate a list of all keywords at runtime rather than at compile time
    // so that it gets generated only when the diagnostic needs it.
    // Also, it is unlikely that this list is generated multiple times because the
    // parser halts after execution hits this path.
    let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
    if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
        err.subdiagnostic(misspelled_kw);
    }
}
/// The user has written `#[attr] expr` which is unsupported. (#106020)
pub(super) fn attr_on_non_tail_expr(&self, expr: &Expr) -> ErrorGuaranteed {
// Missing semicolon typo error.
@ -846,6 +924,7 @@ impl<'a> Parser<'a> {
);
}
}
err.emit()
}

View file

@ -20,7 +20,8 @@ mod tests;
// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
symbols! {
// If you modify this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`.
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
// and `AllKeywords`.
// But this should rarely be necessary if the keywords are kept in alphabetic order.
Keywords {
// Special reserved identifiers used internally for elided lifetimes,
@ -2577,3 +2578,42 @@ impl Ident {
self.name.can_be_raw() && self.is_reserved()
}
}
/// An iterator over all the keywords in Rust.
///
/// Keywords are yielded as `Symbol`s built from consecutive `u32` indices, from
/// `curr_idx` up to and including `end_idx`.
#[derive(Copy, Clone)]
pub struct AllKeywords {
/// Index of the symbol that the next call to `next` yields.
curr_idx: u32,
/// Index of the last symbol to yield (inclusive).
end_idx: u32,
}
impl AllKeywords {
    /// Creates an iterator that starts at `kw::Empty` and ends at `kw::Yeet` (inclusive).
    ///
    /// *Note:* Please update this if a new keyword is added beyond the current
    /// range.
    pub fn new() -> Self {
        let first = kw::Empty.as_u32();
        let last = kw::Yeet.as_u32();
        Self { curr_idx: first, end_idx: last }
    }

    /// Returns every remaining keyword that is a used keyword, either always or
    /// conditionally for the edition produced by `edition`.
    ///
    /// The iterator itself is not advanced: iteration happens on a copy of `self`.
    pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
        let mut used_keywords = Vec::new();
        for keyword in *self {
            if keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition) {
                used_keywords.push(keyword);
            }
        }
        used_keywords
    }
}
impl Iterator for AllKeywords {
    type Item = Symbol;

    /// Yields the symbol at `curr_idx` and advances by one, until `end_idx` has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        // `end_idx` is inclusive, so the iterator is exhausted only once we are past it.
        if self.curr_idx > self.end_idx {
            return None;
        }
        let idx = self.curr_idx;
        let keyword = Symbol::new(idx);
        self.curr_idx = idx + 1;
        Some(keyword)
    }
}