Add Suggestions for Misspelled Keywords
This PR detects misspelled keywords using two heuristics: 1. Lowercasing the unexpected identifier and checking whether the result is a keyword. 2. Using edit distance to find a keyword similar to the unexpected identifier. To minimize false positives and ambiguities, it deliberately does not try to detect every misspelled keyword. More details about the implementation can be found in the code comments.
This commit is contained in:
parent
265cd14cd4
commit
14e86eb7d9
32 changed files with 298 additions and 17 deletions
|
@ -19,8 +19,9 @@ use rustc_errors::{
|
|||
Subdiagnostic,
|
||||
};
|
||||
use rustc_session::errors::ExprParenthesesNeeded;
|
||||
use rustc_span::edit_distance::find_best_match_for_name;
|
||||
use rustc_span::source_map::Spanned;
|
||||
use rustc_span::symbol::{kw, sym, Ident};
|
||||
use rustc_span::symbol::{kw, sym, AllKeywords, Ident};
|
||||
use rustc_span::{BytePos, Span, SpanSnippetError, Symbol, DUMMY_SP};
|
||||
use thin_vec::{thin_vec, ThinVec};
|
||||
use tracing::{debug, trace};
|
||||
|
@ -203,6 +204,37 @@ impl std::fmt::Display for UnaryFixity {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, rustc_macros::Subdiagnostic)]
|
||||
#[suggestion(
|
||||
parse_misspelled_kw,
|
||||
applicability = "machine-applicable",
|
||||
code = "{similar_kw}",
|
||||
style = "verbose"
|
||||
)]
|
||||
struct MisspelledKw {
|
||||
similar_kw: String,
|
||||
#[primary_span]
|
||||
span: Span,
|
||||
is_incorrect_case: bool,
|
||||
}
|
||||
|
||||
/// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`.
|
||||
fn find_similar_kw(lookup: Ident, candidates: &[Symbol]) -> Option<MisspelledKw> {
|
||||
let lowercase = lookup.name.as_str().to_lowercase();
|
||||
let lowercase_sym = Symbol::intern(&lowercase);
|
||||
if candidates.contains(&lowercase_sym) {
|
||||
Some(MisspelledKw { similar_kw: lowercase, span: lookup.span, is_incorrect_case: true })
|
||||
} else if let Some(similar_sym) = find_best_match_for_name(candidates, lookup.name, None) {
|
||||
Some(MisspelledKw {
|
||||
similar_kw: similar_sym.to_string(),
|
||||
span: lookup.span,
|
||||
is_incorrect_case: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
struct MultiSugg {
|
||||
msg: String,
|
||||
patches: Vec<(Span, String)>,
|
||||
|
@ -638,9 +670,9 @@ impl<'a> Parser<'a> {
|
|||
let concat = Symbol::intern(&format!("{prev}{cur}"));
|
||||
let ident = Ident::new(concat, DUMMY_SP);
|
||||
if ident.is_used_keyword() || ident.is_reserved() || ident.is_raw_guess() {
|
||||
let span = self.prev_token.span.to(self.token.span);
|
||||
let concat_span = self.prev_token.span.to(self.token.span);
|
||||
err.span_suggestion_verbose(
|
||||
span,
|
||||
concat_span,
|
||||
format!("consider removing the space to spell keyword `{concat}`"),
|
||||
concat,
|
||||
Applicability::MachineApplicable,
|
||||
|
@ -741,9 +773,55 @@ impl<'a> Parser<'a> {
|
|||
err.span_label(sp, label_exp);
|
||||
err.span_label(self.token.span, "unexpected token");
|
||||
}
|
||||
|
||||
// Check for misspelled keywords if there are no suggestions added to the diagnostic.
|
||||
if err.suggestions.as_ref().is_ok_and(|code_suggestions| code_suggestions.is_empty()) {
|
||||
self.check_for_misspelled_kw(&mut err, &expected);
|
||||
}
|
||||
Err(err)
|
||||
}
|
||||
|
||||
/// Checks if the current token or the previous token are misspelled keywords
|
||||
/// and adds a helpful suggestion.
|
||||
fn check_for_misspelled_kw(&self, err: &mut Diag<'_>, expected: &[TokenType]) {
|
||||
let Some((curr_ident, _)) = self.token.ident() else {
|
||||
return;
|
||||
};
|
||||
let expected_tokens: &[TokenType] =
|
||||
expected.len().checked_sub(10).map_or(&expected, |index| &expected[index..]);
|
||||
let expected_keywords: Vec<Symbol> = expected_tokens
|
||||
.iter()
|
||||
.filter_map(|token| if let TokenType::Keyword(kw) = token { Some(*kw) } else { None })
|
||||
.collect();
|
||||
|
||||
// When there are a few keywords in the last ten elements of `self.expected_tokens` and the current
|
||||
// token is an identifier, it's probably a misspelled keyword.
|
||||
// This handles code like `async Move {}`, misspelled `if` in match guard, misspelled `else` in `if`-`else`
|
||||
// and mispelled `where` in a where clause.
|
||||
if !expected_keywords.is_empty()
|
||||
&& !curr_ident.is_used_keyword()
|
||||
&& let Some(misspelled_kw) = find_similar_kw(curr_ident, &expected_keywords)
|
||||
{
|
||||
err.subdiagnostic(misspelled_kw);
|
||||
} else if let Some((prev_ident, _)) = self.prev_token.ident()
|
||||
&& !prev_ident.is_used_keyword()
|
||||
{
|
||||
// We generate a list of all keywords at runtime rather than at compile time
|
||||
// so that it gets generated only when the diagnostic needs it.
|
||||
// Also, it is unlikely that this list is generated multiple times because the
|
||||
// parser halts after execution hits this path.
|
||||
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
|
||||
|
||||
// Otherwise, check the previous token with all the keywords as possible candidates.
|
||||
// This handles code like `Struct Human;` and `While a < b {}`.
|
||||
// We check the previous token only when the current token is an identifier to avoid false
|
||||
// positives like suggesting keyword `for` for `extern crate foo {}`.
|
||||
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
|
||||
err.subdiagnostic(misspelled_kw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The user has written `#[attr] expr` which is unsupported. (#106020)
|
||||
pub(super) fn attr_on_non_tail_expr(&self, expr: &Expr) -> ErrorGuaranteed {
|
||||
// Missing semicolon typo error.
|
||||
|
@ -846,6 +924,7 @@ impl<'a> Parser<'a> {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
err.emit()
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue