1
Fork 0

Rollup merge of #109203 - Ezrashaw:refactor-ident-parsing, r=Nilstrieb

refactor/feat: refactor identifier parsing a bit

\+ error recovery for `expected_ident_found`

Prior art: #108854
This commit is contained in:
Matthias Krüger 2023-03-22 22:44:39 +01:00 committed by GitHub
commit 34fa6daa5c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 256 additions and 101 deletions

View file

@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re
parse_expected_identifier_found_doc_comment = expected identifier, found doc comment parse_expected_identifier_found_doc_comment = expected identifier, found doc comment
parse_expected_identifier = expected identifier parse_expected_identifier = expected identifier
parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_remove_comma = remove this comma parse_sugg_remove_comma = remove this comma

View file

@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem {
#[derive(Subdiagnostic)] #[derive(Subdiagnostic)]
#[suggestion( #[suggestion(
parse_sugg_escape_to_use_as_identifier, parse_sugg_escape_identifier,
style = "verbose", style = "verbose",
applicability = "maybe-incorrect", applicability = "maybe-incorrect",
code = "r#" code = "r#"
)] )]
pub(crate) struct SuggEscapeToUseAsIdentifier { pub(crate) struct SuggEscapeIdentifier {
#[primary_span] #[primary_span]
pub span: Span, pub span: Span,
pub ident_name: String, pub ident_name: String,
@ -937,7 +937,7 @@ impl ExpectedIdentifierFound {
pub(crate) struct ExpectedIdentifier { pub(crate) struct ExpectedIdentifier {
pub span: Span, pub span: Span,
pub token: Token, pub token: Token,
pub suggest_raw: Option<SuggEscapeToUseAsIdentifier>, pub suggest_raw: Option<SuggEscapeIdentifier>,
pub suggest_remove_comma: Option<SuggRemoveComma>, pub suggest_remove_comma: Option<SuggRemoveComma>,
pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>, pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>,
} }
@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier {
#[derive(Subdiagnostic)] #[derive(Subdiagnostic)]
#[help(parse_invalid_identifier_with_leading_number)] #[help(parse_invalid_identifier_with_leading_number)]
pub(crate) struct HelpIdentifierStartsWithNumber; pub(crate) struct HelpIdentifierStartsWithNumber {
#[primary_span]
pub num_span: Span,
}
pub(crate) struct ExpectedSemi { pub(crate) struct ExpectedSemi {
pub span: Span, pub span: Span,

View file

@ -6,14 +6,14 @@ use super::{
use crate::errors::{ use crate::errors::{
AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub, AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub,
ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg, ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType, ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything,
DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg, DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg, GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg,
HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon, HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon,
IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg, IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg,
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst, PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens, StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens,
StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma, StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma,
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration, UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead, UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead,
}; };
@ -38,7 +38,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded; use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::source_map::Spanned; use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident}; use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{Span, SpanSnippetError, DUMMY_SP}; use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP};
use std::mem::take; use std::mem::take;
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
use thin_vec::{thin_vec, ThinVec}; use thin_vec::{thin_vec, ThinVec};
@ -268,7 +268,21 @@ impl<'a> Parser<'a> {
self.sess.source_map().span_to_snippet(span) self.sess.source_map().span_to_snippet(span)
} }
pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { /// Emits an error with suggestions if an identifier was expected but not found.
///
/// Returns a possibly recovered identifier.
pub(super) fn expected_ident_found(
&mut self,
recover: bool,
) -> PResult<'a, (Ident, /* is_raw */ bool)> {
if let TokenKind::DocComment(..) = self.prev_token.kind {
return Err(DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic));
}
let valid_follow = &[ let valid_follow = &[
TokenKind::Eq, TokenKind::Eq,
TokenKind::Colon, TokenKind::Colon,
@ -281,31 +295,51 @@ impl<'a> Parser<'a> {
TokenKind::CloseDelim(Delimiter::Parenthesis), TokenKind::CloseDelim(Delimiter::Parenthesis),
]; ];
let suggest_raw = match self.token.ident() { let mut recovered_ident = None;
Some((ident, false)) // we take this here so that the correct original token is retained in
if ident.is_raw_guess() // the diagnostic, regardless of eager recovery.
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind)) => let bad_token = self.token.clone();
{
Some(SuggEscapeToUseAsIdentifier {
span: ident.span.shrink_to_lo(),
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
ident_name: ident.name.to_string(),
})
}
_ => None,
};
let suggest_remove_comma = (self.token == token::Comma // suggest prepending a keyword in identifier position with `r#`
&& self.look_ahead(1, |t| t.is_ident())) let suggest_raw = if let Some((ident, false)) = self.token.ident()
.then_some(SuggRemoveComma { span: self.token.span }); && ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
{
recovered_ident = Some((ident, true));
let help_cannot_start_number = // `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber); // which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
let ident_name = ident.name.to_string();
Some(SuggEscapeIdentifier {
span: ident.span.shrink_to_lo(),
ident_name
})
} else { None };
let suggest_remove_comma =
if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
if recover {
self.bump();
recovered_ident = self.ident_or_err(false).ok();
};
Some(SuggRemoveComma { span: bad_token.span })
} else {
None
};
let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| {
let (invalid, valid) = self.token.span.split_at(len as u32);
recovered_ident = Some((Ident::new(valid_portion, valid), false));
HelpIdentifierStartsWithNumber { num_span: invalid }
});
let err = ExpectedIdentifier { let err = ExpectedIdentifier {
span: self.token.span, span: bad_token.span,
token: self.token.clone(), token: bad_token,
suggest_raw, suggest_raw,
suggest_remove_comma, suggest_remove_comma,
help_cannot_start_number, help_cannot_start_number,
@ -314,6 +348,7 @@ impl<'a> Parser<'a> {
// if the token we have is a `<` // if the token we have is a `<`
// it *might* be a misplaced generic // it *might* be a misplaced generic
// FIXME: could we recover with this?
if self.token == token::Lt { if self.token == token::Lt {
// all keywords that could have generic applied // all keywords that could have generic applied
let valid_prev_keywords = let valid_prev_keywords =
@ -364,18 +399,38 @@ impl<'a> Parser<'a> {
} }
} }
err if let Some(recovered_ident) = recovered_ident && recover {
err.emit();
Ok(recovered_ident)
} else {
Err(err)
}
}
pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
self.expected_ident_found(false).unwrap_err()
} }
/// Checks if the current token is a integer or float literal and looks like /// Checks if the current token is a integer or float literal and looks like
/// it could be a invalid identifier with digits at the start. /// it could be a invalid identifier with digits at the start.
pub(super) fn is_lit_bad_ident(&mut self) -> bool { ///
matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. }) /// Returns the number of characters (bytes) composing the invalid portion
// ensure that the integer literal is followed by a *invalid* /// of the identifier and the valid portion of the identifier.
// suffix: this is how we know that it is a identifier with an pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> {
// invalid beginning. // ensure that the integer literal is followed by a *invalid*
if rustc_ast::MetaItemLit::from_token(&self.token).is_none() // suffix: this is how we know that it is a identifier with an
) // invalid beginning.
if let token::Literal(Lit {
kind: token::LitKind::Integer | token::LitKind::Float,
symbol,
suffix,
}) = self.token.kind
&& rustc_ast::MetaItemLit::from_token(&self.token).is_none()
{
Some((symbol.as_str().len(), suffix.unwrap()))
} else {
None
}
} }
pub(super) fn expected_one_of_not_found( pub(super) fn expected_one_of_not_found(

View file

@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> {
defaultness: Defaultness, defaultness: Defaultness,
) -> PResult<'a, ItemInfo> { ) -> PResult<'a, ItemInfo> {
let impl_span = self.token.span; let impl_span = self.token.span;
let mut err = self.expected_ident_found(); let mut err = self.expected_ident_found_err();
// Only try to recover if this is implementing a trait for a type // Only try to recover if this is implementing a trait for a type
let mut impl_info = match self.parse_item_impl(attrs, defaultness) { let mut impl_info = match self.parse_item_impl(attrs, defaultness) {
@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> {
/// Parses a field identifier. Specialized version of `parse_ident_common` /// Parses a field identifier. Specialized version of `parse_ident_common`
/// for better diagnostics and suggestions. /// for better diagnostics and suggestions.
fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> { fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?; let (ident, is_raw) = self.ident_or_err(true)?;
if !is_raw && ident.is_reserved() { if !is_raw && ident.is_reserved() {
let snapshot = self.create_snapshot_for_diagnostic(); let snapshot = self.create_snapshot_for_diagnostic();
let err = if self.check_fn_front_matter(false, Case::Sensitive) { let err = if self.check_fn_front_matter(false, Case::Sensitive) {
@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> {
Err(err) => { Err(err) => {
err.cancel(); err.cancel();
self.restore_snapshot(snapshot); self.restore_snapshot(snapshot);
self.expected_ident_found() self.expected_ident_found_err()
} }
} }
} else if self.eat_keyword(kw::Struct) { } else if self.eat_keyword(kw::Struct) {
@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> {
Err(err) => { Err(err) => {
err.cancel(); err.cancel();
self.restore_snapshot(snapshot); self.restore_snapshot(snapshot);
self.expected_ident_found() self.expected_ident_found_err()
} }
} }
} else { } else {
let mut err = self.expected_ident_found(); let mut err = self.expected_ident_found_err();
if self.eat_keyword_noexpect(kw::Let) if self.eat_keyword_noexpect(kw::Let)
&& let removal_span = self.prev_token.span.until(self.token.span) && let removal_span = self.prev_token.span.until(self.token.span)
&& let Ok(ident) = self.parse_ident_common(false) && let Ok(ident) = self.parse_ident_common(false)

View file

@ -42,8 +42,7 @@ use thin_vec::ThinVec;
use tracing::debug; use tracing::debug;
use crate::errors::{ use crate::errors::{
DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter, IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
NonStringAbiLiteral,
}; };
bitflags::bitflags! { bitflags::bitflags! {
@ -552,21 +551,11 @@ impl<'a> Parser<'a> {
self.parse_ident_common(true) self.parse_ident_common(true)
} }
fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> {
self.token.ident().ok_or_else(|| match self.prev_token.kind {
TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic),
_ => self.expected_ident_found(),
})
}
fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> { fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?; let (ident, is_raw) = self.ident_or_err(recover)?;
if !is_raw && ident.is_reserved() { if !is_raw && ident.is_reserved() {
let mut err = self.expected_ident_found(); let mut err = self.expected_ident_found_err();
if recover { if recover {
err.emit(); err.emit();
} else { } else {
@ -577,6 +566,21 @@ impl<'a> Parser<'a> {
Ok(ident) Ok(ident)
} }
fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> {
let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover));
let (ident, is_raw) = match result {
Ok(ident) => ident,
Err(err) => match err {
// we recovered!
Ok(ident) => ident,
Err(err) => return Err(err),
},
};
Ok((ident, is_raw))
}
/// Checks if the next token is `tok`, and returns `true` if so. /// Checks if the next token is `tok`, and returns `true` if so.
/// ///
/// This method will automatically add `tok` to `expected_tokens` if `tok` is not /// This method will automatically add `tok` to `expected_tokens` if `tok` is not

View file

@ -348,10 +348,6 @@ impl<'a> Parser<'a> {
lo = self.token.span; lo = self.token.span;
} }
if self.is_lit_bad_ident() {
return Err(self.expected_ident_found());
}
let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd { let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd {
self.parse_pat_deref(expected)? self.parse_pat_deref(expected)?
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
@ -395,7 +391,13 @@ impl<'a> Parser<'a> {
} else { } else {
PatKind::Lit(const_expr) PatKind::Lit(const_expr)
} }
} else if self.can_be_ident_pat() { // Don't eagerly error on semantically invalid tokens when matching
// declarative macros, as the input to those doesn't have to be
// semantically valid. For attribute/derive proc macros this is not the
// case, so doing the recovery for them is fine.
} else if self.can_be_ident_pat()
|| (self.is_lit_bad_ident().is_some() && self.may_recover())
{
// Parse `ident @ pat` // Parse `ident @ pat`
// This can give false positives and parse nullary enums, // This can give false positives and parse nullary enums,
// they are dealt with later in resolve. // they are dealt with later in resolve.
@ -594,7 +596,7 @@ impl<'a> Parser<'a> {
// Make sure we don't allow e.g. `let mut $p;` where `$p:pat`. // Make sure we don't allow e.g. `let mut $p;` where `$p:pat`.
if let token::Interpolated(nt) = &self.token.kind { if let token::Interpolated(nt) = &self.token.kind {
if let token::NtPat(_) = **nt { if let token::NtPat(_) = **nt {
self.expected_ident_found().emit(); self.expected_ident_found_err().emit();
} }
} }

View file

@ -795,6 +795,18 @@ impl Span {
}) })
} }
/// Splits a span into two composite spans around a certain position.
pub fn split_at(self, pos: u32) -> (Span, Span) {
let len = self.hi().0 - self.lo().0;
debug_assert!(pos <= len);
let split_pos = BytePos(self.lo().0 + pos);
(
Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
)
}
/// Returns a `Span` that would enclose both `self` and `end`. /// Returns a `Span` that would enclose both `self` and `end`.
/// ///
/// Note that this can also be used to extend the span "backwards": /// Note that this can also be used to extend the span "backwards":

View file

@ -0,0 +1,16 @@
fn ,comma() {
//~^ ERROR expected identifier, found `,`
struct Foo {
x: i32,,
//~^ ERROR expected identifier, found `,`
y: u32,
}
}
fn break() {
//~^ ERROR expected identifier, found keyword `break`
let continue = 5;
//~^ ERROR expected identifier, found keyword `continue`
}
fn main() {}

View file

@ -0,0 +1,42 @@
error: expected identifier, found `,`
--> $DIR/ident-recovery.rs:1:4
|
LL | fn ,comma() {
| ^
| |
| expected identifier
| help: remove this comma
error: expected identifier, found `,`
--> $DIR/ident-recovery.rs:4:16
|
LL | x: i32,,
| ^
| |
| expected identifier
| help: remove this comma
error: expected identifier, found keyword `break`
--> $DIR/ident-recovery.rs:10:4
|
LL | fn break() {
| ^^^^^ expected identifier, found keyword
|
help: escape `break` to use it as an identifier
|
LL | fn r#break() {
| ++
error: expected identifier, found keyword `continue`
--> $DIR/ident-recovery.rs:12:9
|
LL | let continue = 5;
| ^^^^^^^^ expected identifier, found keyword
|
help: escape `continue` to use it as an identifier
|
LL | let r#continue = 5;
| ++
error: aborting due to 4 previous errors

View file

@ -4,7 +4,11 @@ error: expected identifier, found `1main`
LL | fn 1main() {} LL | fn 1main() {}
| ^^^^^ expected identifier | ^^^^^ expected identifier
| |
= help: identifiers cannot start with a number help: identifiers cannot start with a number
--> $DIR/integer-literal-start-ident.rs:1:4
|
LL | fn 1main() {}
| ^
error: aborting due to previous error error: aborting due to previous error

View file

@ -1,26 +1,19 @@
fn test() { fn 1234test() {
//~^ ERROR expected identifier, found `1234test`
if let 123 = 123 { println!("yes"); } if let 123 = 123 { println!("yes"); }
}
fn test_2() { if let 2e1 = 123 {
//~^ ERROR mismatched types
}
let 23name = 123;
//~^ ERROR expected identifier, found `23name`
let 2x: i32 = 123;
//~^ ERROR expected identifier, found `2x`
let 1x = 123; let 1x = 123;
//~^ ERROR expected identifier, found `1x` //~^ ERROR expected identifier, found `1x`
} }
fn test_3() {
let 2x: i32 = 123;
//~^ ERROR expected identifier, found `2x`
}
fn test_4() {
if let 2e1 = 123 {
//~^ ERROR mismatched types
}
}
fn test_5() {
let 23name = 123;
//~^ ERROR expected identifier, found `23name`
}
fn main() {} fn main() {}

View file

@ -1,35 +1,59 @@
error: expected identifier, found `1x` error: expected identifier, found `1234test`
--> $DIR/issue-104088.rs:6:9 --> $DIR/issue-104088.rs:1:4
| |
LL | let 1x = 123; LL | fn 1234test() {
| ^^ expected identifier | ^^^^^^^^ expected identifier
| |
= help: identifiers cannot start with a number help: identifiers cannot start with a number
--> $DIR/issue-104088.rs:1:4
error: expected identifier, found `2x`
--> $DIR/issue-104088.rs:11:9
| |
LL | let 2x: i32 = 123; LL | fn 1234test() {
| ^^ expected identifier | ^^^^
|
= help: identifiers cannot start with a number
error: expected identifier, found `23name` error: expected identifier, found `23name`
--> $DIR/issue-104088.rs:22:9 --> $DIR/issue-104088.rs:9:9
| |
LL | let 23name = 123; LL | let 23name = 123;
| ^^^^^^ expected identifier | ^^^^^^ expected identifier
| |
= help: identifiers cannot start with a number help: identifiers cannot start with a number
--> $DIR/issue-104088.rs:9:9
|
LL | let 23name = 123;
| ^^
error: expected identifier, found `2x`
--> $DIR/issue-104088.rs:12:9
|
LL | let 2x: i32 = 123;
| ^^ expected identifier
|
help: identifiers cannot start with a number
--> $DIR/issue-104088.rs:12:9
|
LL | let 2x: i32 = 123;
| ^
error: expected identifier, found `1x`
--> $DIR/issue-104088.rs:15:9
|
LL | let 1x = 123;
| ^^ expected identifier
|
help: identifiers cannot start with a number
--> $DIR/issue-104088.rs:15:9
|
LL | let 1x = 123;
| ^
error[E0308]: mismatched types error[E0308]: mismatched types
--> $DIR/issue-104088.rs:16:12 --> $DIR/issue-104088.rs:5:12
| |
LL | if let 2e1 = 123 { LL | if let 2e1 = 123 {
| ^^^ --- this expression has type `{integer}` | ^^^ --- this expression has type `{integer}`
| | | |
| expected integer, found floating-point number | expected integer, found floating-point number
error: aborting due to 4 previous errors error: aborting due to 5 previous errors
For more information about this error, try `rustc --explain E0308`. For more information about this error, try `rustc --explain E0308`.