1
Fork 0

Be more careful about interpreting a label/lifetime as a mistyped char literal.

Currently the parser will interpret any label/lifetime in certain
positions as a mistyped char literal, on the assumption that the
trailing single quote was accidentally omitted. This is reasonable for a
something like 'a (because 'a' would be valid) but not reasonable for a
something like 'abc (because 'abc' is not valid).

This commit restricts this behaviour only to labels/lifetimes that would
be valid char literals, via the new `could_be_unclosed_char_literal`
function. The commit also augments the `label-is-actually-char.rs` test
in a couple of ways:
- Adds testing of labels/lifetimes with identifiers longer than one
  char, e.g. 'abc.
- Adds a new match with simpler patterns, because the
  `recover_unclosed_char` call in `parse_pat_with_range_pat` was not
  being exercised (in this test or any other ui tests).

Fixes #120397, an assertion failure, which was caused by this behaviour
in the parser interacting with some new stricter char literal checking
added in #120329.
This commit is contained in:
Nicholas Nethercote 2024-01-29 09:47:02 +11:00
parent 5bda589ff3
commit 306612ea60
4 changed files with 113 additions and 24 deletions

View file

@ -28,6 +28,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::{
AddToDiagnostic, Applicability, Diagnostic, DiagnosticBuilder, PResult, StashKey,
};
use rustc_lexer::unescape::unescape_char;
use rustc_macros::Subdiagnostic;
use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded};
use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP;
@ -1652,6 +1653,7 @@ impl<'a> Parser<'a> {
&& self.may_recover()
&& (matches!(self.token.kind, token::CloseDelim(_) | token::Comma)
|| self.token.is_punct())
&& could_be_unclosed_char_literal(label_.ident)
{
let (lit, _) =
self.recover_unclosed_char(label_.ident, Parser::mk_token_lit_char, |self_| {
@ -1744,6 +1746,7 @@ impl<'a> Parser<'a> {
mk_lit_char: impl FnOnce(Symbol, Span) -> L,
err: impl FnOnce(&Self) -> DiagnosticBuilder<'a>,
) -> L {
assert!(could_be_unclosed_char_literal(ident));
if let Some(diag) = self.dcx().steal_diagnostic(ident.span, StashKey::LifetimeIsChar) {
diag.with_span_suggestion_verbose(
ident.span.shrink_to_hi(),
@ -2034,8 +2037,11 @@ impl<'a> Parser<'a> {
let msg = format!("unexpected token: {}", super::token_descr(&token));
self_.dcx().struct_span_err(token.span, msg)
};
// On an error path, eagerly consider a lifetime to be an unclosed character lit
if self.token.is_lifetime() {
// On an error path, eagerly consider a lifetime to be an unclosed character lit, if that
// makes sense.
if let Some(ident) = self.token.lifetime()
&& could_be_unclosed_char_literal(ident)
{
let lt = self.expect_lifetime();
Ok(self.recover_unclosed_char(lt.ident, mk_lit_char, err))
} else {
@ -3763,6 +3769,13 @@ impl<'a> Parser<'a> {
}
}
/// Could this lifetime/label be an unclosed char literal? For example, `'a`
/// could be, but `'abc` could not.
pub(crate) fn could_be_unclosed_char_literal(ident: Ident) -> bool {
ident.name.as_str().starts_with('\'')
&& unescape_char(ident.without_first_quote().name.as_str()).is_ok()
}
/// Used to forbid `let` expressions in certain syntactic locations.
#[derive(Clone, Copy, Subdiagnostic)]
pub(crate) enum ForbiddenLetReason {

View file

@ -10,6 +10,7 @@ use crate::errors::{
UnexpectedParenInRangePatSugg, UnexpectedVertVertBeforeFunctionParam,
UnexpectedVertVertInPattern,
};
use crate::parser::expr::could_be_unclosed_char_literal;
use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole};
use rustc_ast::mut_visit::{noop_visit_pat, MutVisitor};
use rustc_ast::ptr::P;
@ -443,11 +444,12 @@ impl<'a> Parser<'a> {
} else {
PatKind::Path(qself, path)
}
} else if matches!(self.token.kind, token::Lifetime(_))
} else if let token::Lifetime(lt) = self.token.kind
// In pattern position, we're totally fine with using "next token isn't colon"
// as a heuristic. We could probably just always try to recover if it's a lifetime,
// because we never have `'a: label {}` in a pattern position anyways, but it does
// keep us from suggesting something like `let 'a: Ty = ..` => `let 'a': Ty = ..`
&& could_be_unclosed_char_literal(Ident::with_dummy_span(lt))
&& !self.look_ahead(1, |token| matches!(token.kind, token::Colon))
{
// Recover a `'a` as a `'a'` literal