Rollup merge of #126452 - compiler-errors:raw-lifetimes, r=spastorino

Implement raw lifetimes and labels (`'r#ident`)

This PR does two things:
1. Reserve lifetime prefixes, e.g. `'prefix#lt` in edition 2021.
2. Implements raw lifetimes, e.g. `'r#async` in edition 2021.

This PR additionally extends the `keyword_idents_2024` lint to also check lifetimes.

cc `@traviscross`
r? parser
This commit is contained in:
Matthias Krüger 2024-09-07 23:30:10 +02:00 committed by GitHub
commit ccf3f6e59d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 373 additions and 85 deletions

View file

@ -13,7 +13,6 @@ use rustc_session::lint::builtin::{
};
use rustc_session::lint::BuiltinLintDiag;
use rustc_session::parse::ParseSess;
use rustc_span::edition::Edition;
use rustc_span::symbol::Symbol;
use rustc_span::{BytePos, Pos, Span};
use tracing::debug;
@ -188,9 +187,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
preceded_by_whitespace = true;
continue;
}
rustc_lexer::TokenKind::Ident => {
self.ident(start)
}
rustc_lexer::TokenKind::Ident => self.ident(start),
rustc_lexer::TokenKind::RawIdent => {
let sym = nfc_normalize(self.str_from(start + BytePos(2)));
let span = self.mk_sp(start, self.pos);
@ -205,20 +202,31 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.report_unknown_prefix(start);
self.ident(start)
}
rustc_lexer::TokenKind::InvalidIdent
| rustc_lexer::TokenKind::InvalidPrefix
rustc_lexer::TokenKind::UnknownPrefixLifetime => {
self.report_unknown_prefix(start);
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident, IdentIsRaw::No)
}
rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix
// Do not recover an identifier with emoji if the codepoint is a confusable
// with a recoverable substitution token, like ``.
if !UNICODE_ARRAY
.iter()
.any(|&(c, _, _)| {
let sym = self.str_from(start);
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
}) =>
if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
let sym = self.str_from(start);
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
}) =>
{
let sym = nfc_normalize(self.str_from(start));
let span = self.mk_sp(start, self.pos);
self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
self.psess
.bad_unicode_identifiers
.borrow_mut()
.entry(sym)
.or_default()
.push(span);
token::Ident(sym, IdentIsRaw::No)
}
@ -249,9 +257,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
let suffix = if suffix_start < self.pos {
let string = self.str_from(suffix_start);
if string == "_" {
self
.dcx()
.emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) });
self.dcx().emit_err(errors::UnderscoreLiteralSuffix {
span: self.mk_sp(suffix_start, self.pos),
});
None
} else {
Some(Symbol::intern(string))
@ -269,12 +277,47 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
if starts_with_number {
let span = self.mk_sp(start, self.pos);
self.dcx().struct_err("lifetimes cannot start with a number")
self.dcx()
.struct_err("lifetimes cannot start with a number")
.with_span(span)
.stash(span, StashKey::LifetimeIsChar);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)
token::Lifetime(ident, IdentIsRaw::No)
}
rustc_lexer::TokenKind::RawLifetime => {
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
let ident_start = start + BytePos(3);
let prefix_span = self.mk_sp(start, ident_start);
if prefix_span.at_least_rust_2021() {
let lifetime_name_without_tick = self.str_from(ident_start);
// Put the `'` back onto the lifetime name.
let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1);
lifetime_name.push('\'');
lifetime_name += lifetime_name_without_tick;
let sym = Symbol::intern(&lifetime_name);
token::Lifetime(sym, IdentIsRaw::Yes)
} else {
// Otherwise, this should be parsed like `'r`. Warn about it though.
self.psess.buffer_lint(
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
prefix_span,
ast::CRATE_NODE_ID,
BuiltinLintDiag::RawPrefix(prefix_span),
);
// Reset the state so we just lex the `'r`.
let lt_start = start + BytePos(2);
self.pos = lt_start;
self.cursor = Cursor::new(&str_before[2 as usize..]);
let lifetime_name = self.str_from(start);
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident, IdentIsRaw::No)
}
}
rustc_lexer::TokenKind::Semi => token::Semi,
rustc_lexer::TokenKind::Comma => token::Comma,
@ -331,16 +374,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
// fancier error recovery to it, as there will be less overall work to do this
// way.
let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
let (token, sugg) =
unicode_chars::check_for_substitution(self, start, c, repeats + 1);
self.dcx().emit_err(errors::UnknownTokenStart {
span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
escaped: escaped_char(c),
sugg,
null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None },
repeat: if repeats > 0 {
swallow_next_invalid = repeats;
Some(errors::UnknownTokenRepeat { repeats })
} else {None}
} else {
None
},
});
if let Some(token) = token {
@ -699,7 +745,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
let expn_data = prefix_span.ctxt().outer_expn_data();
if expn_data.edition >= Edition::Edition2021 {
if expn_data.edition.at_least_rust_2021() {
// In Rust 2021, this is a hard error.
let sugg = if prefix == "rb" {
Some(errors::UnknownPrefixSugg::UseBr(prefix_span))

View file

@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> {
};
// On an error path, eagerly consider a lifetime to be an unclosed character lit, if that
// makes sense.
if let Some(ident) = self.token.lifetime()
if let Some((ident, IdentIsRaw::No)) = self.token.lifetime()
&& could_be_unclosed_char_literal(ident)
{
let lt = self.expect_lifetime();
@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> {
}
pub(crate) fn eat_label(&mut self) -> Option<Label> {
if let Some(ident) = self.token.lifetime() {
if let Some((ident, is_raw)) = self.token.lifetime() {
// Disallow `'fn`, but with a better error message than `expect_lifetime`.
if ident.without_first_quote().is_reserved() {
if matches!(is_raw, IdentIsRaw::No) && ident.without_first_quote().is_reserved() {
self.dcx().emit_err(errors::InvalidLabel { span: ident.span, name: ident.name });
}

View file

@ -1666,7 +1666,7 @@ enum FlatToken {
pub enum ParseNtResult {
Tt(TokenTree),
Ident(Ident, IdentIsRaw),
Lifetime(Ident),
Lifetime(Ident, IdentIsRaw),
/// This case will eventually be removed, along with `Token::Interpolate`.
Nt(Lrc<Nonterminal>),

View file

@ -88,7 +88,7 @@ impl<'a> Parser<'a> {
},
NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind),
NonterminalKind::Lifetime => match &token.kind {
token::Lifetime(_) | token::NtLifetime(..) => true,
token::Lifetime(..) | token::NtLifetime(..) => true,
_ => false,
},
NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => {
@ -171,9 +171,9 @@ impl<'a> Parser<'a> {
NonterminalKind::Lifetime => {
// We want to keep `'keyword` parsing, just like `keyword` is still
// an ident for nonterminal purposes.
return if let Some(ident) = self.token.lifetime() {
return if let Some((ident, is_raw)) = self.token.lifetime() {
self.bump();
Ok(ParseNtResult::Lifetime(ident))
Ok(ParseNtResult::Lifetime(ident, is_raw))
} else {
Err(self.dcx().create_err(UnexpectedNonterminal::Lifetime {
span: self.token.span,

View file

@ -1,6 +1,6 @@
use rustc_ast::mut_visit::{walk_pat, MutVisitor};
use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, Delimiter, Token};
use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token};
use rustc_ast::{
self as ast, AttrVec, BindingMode, ByRef, Expr, ExprKind, MacCall, Mutability, Pat, PatField,
PatFieldsRest, PatKind, Path, QSelf, RangeEnd, RangeSyntax,
@ -548,7 +548,7 @@ impl<'a> Parser<'a> {
None => PatKind::Path(qself, path),
}
}
} else if let Some(lt) = self.token.lifetime()
} else if let Some((lt, IdentIsRaw::No)) = self.token.lifetime()
// In pattern position, we're totally fine with using "next token isn't colon"
// as a heuristic. We could probably just always try to recover if it's a lifetime,
// because we never have `'a: label {}` in a pattern position anyways, but it does
@ -689,7 +689,7 @@ impl<'a> Parser<'a> {
/// Parse `&pat` / `&mut pat`.
fn parse_pat_deref(&mut self, expected: Option<Expected>) -> PResult<'a, PatKind> {
self.expect_and()?;
if let Some(lifetime) = self.token.lifetime() {
if let Some((lifetime, _)) = self.token.lifetime() {
self.bump(); // `'a`
self.dcx().emit_err(UnexpectedLifetimeInPattern {

View file

@ -1,5 +1,5 @@
use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, Delimiter, Token, TokenKind};
use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::util::case::Case;
use rustc_ast::{
self as ast, BareFnTy, BoundAsyncness, BoundConstness, BoundPolarity, FnRetTy, GenericBound,
@ -1285,8 +1285,9 @@ impl<'a> Parser<'a> {
/// Parses a single lifetime `'a` or panics.
pub(super) fn expect_lifetime(&mut self) -> Lifetime {
if let Some(ident) = self.token.lifetime() {
if ident.without_first_quote().is_reserved()
if let Some((ident, is_raw)) = self.token.lifetime() {
if matches!(is_raw, IdentIsRaw::No)
&& ident.without_first_quote().is_reserved()
&& ![kw::UnderscoreLifetime, kw::StaticLifetime].contains(&ident.name)
{
self.dcx().emit_err(errors::KeywordLifetime { span: ident.span });