
Remove NtExpr and NtLiteral.

Notes about tests:
- `tests/ui/rfcs/rfc-2294-if-let-guard/feature-gate.rs`: some messages are
  now duplicated due to repeated parsing.

- `tests/ui/rfcs/rfc-2497-if-let-chains/disallowed-positions.rs`: ditto.

- `tests/ui/proc-macro/macro-rules-derive-cfg.rs`: the diff looks large,
  but the only difference is the insertion of a single
  invisible-delimited group around a metavar (see the sketch after these
  notes).

- `tests/ui/attributes/nonterminal-expansion.rs`: a slight span
  degradation, somehow related to the recent massive attr parsing
  rewrite (#135726). I couldn't work out exactly what is going wrong,
  but I don't think it's worth holding things up for a single slightly
  suboptimal error message.
Nicholas Nethercote 2024-04-18 21:31:17 +10:00
parent 0b4a81a4ef
commit 49ed25b5d2
30 changed files with 864 additions and 648 deletions

View file

@@ -4,10 +4,10 @@ use core::mem;
use core::ops::{Bound, ControlFlow};
use ast::mut_visit::{self, MutVisitor};
use ast::token::{IdentIsRaw, MetaVarKind};
use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::token::{self, Delimiter, InvisibleOrigin, MetaVarKind, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree;
use rustc_ast::util::case::Case;
use rustc_ast::util::classify;
@@ -19,7 +19,6 @@ use rustc_ast::{
MetaItemLit, Movability, Param, RangeLimits, StmtKind, Ty, TyKind, UnOp, UnsafeBinderCastKind,
YieldKind,
};
use rustc_ast_pretty::pprust;
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic};
use rustc_lexer::unescape::unescape_char;
@@ -605,7 +604,7 @@ impl<'a> Parser<'a> {
// can't continue an expression after an ident
token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw),
token::Literal(..) | token::Pound => true,
_ => t.is_whole_expr(),
_ => t.is_metavar_expr(),
};
self.token.is_ident_named(sym::not) && self.look_ahead(1, token_cannot_continue_expr)
}
@@ -641,6 +640,13 @@ impl<'a> Parser<'a> {
TokenKind::NtIdent(..) | TokenKind::NtLifetime(..) | TokenKind::Interpolated(..) => {
self.prev_token.span
}
TokenKind::CloseDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(_))) => {
// `expr.span` is the interpolated span, because invisible open
// and close delims both get marked with the same span, one
// that covers the entire thing between them. (See
// `rustc_expand::mbe::transcribe::transcribe`.)
self.prev_token.span
}
_ => expr.span,
}
}
@@ -979,12 +985,30 @@ impl<'a> Parser<'a> {
}
fn error_unexpected_after_dot(&self) {
let actual = pprust::token_to_string(&self.token);
let actual = super::token_descr(&self.token);
let span = self.token.span;
let sm = self.psess.source_map();
let (span, actual) = match (&self.token.kind, self.subparser_name) {
(token::Eof, Some(_)) if let Ok(actual) = sm.span_to_snippet(sm.next_point(span)) => {
(span.shrink_to_hi(), actual.into())
(token::Eof, Some(_)) if let Ok(snippet) = sm.span_to_snippet(sm.next_point(span)) => {
(span.shrink_to_hi(), format!("`{}`", snippet))
}
(token::CloseDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(_))), _) => {
// No need to report an error. This case will only occur when parsing a pasted
// metavariable, and we should have emitted an error when parsing the macro call in
// the first place. E.g. in this code:
// ```
// macro_rules! m { ($e:expr) => { $e }; }
//
// fn main() {
// let f = 1;
// m!(f.);
// }
// ```
// we'll get an error "unexpected token: `)`" when parsing the `m!(f.)`, so we don't
// want to issue a second error when parsing the expansion `«f.»` (where `«`/`»`
// represent the invisible delimiters).
self.dcx().span_delayed_bug(span, "bad dot expr in metavariable");
return;
}
_ => (span, actual),
};
@@ -1364,17 +1388,31 @@ impl<'a> Parser<'a> {
let span = self.token.span;
if let token::Interpolated(nt) = &self.token.kind {
match &**nt {
token::NtExpr(e) | token::NtLiteral(e) => {
let e = e.clone();
self.bump();
return Ok(e);
}
token::NtBlock(block) => {
let block = block.clone();
self.bump();
return Ok(self.mk_expr(self.prev_token.span, ExprKind::Block(block, None)));
}
};
} else if let Some(expr) = self.eat_metavar_seq_with_matcher(
|mv_kind| matches!(mv_kind, MetaVarKind::Expr { .. }),
|this| {
let expr = this.parse_expr();
// FIXME(nnethercote) Sometimes with expressions we get a trailing comma, possibly
// related to the FIXME in `collect_tokens_for_expr`. Examples are the multi-line
// `assert_eq!` calls involving arguments annotated with `#[rustfmt::skip]` in
// `compiler/rustc_index/src/bit_set/tests.rs`.
if this.token.kind == token::Comma {
this.bump();
}
expr
},
) {
return Ok(expr);
} else if let Some(lit) =
self.eat_metavar_seq(MetaVarKind::Literal, |this| this.parse_literal_maybe_minus())
{
return Ok(lit);
} else if let Some(path) = self.eat_metavar_seq(MetaVarKind::Path, |this| {
this.collect_tokens_no_attrs(|this| this.parse_path(PathStyle::Type))
}) {
@@ -2062,87 +2100,107 @@ impl<'a> Parser<'a> {
.or_else(|()| self.handle_missing_lit(Parser::mk_meta_item_lit_char))
}
fn recover_after_dot(&mut self) -> Option<Token> {
let mut recovered = None;
fn recover_after_dot(&mut self) {
if self.token == token::Dot {
// Attempt to recover `.4` as `0.4`. We don't currently have any syntax where
// dot would follow an optional literal, so we do this unconditionally.
recovered = self.look_ahead(1, |next_token| {
let recovered = self.look_ahead(1, |next_token| {
// If it's an integer that looks like a float, then recover as such.
//
// We will never encounter the exponent part of a floating
// point literal here, since there's no use of the exponent
// syntax that also constitutes a valid integer, so we need
// not check for that.
if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) =
next_token.kind
&& suffix.is_none_or(|s| s == sym::f32 || s == sym::f64)
&& symbol.as_str().chars().all(|c| c.is_numeric() || c == '_')
&& self.token.span.hi() == next_token.span.lo()
{
// If this integer looks like a float, then recover as such.
//
// We will never encounter the exponent part of a floating
// point literal here, since there's no use of the exponent
// syntax that also constitutes a valid integer, so we need
// not check for that.
if suffix.is_none_or(|s| s == sym::f32 || s == sym::f64)
&& symbol.as_str().chars().all(|c| c.is_numeric() || c == '_')
&& self.token.span.hi() == next_token.span.lo()
{
let s = String::from("0.") + symbol.as_str();
let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix);
return Some(Token::new(kind, self.token.span.to(next_token.span)));
}
let s = String::from("0.") + symbol.as_str();
let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix);
Some(Token::new(kind, self.token.span.to(next_token.span)))
} else {
None
}
None
});
if let Some(token) = &recovered {
self.bump();
if let Some(recovered) = recovered {
self.dcx().emit_err(errors::FloatLiteralRequiresIntegerPart {
span: token.span,
suggestion: token.span.shrink_to_lo(),
span: recovered.span,
suggestion: recovered.span.shrink_to_lo(),
});
self.bump();
self.token = recovered;
}
}
}
recovered
/// Keep this in sync with `Token::can_begin_literal_maybe_minus` and
/// `Lit::from_token` (excluding unary negation).
fn eat_token_lit(&mut self) -> Option<token::Lit> {
match self.token.uninterpolate().kind {
token::Ident(name, IdentIsRaw::No) if name.is_bool_lit() => {
self.bump();
Some(token::Lit::new(token::Bool, name, None))
}
token::Literal(token_lit) => {
self.bump();
Some(token_lit)
}
token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
MetaVarKind::Literal,
))) => {
let lit = self
.eat_metavar_seq(MetaVarKind::Literal, |this| this.parse_literal_maybe_minus())
.expect("metavar seq literal");
let ast::ExprKind::Lit(token_lit) = lit.kind else {
panic!("didn't reparse a literal");
};
Some(token_lit)
}
token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
mv_kind @ MetaVarKind::Expr { can_begin_literal_maybe_minus: true, .. },
))) => {
let expr = self
.eat_metavar_seq(mv_kind, |this| this.parse_expr())
.expect("metavar seq expr");
let ast::ExprKind::Lit(token_lit) = expr.kind else {
panic!("didn't reparse an expr");
};
Some(token_lit)
}
_ => None,
}
}
/// Matches `lit = true | false | token_lit`.
/// Returns `None` if the next token is not a literal.
pub(super) fn parse_opt_token_lit(&mut self) -> Option<(token::Lit, Span)> {
let recovered = self.recover_after_dot();
let token = recovered.as_ref().unwrap_or(&self.token);
let span = token.span;
token::Lit::from_token(token).map(|token_lit| {
self.bump();
(token_lit, span)
})
fn parse_opt_token_lit(&mut self) -> Option<(token::Lit, Span)> {
self.recover_after_dot();
let span = self.token.span;
self.eat_token_lit().map(|token_lit| (token_lit, span))
}
/// Matches `lit = true | false | token_lit`.
/// Returns `None` if the next token is not a literal.
pub(super) fn parse_opt_meta_item_lit(&mut self) -> Option<MetaItemLit> {
let recovered = self.recover_after_dot();
let token = recovered.as_ref().unwrap_or(&self.token);
match token::Lit::from_token(token) {
Some(lit) => {
match MetaItemLit::from_token_lit(lit, token.span) {
Ok(lit) => {
self.bump();
Some(lit)
}
Err(err) => {
let span = token.uninterpolated_span();
self.bump();
let guar = report_lit_error(self.psess, err, lit, span);
// Pack possible quotes and prefixes from the original literal into
// the error literal's symbol so they can be pretty-printed faithfully.
let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None);
let symbol = Symbol::intern(&suffixless_lit.to_string());
let lit = token::Lit::new(token::Err(guar), symbol, lit.suffix);
Some(
MetaItemLit::from_token_lit(lit, span)
.unwrap_or_else(|_| unreachable!()),
)
}
fn parse_opt_meta_item_lit(&mut self) -> Option<MetaItemLit> {
self.recover_after_dot();
let span = self.token.span;
let uninterpolated_span = self.uninterpolated_token_span();
self.eat_token_lit().map(|token_lit| {
match MetaItemLit::from_token_lit(token_lit, span) {
Ok(lit) => lit,
Err(err) => {
let guar = report_lit_error(&self.psess, err, token_lit, uninterpolated_span);
// Pack possible quotes and prefixes from the original literal into
// the error literal's symbol so they can be pretty-printed faithfully.
let suffixless_lit = token::Lit::new(token_lit.kind, token_lit.symbol, None);
let symbol = Symbol::intern(&suffixless_lit.to_string());
let token_lit = token::Lit::new(token::Err(guar), symbol, token_lit.suffix);
MetaItemLit::from_token_lit(token_lit, uninterpolated_span).unwrap()
}
}
None => None,
}
})
}
pub(super) fn expect_no_tuple_index_suffix(&self, span: Span, suffix: Symbol) {
@@ -2166,9 +2224,10 @@ impl<'a> Parser<'a> {
/// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
/// Keep this in sync with `Token::can_begin_literal_maybe_minus`.
pub fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
if let token::Interpolated(nt) = &self.token.kind {
match &**nt {
// FIXME(nnethercote) The `NtExpr` case should only match if
if let Some(expr) = self.eat_metavar_seq_with_matcher(
|mv_kind| matches!(mv_kind, MetaVarKind::Expr { .. }),
|this| {
// FIXME(nnethercote) The `expr` case should only match if
// `e` is an `ExprKind::Lit` or an `ExprKind::Unary` containing
// an `UnOp::Neg` and an `ExprKind::Lit`, like how
// `can_begin_literal_maybe_minus` works. But this method has
@@ -2178,13 +2237,14 @@ impl<'a> Parser<'a> {
// `ExprKind::Path` must be accepted when parsing range
// patterns. That requires some care. So for now, we continue
// being less strict here than we should be.
token::NtExpr(e) | token::NtLiteral(e) => {
let e = e.clone();
self.bump();
return Ok(e);
}
_ => {}
};
this.parse_expr()
},
) {
return Ok(expr);
} else if let Some(lit) =
self.eat_metavar_seq(MetaVarKind::Literal, |this| this.parse_literal_maybe_minus())
{
return Ok(lit);
}
let lo = self.token.span;
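
As context for the `parse_literal_maybe_minus` changes above: the `:literal`
fragment it backs matches `-? literal`, so a leading minus is accepted as part
of the literal, and the pasted result is later reparsed from the
invisible-delimited group (the `eat_metavar_seq(MetaVarKind::Literal, ...)`
path above). A minimal sketch, with a made-up macro name:

```
// Hypothetical macro: forwards a `:literal` fragment unchanged.
macro_rules! lit_value {
    ($l:literal) => { $l };
}

fn main() {
    // `-1` is matched by the `literal` fragment because its grammar is
    // `-? literal`, i.e. what `parse_literal_maybe_minus` parses.
    assert_eq!(lit_value!(-1), -1);
    assert_eq!(lit_value!(2.5), 2.5);
}
```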

View file

@@ -1290,12 +1290,24 @@ impl<'a> Parser<'a> {
}
fn is_unsafe_foreign_mod(&self) -> bool {
self.token.is_keyword(kw::Unsafe)
&& self.is_keyword_ahead(1, &[kw::Extern])
&& self.look_ahead(
2 + self.look_ahead(2, |t| t.can_begin_string_literal() as usize),
|t| *t == token::OpenDelim(Delimiter::Brace),
)
// Look for `unsafe`.
if !self.token.is_keyword(kw::Unsafe) {
return false;
}
// Look for `extern`.
if !self.is_keyword_ahead(1, &[kw::Extern]) {
return false;
}
// Look for the optional ABI string literal.
let n = if self.look_ahead(2, |t| t.can_begin_string_literal()) { 3 } else { 2 };
// Look for the `{`. Use `tree_look_ahead` because the ABI (if present)
// might be a metavariable i.e. an invisible-delimited sequence, and
// `tree_look_ahead` will consider that a single element when looking
// ahead.
self.tree_look_ahead(n, |t| matches!(t, TokenTree::Delimited(_, _, Delimiter::Brace, _)))
== Some(true)
}
fn is_static_global(&mut self) -> bool {
@@ -2604,13 +2616,36 @@ impl<'a> Parser<'a> {
})
// `extern ABI fn`
|| self.check_keyword_case(exp!(Extern), case)
// Use `tree_look_ahead` because `ABI` might be a metavariable,
// i.e. an invisible-delimited sequence, and `tree_look_ahead`
// will consider that a single element when looking ahead.
&& self.look_ahead(1, |t| t.can_begin_string_literal())
&& (self.look_ahead(2, |t| t.is_keyword_case(kw::Fn, case)) ||
&& (self.tree_look_ahead(2, |tt| {
match tt {
TokenTree::Token(t, _) => t.is_keyword_case(kw::Fn, case),
TokenTree::Delimited(..) => false,
}
}) == Some(true) ||
// This branch is only for better diagnostics; `pub`, `unsafe`, etc. are not
// allowed here.
(self.may_recover()
&& self.look_ahead(2, |t| ALL_QUALS.iter().any(|exp| t.is_keyword(exp.kw)))
&& self.look_ahead(3, |t| t.is_keyword_case(kw::Fn, case))))
&& self.tree_look_ahead(2, |tt| {
match tt {
TokenTree::Token(t, _) =>
ALL_QUALS.iter().any(|exp| {
t.is_keyword(exp.kw)
}),
TokenTree::Delimited(..) => false,
}
}) == Some(true)
&& self.tree_look_ahead(3, |tt| {
match tt {
TokenTree::Token(t, _) => t.is_keyword_case(kw::Fn, case),
TokenTree::Delimited(..) => false,
}
}) == Some(true)
)
)
}
/// Parses all the "front matter" (or "qualifiers") for a `fn` declaration,

View file

@@ -24,8 +24,8 @@ pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma};
use path::PathStyle;
use rustc_ast::ptr::P;
use rustc_ast::token::{
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, NtPatKind, Token,
TokenKind,
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, NtExprKind, NtPatKind,
Token, TokenKind,
};
use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
use rustc_ast::util::case::Case;
@@ -101,6 +101,7 @@ pub enum ForceCollect {
#[macro_export]
macro_rules! maybe_whole {
($p:expr, $constructor:ident, |$x:ident| $e:expr) => {
#[allow(irrefutable_let_patterns)] // FIXME: temporary
if let token::Interpolated(nt) = &$p.token.kind
&& let token::$constructor(x) = &**nt
{
@@ -299,6 +300,10 @@ impl TokenTreeCursor {
self.stream.get(self.index)
}
fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.get(self.index + n)
}
#[inline]
fn bump(&mut self) {
self.index += 1;
@@ -1290,6 +1295,17 @@ impl<'a> Parser<'a> {
looker(&token)
}
/// Like `look_ahead`, but skips over token trees rather than tokens. Useful
/// when looking past possible metavariable pasting sites.
pub fn tree_look_ahead<R>(
&self,
dist: usize,
looker: impl FnOnce(&TokenTree) -> R,
) -> Option<R> {
assert_ne!(dist, 0);
self.token_cursor.curr.look_ahead(dist - 1).map(looker)
}
/// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
pub(crate) fn is_keyword_ahead(&self, dist: usize, kws: &[Symbol]) -> bool {
self.look_ahead(dist, |t| kws.iter().any(|&kw| t.is_keyword(kw)))
@@ -1706,6 +1722,16 @@ impl<'a> Parser<'a> {
pub fn approx_token_stream_pos(&self) -> u32 {
self.num_bump_calls
}
pub fn uninterpolated_token_span(&self) -> Span {
match &self.token.kind {
token::Interpolated(nt) => nt.use_span(),
token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(_))) => {
self.look_ahead(1, |t| t.span)
}
_ => self.token.span,
}
}
}
pub(crate) fn make_unclosed_delims_error(
@ -1758,6 +1784,8 @@ pub enum ParseNtResult {
Item(P<ast::Item>),
Stmt(P<ast::Stmt>),
Pat(P<ast::Pat>, NtPatKind),
Expr(P<ast::Expr>, NtExprKind),
Literal(P<ast::Expr>),
Ty(P<ast::Ty>),
Meta(P<ast::AttrItem>),
Path(P<ast::Path>),

View file

@@ -48,10 +48,6 @@ impl<'a> Parser<'a> {
/// Old variant of `may_be_ident`. Being phased out.
fn nt_may_be_ident(nt: &Nonterminal) -> bool {
match nt {
NtExpr(_)
| NtLiteral(_) // `true`, `false`
=> true,
NtBlock(_) => false,
}
}
@@ -95,7 +91,7 @@ impl<'a> Parser<'a> {
token::OpenDelim(Delimiter::Brace) => true,
token::NtLifetime(..) => true,
token::Interpolated(nt) => match &**nt {
NtBlock(_) | NtExpr(_) | NtLiteral(_) => true,
NtBlock(_) => true,
},
token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(k))) => match k {
MetaVarKind::Block
@@ -179,10 +175,14 @@ impl<'a> Parser<'a> {
pat_kind,
));
}
NonterminalKind::Expr(_) => NtExpr(self.parse_expr_force_collect()?),
NonterminalKind::Expr(expr_kind) => {
return Ok(ParseNtResult::Expr(self.parse_expr_force_collect()?, expr_kind));
}
NonterminalKind::Literal => {
// The `:literal` matcher does not support attributes
NtLiteral(self.collect_tokens_no_attrs(|this| this.parse_literal_maybe_minus())?)
// The `:literal` matcher does not support attributes.
return Ok(ParseNtResult::Literal(
self.collect_tokens_no_attrs(|this| this.parse_literal_maybe_minus())?,
));
}
NonterminalKind::Ty => {
return Ok(ParseNtResult::Ty(

View file

@@ -1252,7 +1252,7 @@ impl<'a> Parser<'a> {
|| *t == token::Dot // e.g. `.5` for recovery;
|| matches!(t.kind, token::Literal(..) | token::Minus)
|| t.is_bool_lit()
|| t.is_whole_expr()
|| t.is_metavar_expr()
|| t.is_lifetime() // recover `'a` instead of `'a'`
|| (self.may_recover() // recover leading `(`
&& *t == token::OpenDelim(Delimiter::Parenthesis)