Overhaul token collection.

This commit does the following. - Renames `collect_tokens_trailing_token` as `collect_tokens`, because (a) it's annoying long, and (b) the `_trailing_token` bit is less accurate now that its types have changed. - In `collect_tokens`, adds a `Option<CollectPos>` argument and a `UsePreAttrPos` in the return type of `f`. These are used in `parse_expr_force_collect` (for vanilla expressions) and in `parse_stmt_without_recovery` (for two different cases of expression statements). Together these ensure are enough to fix all the problems with token collection and assoc expressions. The changes to the `stringify.rs` test demonstrate some of these. - Adds a new test. The code in this test was causing an assertion failure prior to this commit, due to an invalid `NodeRange`. The extra complexity is annoying, but necessary to fix the existing problems.
2024-08-06 17:16:40 +10:00 · 2024-08-06 17:16:40 +10:00 · 9d31f86f0d
commit 9d31f86f0d
parent fe460ac28b
12 changed files with 414 additions and 292 deletions
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@ -36,7 +36,7 @@ use super::pat::{CommaRecoveryMode, Expected, RecoverColon, RecoverComma};
 use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign};
 use super::{
    AttrWrapper, BlockMode, ClosureSpans, ForceCollect, Parser, PathStyle, Restrictions,
-    SemiColonMode, SeqSep, TokenType, Trailing,
+    SemiColonMode, SeqSep, TokenType, Trailing, UsePreAttrPos,
 };
 use crate::{errors, maybe_recover_from_interpolated_ty_qpath};

@ -59,15 +59,30 @@ impl<'a> Parser<'a> {
        self.current_closure.take();

        let attrs = self.parse_outer_attributes()?;
-        self.parse_expr_res(Restrictions::empty(), attrs)
+        self.parse_expr_res(Restrictions::empty(), attrs).map(|res| res.0)
    }

    /// Parses an expression, forcing tokens to be collected.
    pub fn parse_expr_force_collect(&mut self) -> PResult<'a, P<Expr>> {
        self.current_closure.take();

+        // If the expression is associative (e.g. `1 + 2`), then any preceding
+        // outer attribute actually belongs to the first inner sub-expression.
+        // In which case we must use the pre-attr pos to include the attribute
+        // in the collected tokens for the outer expression.
+        let pre_attr_pos = self.collect_pos();
        let attrs = self.parse_outer_attributes()?;
-        self.collect_tokens_no_attrs(|this| this.parse_expr_res(Restrictions::empty(), attrs))
+        self.collect_tokens(
+            Some(pre_attr_pos),
+            AttrWrapper::empty(),
+            ForceCollect::Yes,
+            |this, _empty_attrs| {
+                let (expr, is_assoc) = this.parse_expr_res(Restrictions::empty(), attrs)?;
+                let use_pre_attr_pos =
+                    if is_assoc { UsePreAttrPos::Yes } else { UsePreAttrPos::No };
+                Ok((expr, Trailing::No, use_pre_attr_pos))
+            },
+        )
    }

    pub fn parse_expr_anon_const(&mut self) -> PResult<'a, AnonConst> {
@ -77,7 +92,7 @@ impl<'a> Parser<'a> {
    fn parse_expr_catch_underscore(&mut self, restrictions: Restrictions) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_outer_attributes()?;
        match self.parse_expr_res(restrictions, attrs) {
-            Ok(expr) => Ok(expr),
+            Ok((expr, _)) => Ok(expr),
            Err(err) => match self.token.ident() {
                Some((Ident { name: kw::Underscore, .. }, IdentIsRaw::No))
                    if self.may_recover() && self.look_ahead(1, |t| t == &token::Comma) =>
@ -104,18 +119,20 @@ impl<'a> Parser<'a> {
        &mut self,
        r: Restrictions,
        attrs: AttrWrapper,
-    ) -> PResult<'a, P<Expr>> {
+    ) -> PResult<'a, (P<Expr>, bool)> {
        self.with_res(r, |this| this.parse_expr_assoc_with(0, attrs))
    }

    /// Parses an associative expression with operators of at least `min_prec` precedence.
+    /// The `bool` in the return value indicates if it was an assoc expr, i.e. with an operator
+    /// followed by a subexpression (e.g. `1 + 2`).
    pub(super) fn parse_expr_assoc_with(
        &mut self,
        min_prec: usize,
        attrs: AttrWrapper,
-    ) -> PResult<'a, P<Expr>> {
+    ) -> PResult<'a, (P<Expr>, bool)> {
        let lhs = if self.token.is_range_separator() {
-            return self.parse_expr_prefix_range(attrs);
+            return self.parse_expr_prefix_range(attrs).map(|res| (res, false));
        } else {
            self.parse_expr_prefix(attrs)?
        };
@ -123,15 +140,17 @@ impl<'a> Parser<'a> {
    }

    /// Parses the rest of an associative expression (i.e. the part after the lhs) with operators
-    /// of at least `min_prec` precedence.
+    /// of at least `min_prec` precedence. The `bool` in the return value indicates if something
+    /// was actually parsed.
    pub(super) fn parse_expr_assoc_rest_with(
        &mut self,
        min_prec: usize,
        starts_stmt: bool,
        mut lhs: P<Expr>,
-    ) -> PResult<'a, P<Expr>> {
+    ) -> PResult<'a, (P<Expr>, bool)> {
+        let mut parsed_something = false;
        if !self.should_continue_as_assoc_expr(&lhs) {
-            return Ok(lhs);
+            return Ok((lhs, parsed_something));
        }

        self.expected_tokens.push(TokenType::Operator);
@ -156,10 +175,11 @@ impl<'a> Parser<'a> {
                self.err_larrow_operator(self.token.span);
            }

+            parsed_something = true;
            self.bump();
            if op.node.is_comparison() {
                if let Some(expr) = self.check_no_chained_comparison(&lhs, &op)? {
-                    return Ok(expr);
+                    return Ok((expr, parsed_something));
                }
            }

@ -263,7 +283,7 @@ impl<'a> Parser<'a> {
                // the special cases. The code is here only for future convenience.
                Fixity::None => 1,
            };
-            let rhs = self.with_res(restrictions - Restrictions::STMT_EXPR, |this| {
+            let (rhs, _) = self.with_res(restrictions - Restrictions::STMT_EXPR, |this| {
                let attrs = this.parse_outer_attributes()?;
                this.parse_expr_assoc_with(prec + prec_adjustment, attrs)
            })?;
@ -319,7 +339,7 @@ impl<'a> Parser<'a> {
            }
        }

-        Ok(lhs)
+        Ok((lhs, parsed_something))
    }

    fn should_continue_as_assoc_expr(&mut self, lhs: &Expr) -> bool {
@ -441,7 +461,8 @@ impl<'a> Parser<'a> {
            let attrs = self.parse_outer_attributes()?;
            Some(
                self.parse_expr_assoc_with(prec + 1, attrs)
-                    .map_err(|err| self.maybe_err_dotdotlt_syntax(maybe_lt, err))?,
+                    .map_err(|err| self.maybe_err_dotdotlt_syntax(maybe_lt, err))?
+                    .0,
            )
        } else {
            None
@ -498,7 +519,7 @@ impl<'a> Parser<'a> {
                // RHS must be parsed with more associativity than the dots.
                let attrs = this.parse_outer_attributes()?;
                this.parse_expr_assoc_with(op.unwrap().precedence() + 1, attrs)
-                    .map(|x| (lo.to(x.span), Some(x)))
+                    .map(|(x, _)| (lo.to(x.span), Some(x)))
                    .map_err(|err| this.maybe_err_dotdotlt_syntax(maybe_lt, err))?
            } else {
                (lo, None)
@ -2335,7 +2356,7 @@ impl<'a> Parser<'a> {
                let token = self.token.clone();
                let attrs = self.parse_outer_attributes()?;
                match self.parse_expr_res(restrictions, attrs) {
-                    Ok(expr) => expr,
+                    Ok((expr, _)) => expr,
                    Err(err) => self.recover_closure_body(err, before, prev, token, lo, decl_hi)?,
                }
            }
@ -2445,7 +2466,7 @@ impl<'a> Parser<'a> {
    fn parse_fn_block_param(&mut self) -> PResult<'a, Param> {
        let lo = self.token.span;
        let attrs = self.parse_outer_attributes()?;
-        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
+        self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
            let pat = this.parse_pat_no_top_alt(Some(Expected::ParameterName), None)?;
            let ty = if this.eat(&token::Colon) {
                this.parse_ty()?
@ -2463,6 +2484,7 @@ impl<'a> Parser<'a> {
                    is_placeholder: false,
                },
                Trailing::from(this.token == token::Comma),
+                UsePreAttrPos::No,
            ))
        })
    }
@ -2583,7 +2605,7 @@ impl<'a> Parser<'a> {
    /// Parses the condition of a `if` or `while` expression.
    fn parse_expr_cond(&mut self) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_outer_attributes()?;
-        let mut cond =
+        let (mut cond, _) =
            self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL | Restrictions::ALLOW_LET, attrs)?;

        CondChecker::new(self).visit_expr(&mut cond);
@ -2632,7 +2654,7 @@ impl<'a> Parser<'a> {
            self.expect(&token::Eq)?;
        }
        let attrs = self.parse_outer_attributes()?;
-        let expr = self.parse_expr_assoc_with(1 + prec_let_scrutinee_needs_par(), attrs)?;
+        let (expr, _) = self.parse_expr_assoc_with(1 + prec_let_scrutinee_needs_par(), attrs)?;
        let span = lo.to(expr.span);
        Ok(self.mk_expr(span, ExprKind::Let(pat, expr, span, recovered)))
    }
@ -2766,7 +2788,7 @@ impl<'a> Parser<'a> {
                // We know for sure we have seen `for ($SOMETHING in`. In the happy path this would
                // happen right before the return of this method.
                let attrs = self.parse_outer_attributes()?;
-                let expr = match self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs) {
+                let (expr, _) = match self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs) {
                    Ok(expr) => expr,
                    Err(expr_err) => {
                        // We don't know what followed the `in`, so cancel and bubble up the
@ -2801,7 +2823,7 @@ impl<'a> Parser<'a> {
        }
        self.check_for_for_in_in_typo(self.prev_token.span);
        let attrs = self.parse_outer_attributes()?;
-        let expr = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs)?;
+        let (expr, _) = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs)?;
        Ok((pat, expr))
    }

@ -2921,7 +2943,7 @@ impl<'a> Parser<'a> {
    fn parse_expr_match(&mut self) -> PResult<'a, P<Expr>> {
        let match_span = self.prev_token.span;
        let attrs = self.parse_outer_attributes()?;
-        let scrutinee = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs)?;
+        let (scrutinee, _) = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, attrs)?;

        self.parse_match_block(match_span, match_span, scrutinee, MatchKind::Prefix)
    }
@ -3069,7 +3091,7 @@ impl<'a> Parser<'a> {

    pub(super) fn parse_arm(&mut self) -> PResult<'a, Arm> {
        let attrs = self.parse_outer_attributes()?;
-        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
+        self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
            let lo = this.token.span;
            let (pat, guard) = this.parse_match_arm_pat_and_guard()?;

@ -3126,7 +3148,7 @@ impl<'a> Parser<'a> {
                let arm_start_span = this.token.span;

                let attrs = this.parse_outer_attributes()?;
-                let expr =
+                let (expr, _) =
                    this.parse_expr_res(Restrictions::STMT_EXPR, attrs).map_err(|mut err| {
                        err.span_label(arrow_span, "while parsing the `match` arm starting here");
                        err
@ -3244,6 +3266,7 @@ impl<'a> Parser<'a> {
                    is_placeholder: false,
                },
                Trailing::No,
+                UsePreAttrPos::No,
            ))
        })
    }
@ -3334,8 +3357,9 @@ impl<'a> Parser<'a> {

    fn parse_match_guard_condition(&mut self) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_outer_attributes()?;
-        self.parse_expr_res(Restrictions::ALLOW_LET | Restrictions::IN_IF_GUARD, attrs).map_err(
-            |mut err| {
+        match self.parse_expr_res(Restrictions::ALLOW_LET | Restrictions::IN_IF_GUARD, attrs) {
+            Ok((expr, _)) => Ok(expr),
+            Err(mut err) => {
                if self.prev_token == token::OpenDelim(Delimiter::Brace) {
                    let sugg_sp = self.prev_token.span.shrink_to_lo();
                    // Consume everything within the braces, let's avoid further parse
@ -3355,9 +3379,9 @@ impl<'a> Parser<'a> {
                        err.span_suggestion_verbose(sugg_sp, msg, "=> ", applicability);
                    }
                }
-                err
-            },
-        )
+                Err(err)
+            }
+        }
    }

    pub(crate) fn is_builtin(&self) -> bool {
@ -3708,7 +3732,7 @@ impl<'a> Parser<'a> {
    fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
        let attrs = self.parse_outer_attributes()?;
        self.recover_vcs_conflict_marker();
-        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
+        self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
            let lo = this.token.span;

            // Check if a colon exists one ahead. This means we're parsing a fieldname.
@ -3753,6 +3777,7 @@ impl<'a> Parser<'a> {
                    is_placeholder: false,
                },
                Trailing::from(this.token == token::Comma),
+                UsePreAttrPos::No,
            ))
        })
    }
@ -3846,7 +3871,7 @@ impl<'a> Parser<'a> {
        attrs: AttrWrapper,
        f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, P<Expr>>,
    ) -> PResult<'a, P<Expr>> {
-        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
+        self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
            let res = f(this, attrs)?;
            let trailing = Trailing::from(
                this.restrictions.contains(Restrictions::STMT_EXPR)
@ -3856,7 +3881,7 @@ impl<'a> Parser<'a> {
                // `#[attr] expr,` always captures a trailing comma.
                || this.token == token::Comma,
            );
-            Ok((res, trailing))
+            Ok((res, trailing, UsePreAttrPos::No))
        })
    }
 }