Overhaul token collection.

This commit does the following.

- Renames `collect_tokens_trailing_token` as `collect_tokens`, because
  (a) it's annoyingly long, and (b) the `_trailing_token` bit is less
  accurate now that its types have changed.

- In `collect_tokens`, adds an `Option<CollectPos>` argument and a
  `UsePreAttrPos` in the return type of `f`. These are used in
  `parse_expr_force_collect` (for vanilla expressions) and in
  `parse_stmt_without_recovery` (for two different cases of expression
  statements). Together they are enough to fix all the problems with
  token collection and assoc expressions. The changes to the
  `stringify.rs` test demonstrate some of these. (A sketch of the new
  calling convention follows this list.)

- Adds a new test. The code in this test was causing an assertion
  failure prior to this commit, due to an invalid `NodeRange`.
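
The following sketch is illustrative only and is not part of this commit's
diff; the `parse_whole_expr` helper inside the closure is hypothetical. It
shows the shape of the new calling convention: the caller records the
pre-attribute position with `collect_pos`, passes it to `collect_tokens`, and
the closure signals via `UsePreAttrPos` whether collection should be
re-anchored at that earlier position.

```rust
// Illustrative caller (assumed context: a `Parser` method). The position is
// taken before the outer attributes are parsed, then handed to
// `collect_tokens`.
let pre_attr_pos = self.collect_pos();        // position before outer attributes
let attrs = self.parse_outer_attributes()?;   // consumes the outer attributes
self.collect_tokens(Some(pre_attr_pos), attrs, ForceCollect::Yes, |this, attrs| {
    let expr = this.parse_whole_expr(attrs)?; // hypothetical parsing helper
    // Returning `UsePreAttrPos::Yes` re-anchors collection at `pre_attr_pos`,
    // so the captured tokens include the outer attributes as well.
    Ok((expr, Trailing::No, UsePreAttrPos::Yes))
})
```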

The extra complexity is annoying, but necessary to fix the existing
problems.
Nicholas Nethercote 2024-08-06 17:16:40 +10:00
parent fe460ac28b
commit 9d31f86f0d
12 changed files with 414 additions and 292 deletions

@@ -15,6 +15,20 @@ use super::{
TokenCursor, Trailing,
};
// When collecting tokens, this fully captures the start point. Usually it's
// just after outer attributes, but occasionally it's before.
#[derive(Clone, Debug)]
pub(super) struct CollectPos {
start_token: (Token, Spacing),
cursor_snapshot: TokenCursor,
start_pos: u32,
}
pub(super) enum UsePreAttrPos {
No,
Yes,
}
/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
@@ -22,7 +36,7 @@ use super::{
///
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// by passing an `AttrWrapper` to `collect_tokens`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores an `ast::AttrVec`) without first collecting tokens.
///
@@ -33,16 +47,18 @@ pub(super) struct AttrWrapper {
attrs: AttrVec,
// The start of the outer attributes in the parser's token stream.
// This lets us create a `NodeReplacement` for the entire attribute
// target, including outer attributes.
start_pos: u32,
// target, including outer attributes. `None` if there are no outer
// attributes.
start_pos: Option<u32>,
}
impl AttrWrapper {
pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
AttrWrapper { attrs, start_pos }
AttrWrapper { attrs, start_pos: Some(start_pos) }
}
pub(super) fn empty() -> AttrWrapper {
AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
AttrWrapper { attrs: AttrVec::new(), start_pos: None }
}
pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
@@ -77,7 +93,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
}
// From a value of this type we can reconstruct the `TokenStream` seen by the
// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
// `f` callback passed to a call to `Parser::collect_tokens`, by
// replaying the getting of the tokens. This saves us producing a `TokenStream`
// if it is never needed, e.g. a captured `macro_rules!` argument that is never
// passed to a proc macro. In practice, token stream creation happens rarely
@@ -166,16 +182,30 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
}
impl<'a> Parser<'a> {
pub(super) fn collect_pos(&self) -> CollectPos {
CollectPos {
start_token: (self.token.clone(), self.token_spacing),
cursor_snapshot: self.token_cursor.clone(),
start_pos: self.num_bump_calls,
}
}
/// Parses code with `f`. If appropriate, it records the tokens (in
/// `LazyAttrTokenStream` form) that were parsed in the result, accessible
/// via the `HasTokens` trait. The `Trailing` part of the callback's
/// result indicates if an extra token should be captured, e.g. a comma or
/// semicolon.
/// semicolon. The `UsePreAttrPos` part of the callback's result indicates
/// if we should use `pre_attr_pos` as the collection start position (only
/// required in a few cases).
///
/// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
/// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
/// details.
///
/// `pre_attr_pos` is the position before the outer attributes (or the node
/// itself, if no outer attributes are present). It is only needed if `f`
/// can return `UsePreAttrPos::Yes`.
///
/// Note: If your callback consumes an opening delimiter (including the
/// case where `self.token` is an opening delimiter on entry to this
/// function), you must also consume the corresponding closing delimiter.
@@ -197,11 +227,12 @@ impl<'a> Parser<'a> {
/// } // 32..33
/// } // 33..34
/// ```
pub(super) fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
&mut self,
pre_attr_pos: Option<CollectPos>,
attrs: AttrWrapper,
force_collect: ForceCollect,
f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, Trailing)>,
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
) -> PResult<'a, R> {
// We must collect if anything could observe the collected tokens, i.e.
// if any of the following conditions hold.
@@ -220,23 +251,20 @@ impl<'a> Parser<'a> {
return Ok(f(self, attrs.attrs)?.0);
}
let start_token = (self.token.clone(), self.token_spacing);
let cursor_snapshot = self.token_cursor.clone();
let start_pos = self.num_bump_calls;
let mut collect_pos = self.collect_pos();
let has_outer_attrs = !attrs.attrs.is_empty();
let parser_replacements_start = self.capture_state.parser_replacements.len();
// We set and restore `Capturing::Yes` on either side of the call to
// `f`, so we can distinguish the outermost call to
// `collect_tokens_trailing_token` (e.g. parsing `m` in the example
// above) from any inner (indirectly recursive) calls (e.g. parsing `g`
// in the example above). This distinction is used below and in
// `Parser::parse_inner_attributes`.
let (mut ret, capture_trailing) = {
// `f`, so we can distinguish the outermost call to `collect_tokens`
// (e.g. parsing `m` in the example above) from any inner (indirectly
// recursive) calls (e.g. parsing `g` in the example above). This
// distinction is used below and in `Parser::parse_inner_attributes`.
let (mut ret, capture_trailing, use_pre_attr_pos) = {
let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
let f_res = f(self, attrs.attrs);
let res = f(self, attrs.attrs);
self.capture_state.capturing = prev_capturing;
f_res?
res?
};
// When we're not in `capture_cfg` mode, then skip collecting and
@@ -279,6 +307,14 @@ impl<'a> Parser<'a> {
return Ok(ret);
}
// Replace the post-attribute collection start position with the
// pre-attribute position supplied, if `f` indicated it is necessary.
// (The caller is responsible for providing a non-`None` `pre_attr_pos`
// if this is a possibility.)
if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
collect_pos = pre_attr_pos.unwrap();
}
let parser_replacements_end = self.capture_state.parser_replacements.len();
assert!(
@@ -294,7 +330,7 @@ impl<'a> Parser<'a> {
// `AttrTokenStream`, we will create the proper token.
+ self.break_last_token as u32;
let num_calls = end_pos - start_pos;
let num_calls = end_pos - collect_pos.start_pos;
// Take the captured `ParserRange`s for any inner attributes that we parsed in
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
@@ -328,7 +364,9 @@ impl<'a> Parser<'a> {
.iter()
.cloned()
.chain(inner_attr_parser_replacements.iter().cloned())
.map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
.map(|(parser_range, data)| {
(NodeRange::new(parser_range, collect_pos.start_pos), data)
})
.collect()
};
@@ -355,9 +393,9 @@ impl<'a> Parser<'a> {
// - `tokens`: lazy tokens for `g` (with its inner attr deleted).
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
start_token,
start_token: collect_pos.start_token,
cursor_snapshot: collect_pos.cursor_snapshot,
num_calls,
cursor_snapshot,
break_last_token: self.break_last_token,
node_replacements,
});
@@ -368,9 +406,9 @@ impl<'a> Parser<'a> {
}
// If `capture_cfg` is set and we're inside a recursive call to
// `collect_tokens_trailing_token`, then we need to register a replace range
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
// on the captured token stream.
// `collect_tokens`, then we need to register a replace range if we
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
// cfg-expansion on the captured token stream.
if self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(ret.attrs())
@@ -389,7 +427,8 @@ impl<'a> Parser<'a> {
// Set things up so that the entire AST node that we just parsed, including attributes,
// will be replaced with `target` in the lazy token stream. This will allow us to
// cfg-expand this AST node.
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
let start_pos =
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
self.capture_state
.parser_replacements