Implement token-based handling of attributes during expansion

This PR modifies the macro expansion infrastructure to handle attributes
in a fully token-based manner. As a result:

* Derive macros no longer lose spans when their input is modified
  by eager cfg-expansion. This is accomplished by performing eager
  cfg-expansion on the token stream that we pass to the derive
  proc-macro.
* Inner attributes now preserve spans in all cases, including when we
  have multiple inner attributes in a row.

This is accomplished through the following changes:

* New structs `AttrAnnotatedTokenStream` and `AttrAnnotatedTokenTree` are introduced.
  These are very similar to a normal `TokenTree`, but they also track
  the position of attributes and attribute targets within the stream.
  They are built when we collect tokens during parsing.
  An `AttrAnnotatedTokenStream` is converted to a regular `TokenStream` when
  we invoke a macro.
* Token capturing and `LazyTokenStream` are modified to work with
  `AttrAnnotatedTokenStream`. A new `ReplaceRange` type is introduced, which
  is created during the parsing of a nested AST node to make the 'outer'
  AST node aware of the attributes and attribute target stored deeper in the token stream.
* When we need to perform eager cfg-expansion (either due to `#[derive]` or `#[cfg_eval]`),
we tokenize and reparse our target, capturing additional information about the locations of
`#[cfg]` and `#[cfg_attr]` attributes at any depth within the target.
This is a performance optimization, allowing us to perform less work
in the typical case where captured tokens never have eager cfg-expansion run.
This commit is contained in:
Aaron Hill 2020-11-28 18:33:17 -05:00
parent 25ea6be13e
commit a93c4f05de
No known key found for this signature in database
GPG key ID: B4087E510E98B164
33 changed files with 2046 additions and 1192 deletions

View file

@ -19,13 +19,16 @@ pub use path::PathStyle;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::AttributesData;
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
use rustc_ast::{Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::PResult;
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use tracing::debug;
use std::ops::Range;
use std::{cmp, mem, slice};
bitflags::bitflags! {
@ -64,6 +68,7 @@ pub enum ForceCollect {
No,
}
#[derive(Debug, Eq, PartialEq)]
pub enum TrailingToken {
None,
Semi,
@ -111,6 +116,7 @@ pub struct Parser<'a> {
pub token_spacing: Spacing,
/// The previous token.
pub prev_token: Token,
pub capture_cfg: bool,
restrictions: Restrictions,
expected_tokens: Vec<TokenType>,
// Important: This must only be advanced from `next_tok`
@ -134,6 +140,44 @@ pub struct Parser<'a> {
pub last_type_ascription: Option<(Span, bool /* likely path typo */)>,
/// If present, this `Parser` is not parsing Rust code but rather a macro call.
subparser_name: Option<&'static str>,
capture_state: CaptureState,
}
/// Indicates a range of tokens that should be replaced by
/// the tokens in the provided vector. This is used in two
/// places during token collection:
///
/// 1. During the parsing of an AST node that may have a `#[derive]`
/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`.
/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion
/// on an `AttrAnnotatedTokenStream`.
///
/// 2. When we parse an inner attribute while collecting tokens. We
/// remove inner attributes from the token stream entirely, and
/// instead track them through the `attrs` field on the AST node.
/// This allows us to easily manipulate them (for example, removing
/// the first macro inner attribute to invoke a proc-macro).
/// When we create a `TokenStream`, the inner attributes get inserted
/// into the proper place in the token stream.
pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
/// Controls how we capture tokens. Capturing can be expensive,
/// so we try to avoid performing capturing in cases where
/// we will never need an `AttrAnnotatedTokenStream`.
// `Debug` is derived so capture state can be inspected in diagnostics;
// public types should generally be `Debug`.
#[derive(Debug, Copy, Clone)]
pub enum Capturing {
    /// We aren't performing any capturing - this is the default mode.
    No,
    /// We are capturing tokens.
    Yes,
}
/// Mutable state used while capturing tokens on the `Parser`.
#[derive(Clone)]
struct CaptureState {
    /// Whether we are currently capturing tokens at all.
    capturing: Capturing,
    /// Ranges of captured tokens to be replaced when the final
    /// token stream is built (see the docs on `ReplaceRange`).
    replace_ranges: Vec<ReplaceRange>,
    /// Per-attribute replace ranges — presumably one entry for each inner
    /// attribute removed from the stream, keyed by its `AttrId`.
    /// NOTE(review): inferred from the field name and the `ReplaceRange`
    /// docs; confirm against the token-collection code.
    inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>,
}
impl<'a> Drop for Parser<'a> {
@ -167,18 +211,11 @@ struct TokenCursor {
// want to capture just the first 'unglued' token.
// For example, capturing the `Vec<u8>`
// in `Option<Vec<u8>>` requires us to unglue
// the trailing `>>` token. The `append_unglued_token`
// the trailing `>>` token. The `break_last_token`
// field is used to track this token - it gets
// appended to the captured stream when
// we evaluate a `LazyTokenStream`
append_unglued_token: Option<TreeAndSpacing>,
// If `true`, skip the delimiters for `None`-delimited groups,
// and just yield the inner tokens. This is `true` during
// normal parsing, since the parser code is not currently prepared
// to handle `None` delimiters. When capturing a `TokenStream`,
// however, we want to handle `None`-delimiters, since
// proc-macros always see `None`-delimited groups.
skip_none_delims: bool,
break_last_token: bool,
}
#[derive(Clone)]
@ -191,13 +228,13 @@ struct TokenCursorFrame {
}
impl TokenCursorFrame {
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self {
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
TokenCursorFrame {
delim,
span,
open_delim: delim == token::NoDelim && skip_none_delims,
open_delim: false,
tree_cursor: tts.into_trees(),
close_delim: delim == token::NoDelim && skip_none_delims,
close_delim: false,
}
}
}
@ -225,7 +262,7 @@ impl TokenCursor {
return (token, spacing);
}
TokenTree::Delimited(sp, delim, tts) => {
let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims);
let frame = TokenCursorFrame::new(sp, delim, tts);
self.stack.push(mem::replace(&mut self.frame, frame));
}
}
@ -283,7 +320,6 @@ impl TokenCursor {
.cloned()
.collect::<TokenStream>()
},
self.skip_none_delims,
),
));
@ -372,26 +408,24 @@ impl<'a> Parser<'a> {
desugar_doc_comments: bool,
subparser_name: Option<&'static str>,
) -> Self {
let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
start_frame.open_delim = true;
start_frame.close_delim = true;
let mut parser = Parser {
sess,
token: Token::dummy(),
token_spacing: Spacing::Alone,
prev_token: Token::dummy(),
capture_cfg: false,
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
// Skip over the delimiters for `None`-delimited groups
token_cursor: TokenCursor {
frame: TokenCursorFrame::new(
DelimSpan::dummy(),
token::NoDelim,
tokens,
/* skip_none_delims */ true,
),
frame: start_frame,
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
append_unglued_token: None,
skip_none_delims: true,
break_last_token: false,
},
desugar_doc_comments,
unmatched_angle_bracket_count: 0,
@ -400,6 +434,11 @@ impl<'a> Parser<'a> {
last_unexpected_token_span: None,
last_type_ascription: None,
subparser_name,
capture_state: CaptureState {
capturing: Capturing::No,
replace_ranges: Vec::new(),
inner_attr_ranges: Default::default(),
},
};
// Make parser point to the first token.
@ -409,21 +448,29 @@ impl<'a> Parser<'a> {
}
fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
let (mut next, spacing) = if self.desugar_doc_comments {
self.token_cursor.next_desugared()
} else {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.append_unglued_token = None;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
loop {
let (mut next, spacing) = if self.desugar_doc_comments {
self.token_cursor.next_desugared()
} else {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.break_last_token = false;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
}
if matches!(
next.kind,
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
) {
continue;
}
return (next, spacing);
}
(next, spacing)
}
pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
@ -621,8 +668,7 @@ impl<'a> Parser<'a> {
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `next_tok` will set this field to `None`
self.token_cursor.append_unglued_token =
Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
self.token_cursor.break_last_token = true;
// Use the spacing of the glued token as the spacing
// of the unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
@ -1304,3 +1350,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
}
}
}
/// A helper enum used when building an `AttrAnnotatedTokenStream` from
/// a `LazyTokenStream`. Both delimiter and non-delimited tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
/// is then 'parsed' to build up an `AttrAnnotatedTokenStream` with nested
/// `AttrAnnotatedTokenTree::Delimited` tokens.
#[derive(Debug, Clone)]
pub enum FlatToken {
    /// A token - this holds both delimiter (e.g. '{' and '}')
    /// and non-delimiter tokens.
    Token(Token),
    /// Holds the `AttributesData` for an AST node. The
    /// `AttributesData` is inserted directly into the
    /// constructed `AttrAnnotatedTokenStream` as
    /// an `AttrAnnotatedTokenTree::Attributes`.
    AttrTarget(AttributesData),
    /// A special 'empty' token that is ignored during the conversion
    /// to an `AttrAnnotatedTokenStream`. This is used to simplify the
    /// handling of replace ranges.
    Empty,
}