Implement token-based handling of attributes during expansion

This PR modifies the macro expansion infrastructure to handle attributes
in a fully token-based manner. As a result:

* Derive macros no longer lose spans when their input is modified
  by eager cfg-expansion. This is accomplished by performing eager
  cfg-expansion on the token stream that we pass to the derive
  proc-macro.
* Inner attributes now preserve spans in all cases, including when we
  have multiple inner attributes in a row.

This is accomplished through the following changes:

* New structs `AttrAnnotatedTokenStream` and `AttrAnnotatedTokenTree` are introduced.
  These are very similar to a normal `TokenTree`, but they also track
  the position of attributes and attribute targets within the stream.
  They are built when we collect tokens during parsing.
  An `AttrAnnotatedTokenStream` is converted to a regular `TokenStream` when
  we invoke a macro.
* Token capturing and `LazyTokenStream` are modified to work with
  `AttrAnnotatedTokenStream`. A new `ReplaceRange` type is introduced, which
  is created during the parsing of a nested AST node to make the 'outer'
  AST node aware of the attributes and attribute target stored deeper in the token stream.
* When we need to perform eager cfg-expansion (either due to `#[derive]` or `#[cfg_eval]`),
we tokenize and reparse our target, capturing additional information about the locations of
`#[cfg]` and `#[cfg_attr]` attributes at any depth within the target.
This is a performance optimization, allowing us to perform less work
in the typical case where captured tokens never have eager cfg-expansion run.
This commit is contained in:
Aaron Hill 2020-11-28 18:33:17 -05:00
parent 25ea6be13e
commit a93c4f05de
No known key found for this signature in database
GPG key ID: B4087E510E98B164
33 changed files with 2046 additions and 1192 deletions

View file

@ -19,13 +19,16 @@ pub use path::PathStyle;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::AttributesData;
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
use rustc_ast::{Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::PResult;
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use tracing::debug;
use std::ops::Range;
use std::{cmp, mem, slice};
bitflags::bitflags! {
@ -64,6 +68,7 @@ pub enum ForceCollect {
No,
}
#[derive(Debug, Eq, PartialEq)]
pub enum TrailingToken {
None,
Semi,
@ -111,6 +116,7 @@ pub struct Parser<'a> {
pub token_spacing: Spacing,
/// The previous token.
pub prev_token: Token,
pub capture_cfg: bool,
restrictions: Restrictions,
expected_tokens: Vec<TokenType>,
// Important: This must only be advanced from `next_tok`
@ -134,6 +140,44 @@ pub struct Parser<'a> {
pub last_type_ascription: Option<(Span, bool /* likely path typo */)>,
/// If present, this `Parser` is not parsing Rust code but rather a macro call.
subparser_name: Option<&'static str>,
capture_state: CaptureState,
}
/// Indicates a range of tokens that should be replaced by
/// the tokens in the provided vector. This is used in two
/// places during token collection:
///
/// 1. During the parsing of an AST node that may have a `#[derive]`
/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`.
/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion
/// on an `AttrAnnotatedTokenStream`.
///
/// 2. When we parse an inner attribute while collecting tokens. We
/// remove inner attributes from the token stream entirely, and
/// instead track them through the `attrs` field on the AST node.
/// This allows us to easily manipulate them (for example, removing
/// the first macro inner attribute to invoke a proc-macro).
/// When we create a `TokenStream`, the inner attributes get inserted
/// into the proper place in the token stream.
pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
/// Controls how we capture tokens. Capturing can be expensive,
/// so we try to avoid performing capturing in cases where
/// we will never need an `AttrAnnotatedTokenStream`.
// `Debug` is derived so capture state can be inspected in diagnostics;
// public types should generally be `Debug`.
#[derive(Debug, Copy, Clone)]
pub enum Capturing {
    /// We aren't performing any capturing - this is the default mode.
    No,
    /// We are capturing tokens.
    Yes,
}
/// Mutable state used while capturing tokens on the `Parser`.
#[derive(Clone)]
struct CaptureState {
    /// Whether we are currently capturing tokens at all.
    capturing: Capturing,
    /// Ranges of captured tokens to be replaced when the final
    /// token stream is built (see the docs on `ReplaceRange`).
    replace_ranges: Vec<ReplaceRange>,
    /// Per-attribute replace ranges — presumably one entry for each inner
    /// attribute removed from the stream, keyed by its `AttrId`.
    /// NOTE(review): inferred from the field name and the `ReplaceRange`
    /// docs; confirm against the token-collection code.
    inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>,
}
impl<'a> Drop for Parser<'a> {
@ -167,18 +211,11 @@ struct TokenCursor {
// want to capture just the first 'unglued' token.
// For example, capturing the `Vec<u8>`
// in `Option<Vec<u8>>` requires us to unglue
// the trailing `>>` token. The `append_unglued_token`
// the trailing `>>` token. The `break_last_token`
// field is used to track this token - it gets
// appended to the captured stream when
// we evaluate a `LazyTokenStream`
append_unglued_token: Option<TreeAndSpacing>,
// If `true`, skip the delimiters for `None`-delimited groups,
// and just yield the inner tokens. This is `true` during
// normal parsing, since the parser code is not currently prepared
// to handle `None` delimiters. When capturing a `TokenStream`,
// however, we want to handle `None`-delimiters, since
// proc-macros always see `None`-delimited groups.
skip_none_delims: bool,
break_last_token: bool,
}
#[derive(Clone)]
@ -191,13 +228,13 @@ struct TokenCursorFrame {
}
impl TokenCursorFrame {
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self {
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
TokenCursorFrame {
delim,
span,
open_delim: delim == token::NoDelim && skip_none_delims,
open_delim: false,
tree_cursor: tts.into_trees(),
close_delim: delim == token::NoDelim && skip_none_delims,
close_delim: false,
}
}
}
@ -225,7 +262,7 @@ impl TokenCursor {
return (token, spacing);
}
TokenTree::Delimited(sp, delim, tts) => {
let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims);
let frame = TokenCursorFrame::new(sp, delim, tts);
self.stack.push(mem::replace(&mut self.frame, frame));
}
}
@ -283,7 +320,6 @@ impl TokenCursor {
.cloned()
.collect::<TokenStream>()
},
self.skip_none_delims,
),
));
@ -372,26 +408,24 @@ impl<'a> Parser<'a> {
desugar_doc_comments: bool,
subparser_name: Option<&'static str>,
) -> Self {
let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
start_frame.open_delim = true;
start_frame.close_delim = true;
let mut parser = Parser {
sess,
token: Token::dummy(),
token_spacing: Spacing::Alone,
prev_token: Token::dummy(),
capture_cfg: false,
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
// Skip over the delimiters for `None`-delimited groups
token_cursor: TokenCursor {
frame: TokenCursorFrame::new(
DelimSpan::dummy(),
token::NoDelim,
tokens,
/* skip_none_delims */ true,
),
frame: start_frame,
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
append_unglued_token: None,
skip_none_delims: true,
break_last_token: false,
},
desugar_doc_comments,
unmatched_angle_bracket_count: 0,
@ -400,6 +434,11 @@ impl<'a> Parser<'a> {
last_unexpected_token_span: None,
last_type_ascription: None,
subparser_name,
capture_state: CaptureState {
capturing: Capturing::No,
replace_ranges: Vec::new(),
inner_attr_ranges: Default::default(),
},
};
// Make parser point to the first token.
@ -409,21 +448,29 @@ impl<'a> Parser<'a> {
}
fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
let (mut next, spacing) = if self.desugar_doc_comments {
self.token_cursor.next_desugared()
} else {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.append_unglued_token = None;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
loop {
let (mut next, spacing) = if self.desugar_doc_comments {
self.token_cursor.next_desugared()
} else {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.break_last_token = false;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
}
if matches!(
next.kind,
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
) {
continue;
}
return (next, spacing);
}
(next, spacing)
}
pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
@ -621,8 +668,7 @@ impl<'a> Parser<'a> {
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `next_tok` will set this field to `None`
self.token_cursor.append_unglued_token =
Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
self.token_cursor.break_last_token = true;
// Use the spacing of the glued token as the spacing
// of the unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
@ -1304,3 +1350,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
}
}
}
/// A helper enum used when building an `AttrAnnotatedTokenStream` from
/// a `LazyTokenStream`. Both delimiter and non-delimited tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
/// is then 'parsed' to build up an `AttrAnnotatedTokenStream` with nested
/// `AttrAnnotatedTokenTree::Delimited` tokens.
#[derive(Debug, Clone)]
pub enum FlatToken {
    /// A token - this holds both delimiter (e.g. '{' and '}')
    /// and non-delimiter tokens.
    Token(Token),
    /// Holds the `AttributesData` for an AST node. The
    /// `AttributesData` is inserted directly into the
    /// constructed `AttrAnnotatedTokenStream` as
    /// an `AttrAnnotatedTokenTree::Attributes`.
    AttrTarget(AttributesData),
    /// A special 'empty' token that is ignored during the conversion
    /// to an `AttrAnnotatedTokenStream`. This is used to simplify the
    /// handling of replace ranges.
    Empty,
}