Implement token-based handling of attributes during expansion
This PR modifies the macro expansion infrastructure to handle attributes in a fully token-based manner. As a result: * Derives macros no longer lose spans when their input is modified by eager cfg-expansion. This is accomplished by performing eager cfg-expansion on the token stream that we pass to the derive proc-macro * Inner attributes now preserve spans in all cases, including when we have multiple inner attributes in a row. This is accomplished through the following changes: * New structs `AttrAnnotatedTokenStream` and `AttrAnnotatedTokenTree` are introduced. These are very similar to a normal `TokenTree`, but they also track the position of attributes and attribute targets within the stream. They are built when we collect tokens during parsing. An `AttrAnnotatedTokenStream` is converted to a regular `TokenStream` when we invoke a macro. * Token capturing and `LazyTokenStream` are modified to work with `AttrAnnotatedTokenStream`. A new `ReplaceRange` type is introduced, which is created during the parsing of a nested AST node to make the 'outer' AST node aware of the attributes and attribute target stored deeper in the token stream. * When we need to perform eager cfg-expansion (either due to `#[derive]` or `#[cfg_eval]`), we tokenize and reparse our target, capturing additional information about the locations of `#[cfg]` and `#[cfg_attr]` attributes at any depth within the target. This is a performance optimization, allowing us to perform less work in the typical case where captured tokens never have eager cfg-expansion run.
This commit is contained in:
parent
25ea6be13e
commit
a93c4f05de
33 changed files with 2046 additions and 1192 deletions
|
@ -19,13 +19,16 @@ pub use path::PathStyle;
|
|||
|
||||
use rustc_ast::ptr::P;
|
||||
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::AttributesData;
|
||||
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
|
||||
use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing};
|
||||
use rustc_ast::tokenstream::{TokenStream, TokenTree};
|
||||
use rustc_ast::AttrId;
|
||||
use rustc_ast::DUMMY_NODE_ID;
|
||||
use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern};
|
||||
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
|
||||
use rustc_ast::{Visibility, VisibilityKind};
|
||||
use rustc_ast_pretty::pprust;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
use rustc_errors::PResult;
|
||||
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
|
||||
|
@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP};
|
|||
use rustc_span::symbol::{kw, sym, Ident, Symbol};
|
||||
use tracing::debug;
|
||||
|
||||
use std::ops::Range;
|
||||
use std::{cmp, mem, slice};
|
||||
|
||||
bitflags::bitflags! {
|
||||
|
@ -64,6 +68,7 @@ pub enum ForceCollect {
|
|||
No,
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub enum TrailingToken {
|
||||
None,
|
||||
Semi,
|
||||
|
@ -111,6 +116,7 @@ pub struct Parser<'a> {
|
|||
pub token_spacing: Spacing,
|
||||
/// The previous token.
|
||||
pub prev_token: Token,
|
||||
pub capture_cfg: bool,
|
||||
restrictions: Restrictions,
|
||||
expected_tokens: Vec<TokenType>,
|
||||
// Important: This must only be advanced from `next_tok`
|
||||
|
@ -134,6 +140,44 @@ pub struct Parser<'a> {
|
|||
pub last_type_ascription: Option<(Span, bool /* likely path typo */)>,
|
||||
/// If present, this `Parser` is not parsing Rust code but rather a macro call.
|
||||
subparser_name: Option<&'static str>,
|
||||
capture_state: CaptureState,
|
||||
}
|
||||
|
||||
/// Indicates a range of tokens that should be replaced by
|
||||
/// the tokens in the provided vector. This is used in two
|
||||
/// places during token collection:
|
||||
///
|
||||
/// 1. During the parsing of an AST node that may have a `#[derive]`
|
||||
/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`
|
||||
/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
|
||||
/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion
|
||||
/// on a `AttrAnnotatedTokenStream`
|
||||
///
|
||||
/// 2. When we parse an inner attribute while collecting tokens. We
|
||||
/// remove inner attributes from the token stream entirely, and
|
||||
/// instead track them through the `attrs` field on the AST node.
|
||||
/// This allows us to easily manipulate them (for example, removing
|
||||
/// the first macro inner attribute to invoke a proc-macro).
|
||||
/// When create a `TokenStream`, the inner attributes get inserted
|
||||
/// into the proper place in the token stream.
|
||||
pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
|
||||
|
||||
/// Controls how we capture tokens. Capturing can be expensive,
|
||||
/// so we try to avoid performing capturing in cases where
|
||||
/// we will never need a `AttrAnnotatedTokenStream`
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum Capturing {
|
||||
/// We aren't performing any capturing - this is the default mode.
|
||||
No,
|
||||
/// We are capturing tokens
|
||||
Yes,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CaptureState {
|
||||
capturing: Capturing,
|
||||
replace_ranges: Vec<ReplaceRange>,
|
||||
inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>,
|
||||
}
|
||||
|
||||
impl<'a> Drop for Parser<'a> {
|
||||
|
@ -167,18 +211,11 @@ struct TokenCursor {
|
|||
// want to capture just the first 'unglued' token.
|
||||
// For example, capturing the `Vec<u8>`
|
||||
// in `Option<Vec<u8>>` requires us to unglue
|
||||
// the trailing `>>` token. The `append_unglued_token`
|
||||
// the trailing `>>` token. The `break_last_token`
|
||||
// field is used to track this token - it gets
|
||||
// appended to the captured stream when
|
||||
// we evaluate a `LazyTokenStream`
|
||||
append_unglued_token: Option<TreeAndSpacing>,
|
||||
// If `true`, skip the delimiters for `None`-delimited groups,
|
||||
// and just yield the inner tokens. This is `true` during
|
||||
// normal parsing, since the parser code is not currently prepared
|
||||
// to handle `None` delimiters. When capturing a `TokenStream`,
|
||||
// however, we want to handle `None`-delimiters, since
|
||||
// proc-macros always see `None`-delimited groups.
|
||||
skip_none_delims: bool,
|
||||
break_last_token: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -191,13 +228,13 @@ struct TokenCursorFrame {
|
|||
}
|
||||
|
||||
impl TokenCursorFrame {
|
||||
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self {
|
||||
fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
|
||||
TokenCursorFrame {
|
||||
delim,
|
||||
span,
|
||||
open_delim: delim == token::NoDelim && skip_none_delims,
|
||||
open_delim: false,
|
||||
tree_cursor: tts.into_trees(),
|
||||
close_delim: delim == token::NoDelim && skip_none_delims,
|
||||
close_delim: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -225,7 +262,7 @@ impl TokenCursor {
|
|||
return (token, spacing);
|
||||
}
|
||||
TokenTree::Delimited(sp, delim, tts) => {
|
||||
let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims);
|
||||
let frame = TokenCursorFrame::new(sp, delim, tts);
|
||||
self.stack.push(mem::replace(&mut self.frame, frame));
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +320,6 @@ impl TokenCursor {
|
|||
.cloned()
|
||||
.collect::<TokenStream>()
|
||||
},
|
||||
self.skip_none_delims,
|
||||
),
|
||||
));
|
||||
|
||||
|
@ -372,26 +408,24 @@ impl<'a> Parser<'a> {
|
|||
desugar_doc_comments: bool,
|
||||
subparser_name: Option<&'static str>,
|
||||
) -> Self {
|
||||
let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens);
|
||||
start_frame.open_delim = true;
|
||||
start_frame.close_delim = true;
|
||||
|
||||
let mut parser = Parser {
|
||||
sess,
|
||||
token: Token::dummy(),
|
||||
token_spacing: Spacing::Alone,
|
||||
prev_token: Token::dummy(),
|
||||
capture_cfg: false,
|
||||
restrictions: Restrictions::empty(),
|
||||
expected_tokens: Vec::new(),
|
||||
// Skip over the delimiters for `None`-delimited groups
|
||||
token_cursor: TokenCursor {
|
||||
frame: TokenCursorFrame::new(
|
||||
DelimSpan::dummy(),
|
||||
token::NoDelim,
|
||||
tokens,
|
||||
/* skip_none_delims */ true,
|
||||
),
|
||||
frame: start_frame,
|
||||
stack: Vec::new(),
|
||||
num_next_calls: 0,
|
||||
desugar_doc_comments,
|
||||
append_unglued_token: None,
|
||||
skip_none_delims: true,
|
||||
break_last_token: false,
|
||||
},
|
||||
desugar_doc_comments,
|
||||
unmatched_angle_bracket_count: 0,
|
||||
|
@ -400,6 +434,11 @@ impl<'a> Parser<'a> {
|
|||
last_unexpected_token_span: None,
|
||||
last_type_ascription: None,
|
||||
subparser_name,
|
||||
capture_state: CaptureState {
|
||||
capturing: Capturing::No,
|
||||
replace_ranges: Vec::new(),
|
||||
inner_attr_ranges: Default::default(),
|
||||
},
|
||||
};
|
||||
|
||||
// Make parser point to the first token.
|
||||
|
@ -409,21 +448,29 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
|
||||
let (mut next, spacing) = if self.desugar_doc_comments {
|
||||
self.token_cursor.next_desugared()
|
||||
} else {
|
||||
self.token_cursor.next()
|
||||
};
|
||||
self.token_cursor.num_next_calls += 1;
|
||||
// We've retrieved an token from the underlying
|
||||
// cursor, so we no longer need to worry about
|
||||
// an unglued token. See `break_and_eat` for more details
|
||||
self.token_cursor.append_unglued_token = None;
|
||||
if next.span.is_dummy() {
|
||||
// Tweak the location for better diagnostics, but keep syntactic context intact.
|
||||
next.span = fallback_span.with_ctxt(next.span.ctxt());
|
||||
loop {
|
||||
let (mut next, spacing) = if self.desugar_doc_comments {
|
||||
self.token_cursor.next_desugared()
|
||||
} else {
|
||||
self.token_cursor.next()
|
||||
};
|
||||
self.token_cursor.num_next_calls += 1;
|
||||
// We've retrieved an token from the underlying
|
||||
// cursor, so we no longer need to worry about
|
||||
// an unglued token. See `break_and_eat` for more details
|
||||
self.token_cursor.break_last_token = false;
|
||||
if next.span.is_dummy() {
|
||||
// Tweak the location for better diagnostics, but keep syntactic context intact.
|
||||
next.span = fallback_span.with_ctxt(next.span.ctxt());
|
||||
}
|
||||
if matches!(
|
||||
next.kind,
|
||||
token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
return (next, spacing);
|
||||
}
|
||||
(next, spacing)
|
||||
}
|
||||
|
||||
pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
|
||||
|
@ -621,8 +668,7 @@ impl<'a> Parser<'a> {
|
|||
// If we consume any additional tokens, then this token
|
||||
// is not needed (we'll capture the entire 'glued' token),
|
||||
// and `next_tok` will set this field to `None`
|
||||
self.token_cursor.append_unglued_token =
|
||||
Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
|
||||
self.token_cursor.break_last_token = true;
|
||||
// Use the spacing of the glued token as the spacing
|
||||
// of the unglued second token.
|
||||
self.bump_with((Token::new(second, second_span), self.token_spacing));
|
||||
|
@ -1304,3 +1350,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper struct used when building a `AttrAnnotatedTokenStream` from
|
||||
/// a `LazyTokenStream`. Both delimiter and non-delimited tokens
|
||||
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
|
||||
/// is then 'parsed' to build up a `AttrAnnotatedTokenStream` with nested
|
||||
/// `AttrAnnotatedTokenTree::Delimited` tokens
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FlatToken {
|
||||
/// A token - this holds both delimiter (e.g. '{' and '}')
|
||||
/// and non-delimiter tokens
|
||||
Token(Token),
|
||||
/// Holds the `AttributesData` for an AST node. The
|
||||
/// `AttributesData` is inserted directly into the
|
||||
/// constructed `AttrAnnotatedTokenStream` as
|
||||
/// a `AttrAnnotatedTokenTree::Attributes`
|
||||
AttrTarget(AttributesData),
|
||||
/// A special 'empty' token that is ignored during the conversion
|
||||
/// to a `AttrAnnotatedTokenStream`. This is used to simplify the
|
||||
/// handling of replace ranges.
|
||||
Empty,
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue