Overhaul token collection.
This commit does the following. - Renames `collect_tokens_trailing_token` as `collect_tokens`, because (a) it's annoyingly long, and (b) the `_trailing_token` bit is less accurate now that its types have changed. - In `collect_tokens`, adds a `Option<CollectPos>` argument and a `UsePreAttrPos` in the return type of `f`. These are used in `parse_expr_force_collect` (for vanilla expressions) and in `parse_stmt_without_recovery` (for two different cases of expression statements). Together these are enough to fix all the problems with token collection and assoc expressions. The changes to the `stringify.rs` test demonstrate some of these. - Adds a new test. The code in this test was causing an assertion failure prior to this commit, due to an invalid `NodeRange`. The extra complexity is annoying, but necessary to fix the existing problems.
This commit is contained in:
parent
fe460ac28b
commit
9d31f86f0d
12 changed files with 414 additions and 292 deletions
|
@ -15,6 +15,20 @@ use super::{
|
|||
TokenCursor, Trailing,
|
||||
};
|
||||
|
||||
// When collecting tokens, this fully captures the start point. Usually it's
|
||||
// just after outer attributes, but occasionally it's before.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct CollectPos {
|
||||
start_token: (Token, Spacing),
|
||||
cursor_snapshot: TokenCursor,
|
||||
start_pos: u32,
|
||||
}
|
||||
|
||||
pub(super) enum UsePreAttrPos {
|
||||
No,
|
||||
Yes,
|
||||
}
|
||||
|
||||
/// A wrapper type to ensure that the parser handles outer attributes correctly.
|
||||
/// When we parse outer attributes, we need to ensure that we capture tokens
|
||||
/// for the attribute target. This allows us to perform cfg-expansion on
|
||||
|
@ -22,7 +36,7 @@ use super::{
|
|||
///
|
||||
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
|
||||
/// Parsing code can only get access to the underlying attributes
|
||||
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
|
||||
/// by passing an `AttrWrapper` to `collect_tokens`.
|
||||
/// This makes it difficult to accidentally construct an AST node
|
||||
/// (which stores an `ast::AttrVec`) without first collecting tokens.
|
||||
///
|
||||
|
@ -33,16 +47,18 @@ pub(super) struct AttrWrapper {
|
|||
attrs: AttrVec,
|
||||
// The start of the outer attributes in the parser's token stream.
|
||||
// This lets us create a `NodeReplacement` for the entire attribute
|
||||
// target, including outer attributes.
|
||||
start_pos: u32,
|
||||
// target, including outer attributes. `None` if there are no outer
|
||||
// attributes.
|
||||
start_pos: Option<u32>,
|
||||
}
|
||||
|
||||
impl AttrWrapper {
|
||||
pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
|
||||
AttrWrapper { attrs, start_pos }
|
||||
AttrWrapper { attrs, start_pos: Some(start_pos) }
|
||||
}
|
||||
|
||||
pub(super) fn empty() -> AttrWrapper {
|
||||
AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
|
||||
AttrWrapper { attrs: AttrVec::new(), start_pos: None }
|
||||
}
|
||||
|
||||
pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
|
||||
|
@ -77,7 +93,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
|
|||
}
|
||||
|
||||
// From a value of this type we can reconstruct the `TokenStream` seen by the
|
||||
// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
|
||||
// `f` callback passed to a call to `Parser::collect_tokens`, by
|
||||
// replaying the getting of the tokens. This saves us producing a `TokenStream`
|
||||
// if it is never needed, e.g. a captured `macro_rules!` argument that is never
|
||||
// passed to a proc macro. In practice, token stream creation happens rarely
|
||||
|
@ -166,16 +182,30 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
|
|||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub(super) fn collect_pos(&self) -> CollectPos {
|
||||
CollectPos {
|
||||
start_token: (self.token.clone(), self.token_spacing),
|
||||
cursor_snapshot: self.token_cursor.clone(),
|
||||
start_pos: self.num_bump_calls,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses code with `f`. If appropriate, it records the tokens (in
|
||||
/// `LazyAttrTokenStream` form) that were parsed in the result, accessible
|
||||
/// via the `HasTokens` trait. The `Trailing` part of the callback's
|
||||
/// result indicates if an extra token should be captured, e.g. a comma or
|
||||
/// semicolon.
|
||||
/// semicolon. The `UsePreAttrPos` part of the callback's result indicates
|
||||
/// if we should use `pre_attr_pos` as the collection start position (only
|
||||
/// required in a few cases).
|
||||
///
|
||||
/// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
|
||||
/// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
|
||||
/// details.
|
||||
///
|
||||
/// `pre_attr_pos` is the position before the outer attributes (or the node
|
||||
/// itself, if no outer attributes are present). It is only needed if `f`
|
||||
/// can return `UsePreAttrPos::Yes`.
|
||||
///
|
||||
/// Note: If your callback consumes an opening delimiter (including the
|
||||
/// case where `self.token` is an opening delimiter on entry to this
|
||||
/// function), you must also consume the corresponding closing delimiter.
|
||||
|
@ -197,11 +227,12 @@ impl<'a> Parser<'a> {
|
|||
/// } // 32..33
|
||||
/// } // 33..34
|
||||
/// ```
|
||||
pub(super) fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
|
||||
pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
|
||||
&mut self,
|
||||
pre_attr_pos: Option<CollectPos>,
|
||||
attrs: AttrWrapper,
|
||||
force_collect: ForceCollect,
|
||||
f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, Trailing)>,
|
||||
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
|
||||
) -> PResult<'a, R> {
|
||||
// We must collect if anything could observe the collected tokens, i.e.
|
||||
// if any of the following conditions hold.
|
||||
|
@ -220,23 +251,20 @@ impl<'a> Parser<'a> {
|
|||
return Ok(f(self, attrs.attrs)?.0);
|
||||
}
|
||||
|
||||
let start_token = (self.token.clone(), self.token_spacing);
|
||||
let cursor_snapshot = self.token_cursor.clone();
|
||||
let start_pos = self.num_bump_calls;
|
||||
let mut collect_pos = self.collect_pos();
|
||||
let has_outer_attrs = !attrs.attrs.is_empty();
|
||||
let parser_replacements_start = self.capture_state.parser_replacements.len();
|
||||
|
||||
// We set and restore `Capturing::Yes` on either side of the call to
|
||||
// `f`, so we can distinguish the outermost call to
|
||||
// `collect_tokens_trailing_token` (e.g. parsing `m` in the example
|
||||
// above) from any inner (indirectly recursive) calls (e.g. parsing `g`
|
||||
// in the example above). This distinction is used below and in
|
||||
// `Parser::parse_inner_attributes`.
|
||||
let (mut ret, capture_trailing) = {
|
||||
// `f`, so we can distinguish the outermost call to `collect_tokens`
|
||||
// (e.g. parsing `m` in the example above) from any inner (indirectly
|
||||
// recursive) calls (e.g. parsing `g` in the example above). This
|
||||
// distinction is used below and in `Parser::parse_inner_attributes`.
|
||||
let (mut ret, capture_trailing, use_pre_attr_pos) = {
|
||||
let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
|
||||
let f_res = f(self, attrs.attrs);
|
||||
let res = f(self, attrs.attrs);
|
||||
self.capture_state.capturing = prev_capturing;
|
||||
f_res?
|
||||
res?
|
||||
};
|
||||
|
||||
// When we're not in `capture_cfg` mode, then skip collecting and
|
||||
|
@ -279,6 +307,14 @@ impl<'a> Parser<'a> {
|
|||
return Ok(ret);
|
||||
}
|
||||
|
||||
// Replace the post-attribute collection start position with the
|
||||
// pre-attribute position supplied, if `f` indicated it is necessary.
|
||||
// (The caller is responsible for providing a non-`None` `pre_attr_pos`
|
||||
// if this is a possibility.)
|
||||
if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
|
||||
collect_pos = pre_attr_pos.unwrap();
|
||||
}
|
||||
|
||||
let parser_replacements_end = self.capture_state.parser_replacements.len();
|
||||
|
||||
assert!(
|
||||
|
@ -294,7 +330,7 @@ impl<'a> Parser<'a> {
|
|||
// `AttrTokenStream`, we will create the proper token.
|
||||
+ self.break_last_token as u32;
|
||||
|
||||
let num_calls = end_pos - start_pos;
|
||||
let num_calls = end_pos - collect_pos.start_pos;
|
||||
|
||||
// Take the captured `ParserRange`s for any inner attributes that we parsed in
|
||||
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
|
||||
|
@ -328,7 +364,9 @@ impl<'a> Parser<'a> {
|
|||
.iter()
|
||||
.cloned()
|
||||
.chain(inner_attr_parser_replacements.iter().cloned())
|
||||
.map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
|
||||
.map(|(parser_range, data)| {
|
||||
(NodeRange::new(parser_range, collect_pos.start_pos), data)
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
|
@ -355,9 +393,9 @@ impl<'a> Parser<'a> {
|
|||
// - `tokens`: lazy tokens for `g` (with its inner attr deleted).
|
||||
|
||||
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
|
||||
start_token,
|
||||
start_token: collect_pos.start_token,
|
||||
cursor_snapshot: collect_pos.cursor_snapshot,
|
||||
num_calls,
|
||||
cursor_snapshot,
|
||||
break_last_token: self.break_last_token,
|
||||
node_replacements,
|
||||
});
|
||||
|
@ -368,9 +406,9 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
// If `capture_cfg` is set and we're inside a recursive call to
|
||||
// `collect_tokens_trailing_token`, then we need to register a replace range
|
||||
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
|
||||
// on the captured token stream.
|
||||
// `collect_tokens`, then we need to register a replace range if we
|
||||
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
|
||||
// cfg-expansion on the captured token stream.
|
||||
if self.capture_cfg
|
||||
&& matches!(self.capture_state.capturing, Capturing::Yes)
|
||||
&& has_cfg_or_cfg_attr(ret.attrs())
|
||||
|
@ -389,7 +427,8 @@ impl<'a> Parser<'a> {
|
|||
// Set things up so that the entire AST node that we just parsed, including attributes,
|
||||
// will be replaced with `target` in the lazy token stream. This will allow us to
|
||||
// cfg-expand this AST node.
|
||||
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
|
||||
let start_pos =
|
||||
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
|
||||
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
|
||||
self.capture_state
|
||||
.parser_replacements
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue