Avoid double-handling of attributes in collect_tokens.

By keeping track of attributes that have been previously processed.

This fixes the `macro-rules-derive-cfg.stdout` test, and is necessary
for #124141 which removes nonterminals.

Also shrink the `SmallVec` inline size used in `IntervalSet`. 2 gives
slightly better perf than 4 now that there's an `IntervalSet` in
`Parser`, which is cloned reasonably often.
This commit is contained in:
Nicholas Nethercote 2024-08-21 14:16:42 +10:00
parent b7e7f6e903
commit 1fdabfbebb
6 changed files with 30 additions and 35 deletions

View file

@ -256,6 +256,20 @@ impl<'a> Parser<'a> {
res?
};
// Ignore any attributes we've previously processed. This happens when
// an inner call to `collect_tokens` returns an AST node and then an
// outer call ends up with the same AST node without any additional
// wrapping layer.
let ret_attrs: AttrVec = ret
.attrs()
.iter()
.cloned()
.filter(|attr| {
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
is_unseen
})
.collect();
// When we're not in "definite capture mode", then skip collecting and
// return early if either of the following conditions hold.
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
@ -269,7 +283,7 @@ impl<'a> Parser<'a> {
// tokens.
let definite_capture_mode = self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(ret.attrs());
&& has_cfg_or_cfg_attr(&ret_attrs);
if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
return Ok(ret);
}
@ -289,7 +303,7 @@ impl<'a> Parser<'a> {
// outer and inner attributes. So this check is more precise than
// the earlier `needs_tokens` check, and we don't need to
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
|| needs_tokens(ret.attrs())
|| needs_tokens(&ret_attrs)
// - We are in "definite capture mode", which requires that there
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
// non-`capture_cfg` parsing, we don't need any special capturing
@ -328,7 +342,7 @@ impl<'a> Parser<'a> {
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
// which means the relevant tokens will be removed. (More details below.)
let mut inner_attr_parser_replacements = Vec::new();
for attr in ret.attrs() {
for attr in ret_attrs.iter() {
if attr.style == ast::AttrStyle::Inner {
if let Some(inner_attr_parser_range) =
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@ -418,7 +432,7 @@ impl<'a> Parser<'a> {
// cfg-expand this AST node.
let start_pos =
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
let target = AttrsTarget { attrs: ret_attrs, tokens };
tokens_used = true;
self.capture_state
.parser_replacements
@ -428,6 +442,7 @@ impl<'a> Parser<'a> {
// the outermost call to this method.
self.capture_state.parser_replacements.clear();
self.capture_state.inner_attr_parser_ranges.clear();
self.capture_state.seen_attrs.clear();
}
assert!(tokens_used); // check we didn't create `tokens` unnecessarily
Ok(ret)

View file

@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
use rustc_index::interval::IntervalSet;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
@ -183,7 +184,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
rustc_data_structures::static_assert_size!(Parser<'_>, 288);
/// Stores span information about a closure.
#[derive(Clone, Debug)]
@ -261,6 +262,9 @@ struct CaptureState {
capturing: Capturing,
parser_replacements: Vec<ParserReplacement>,
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
// `IntervalSet` is good for perf because attrs are mostly added to this
// set in contiguous ranges.
seen_attrs: IntervalSet<AttrId>,
}
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@ -458,6 +462,7 @@ impl<'a> Parser<'a> {
capturing: Capturing::No,
parser_replacements: Vec::new(),
inner_attr_parser_ranges: Default::default(),
seen_attrs: IntervalSet::new(u32::MAX as usize),
},
current_closure: None,
recovery: Recovery::Allowed,