Auto merge of #95509 - nnethercote:simplify-MatcherPos-some-more, r=petrochenkov
Simplify `MatcherPos` some more A few more improvements. r? `@petrochenkov`
This commit is contained in:
commit
95f68702ff
2 changed files with 121 additions and 96 deletions
|
@ -1,5 +1,6 @@
|
||||||
#![feature(associated_type_bounds)]
|
#![feature(associated_type_bounds)]
|
||||||
#![feature(associated_type_defaults)]
|
#![feature(associated_type_defaults)]
|
||||||
|
#![feature(box_patterns)]
|
||||||
#![feature(box_syntax)]
|
#![feature(box_syntax)]
|
||||||
#![feature(crate_visibility_modifier)]
|
#![feature(crate_visibility_modifier)]
|
||||||
#![feature(decl_macro)]
|
#![feature(decl_macro)]
|
||||||
|
|
|
@ -75,7 +75,7 @@ crate use ParseResult::*;
|
||||||
|
|
||||||
use crate::mbe::{self, SequenceRepetition, TokenTree};
|
use crate::mbe::{self, SequenceRepetition, TokenTree};
|
||||||
|
|
||||||
use rustc_ast::token::{self, DocComment, Nonterminal, Token};
|
use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind};
|
||||||
use rustc_parse::parser::{NtOrTt, Parser};
|
use rustc_parse::parser::{NtOrTt, Parser};
|
||||||
use rustc_session::parse::ParseSess;
|
use rustc_session::parse::ParseSess;
|
||||||
use rustc_span::symbol::MacroRulesNormalizedIdent;
|
use rustc_span::symbol::MacroRulesNormalizedIdent;
|
||||||
|
@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
|
||||||
use rustc_span::symbol::Ident;
|
use rustc_span::symbol::Ident;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
||||||
use std::mem;
|
|
||||||
|
|
||||||
/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
|
|
||||||
/// descended into.
|
|
||||||
#[derive(Clone)]
|
|
||||||
struct MatcherPosFrame<'tt> {
|
|
||||||
/// The "parent" matcher that we have descended from.
|
|
||||||
tts: &'tt [TokenTree],
|
|
||||||
/// The position of the "dot" in `tt` at the time we descended.
|
|
||||||
idx: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
|
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
|
||||||
// vectors longer than one frequently have many elements, not just two or
|
// vectors longer than one frequently have many elements, not just two or
|
||||||
|
@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
|
||||||
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
|
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
|
||||||
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
|
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum MatcherKind<'tt> {
|
||||||
|
TopLevel,
|
||||||
|
Delimited(Box<DelimitedSubmatcher<'tt>>),
|
||||||
|
Sequence(Box<SequenceSubmatcher<'tt>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct DelimitedSubmatcher<'tt> {
|
||||||
|
parent: Parent<'tt>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct SequenceSubmatcher<'tt> {
|
||||||
|
parent: Parent<'tt>,
|
||||||
|
seq: &'tt SequenceRepetition,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
|
||||||
|
/// `MathcherPos`.
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct Parent<'tt> {
|
||||||
|
tts: &'tt [TokenTree],
|
||||||
|
idx: usize,
|
||||||
|
kind: MatcherKind<'tt>,
|
||||||
|
}
|
||||||
|
|
||||||
/// A single matcher position, which could be within the top-level matcher, a submatcher, a
|
/// A single matcher position, which could be within the top-level matcher, a submatcher, a
|
||||||
/// subsubmatcher, etc. For example:
|
/// subsubmatcher, etc. For example:
|
||||||
/// ```text
|
/// ```text
|
||||||
|
@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
|
||||||
/// <--------------> first submatcher; three tts, zero metavars
|
/// <--------------> first submatcher; three tts, zero metavars
|
||||||
/// <--------------------------> top-level matcher; two tts, one metavar
|
/// <--------------------------> top-level matcher; two tts, one metavar
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Clone)]
|
|
||||||
struct MatcherPos<'tt> {
|
struct MatcherPos<'tt> {
|
||||||
/// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
|
/// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
|
||||||
/// submatcher, this is just the contents of that submatcher.
|
/// submatcher, this is just the contents of that submatcher.
|
||||||
tts: &'tt [TokenTree],
|
tts: &'tt [TokenTree],
|
||||||
|
|
||||||
/// The "dot" position within the current submatcher, i.e. the index into `tts`.
|
/// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
|
||||||
|
/// two positions past the final elements in `tts` when dealing with sequences, see
|
||||||
|
/// `parse_tt_inner` for details.
|
||||||
idx: usize,
|
idx: usize,
|
||||||
|
|
||||||
/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
|
/// This vector ends up with one element per metavar in the *top-level* matcher, even when this
|
||||||
|
@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
|
||||||
/// The number of sequences this mp is within.
|
/// The number of sequences this mp is within.
|
||||||
seq_depth: usize,
|
seq_depth: usize,
|
||||||
|
|
||||||
/// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
|
|
||||||
/// no metavars.
|
|
||||||
match_lo: usize,
|
|
||||||
|
|
||||||
/// The position in `matches` of the next metavar to be matched against the source token
|
/// The position in `matches` of the next metavar to be matched against the source token
|
||||||
/// stream. Should not be used if there are no metavars.
|
/// stream. Should not be used if there are no metavars.
|
||||||
match_cur: usize,
|
match_cur: usize,
|
||||||
|
|
||||||
/// This field is only used if we are matching a sequence.
|
/// What kind of matcher we are in. For submatchers, this contains enough information to
|
||||||
sequence: Option<MatcherPosSequence<'tt>>,
|
/// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
|
||||||
|
kind: MatcherKind<'tt>,
|
||||||
/// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
|
|
||||||
/// matcher(s). The bottom of the stack is the top-level matcher.
|
|
||||||
stack: SmallVec<[MatcherPosFrame<'tt>; 1]>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
|
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
|
||||||
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
|
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
|
||||||
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104);
|
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);
|
||||||
|
|
||||||
impl<'tt> MatcherPos<'tt> {
|
impl<'tt> MatcherPos<'tt> {
|
||||||
fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
|
fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
|
||||||
|
@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
|
||||||
idx: 0,
|
idx: 0,
|
||||||
matches: empty_matches,
|
matches: empty_matches,
|
||||||
seq_depth: 0,
|
seq_depth: 0,
|
||||||
match_lo: 0,
|
|
||||||
match_cur: 0,
|
match_cur: 0,
|
||||||
stack: smallvec![],
|
kind: MatcherKind::TopLevel,
|
||||||
sequence: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sequence(
|
fn empty_sequence(
|
||||||
parent: Box<MatcherPos<'tt>>,
|
parent_mp: &MatcherPos<'tt>,
|
||||||
seq: &'tt SequenceRepetition,
|
seq: &'tt SequenceRepetition,
|
||||||
empty_matches: Lrc<NamedMatchVec>,
|
empty_matches: Lrc<NamedMatchVec>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
|
let mut mp = MatcherPos {
|
||||||
|
tts: parent_mp.tts,
|
||||||
|
idx: parent_mp.idx + 1,
|
||||||
|
matches: parent_mp.matches.clone(), // a cheap clone
|
||||||
|
seq_depth: parent_mp.seq_depth,
|
||||||
|
match_cur: parent_mp.match_cur + seq.num_captures,
|
||||||
|
kind: parent_mp.kind.clone(), // an expensive clone
|
||||||
|
};
|
||||||
|
for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures {
|
||||||
|
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
|
||||||
|
}
|
||||||
|
mp
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sequence(
|
||||||
|
parent_mp: Box<MatcherPos<'tt>>,
|
||||||
|
seq: &'tt SequenceRepetition,
|
||||||
|
empty_matches: Lrc<NamedMatchVec>,
|
||||||
|
) -> Self {
|
||||||
|
let seq_kind = box SequenceSubmatcher {
|
||||||
|
parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
|
||||||
|
seq,
|
||||||
|
};
|
||||||
let mut mp = MatcherPos {
|
let mut mp = MatcherPos {
|
||||||
tts: &seq.tts,
|
tts: &seq.tts,
|
||||||
idx: 0,
|
idx: 0,
|
||||||
matches: parent.matches.clone(),
|
matches: parent_mp.matches,
|
||||||
seq_depth: parent.seq_depth,
|
seq_depth: parent_mp.seq_depth,
|
||||||
match_lo: parent.match_cur,
|
match_cur: parent_mp.match_cur,
|
||||||
match_cur: parent.match_cur,
|
kind: MatcherKind::Sequence(seq_kind),
|
||||||
sequence: Some(MatcherPosSequence { parent, seq }),
|
|
||||||
stack: smallvec![],
|
|
||||||
};
|
};
|
||||||
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
|
// Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
|
||||||
// must have the parent's depth at this point for these `push_match` calls to work.
|
// must have the parent's depth at this point for these `push_match` calls to work.
|
||||||
for idx in mp.match_lo..mp.match_lo + seq.num_captures {
|
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
|
||||||
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
|
mp.push_match(idx, MatchedSeq(empty_matches.clone()));
|
||||||
}
|
}
|
||||||
mp.seq_depth += 1;
|
mp.seq_depth += 1;
|
||||||
|
@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
struct MatcherPosSequence<'tt> {
|
|
||||||
/// The parent matcher position. Effectively gives a linked list of matches all the way to the
|
|
||||||
/// top-level matcher.
|
|
||||||
parent: Box<MatcherPos<'tt>>,
|
|
||||||
|
|
||||||
/// The sequence itself.
|
|
||||||
seq: &'tt SequenceRepetition,
|
|
||||||
}
|
|
||||||
|
|
||||||
enum EofMatcherPositions<'tt> {
|
enum EofMatcherPositions<'tt> {
|
||||||
None,
|
None,
|
||||||
One(Box<MatcherPos<'tt>>),
|
One(Box<MatcherPos<'tt>>),
|
||||||
|
@ -448,18 +467,6 @@ impl<'tt> TtParser<'tt> {
|
||||||
let mut eof_mps = EofMatcherPositions::None;
|
let mut eof_mps = EofMatcherPositions::None;
|
||||||
|
|
||||||
while let Some(mut mp) = self.cur_mps.pop() {
|
while let Some(mut mp) = self.cur_mps.pop() {
|
||||||
// Backtrack out of delimited submatcher when necessary. When backtracking out again,
|
|
||||||
// we need to advance the "dot" past the delimiters in the parent matcher(s).
|
|
||||||
while mp.idx >= mp.tts.len() {
|
|
||||||
match mp.stack.pop() {
|
|
||||||
Some(MatcherPosFrame { tts, idx }) => {
|
|
||||||
mp.tts = tts;
|
|
||||||
mp.idx = idx + 1;
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the current position of the "dot" (`idx`) in `mp` and the number of token
|
// Get the current position of the "dot" (`idx`) in `mp` and the number of token
|
||||||
// trees in the matcher (`len`).
|
// trees in the matcher (`len`).
|
||||||
let idx = mp.idx;
|
let idx = mp.idx;
|
||||||
|
@ -473,13 +480,11 @@ impl<'tt> TtParser<'tt> {
|
||||||
let op = seq.kleene.op;
|
let op = seq.kleene.op;
|
||||||
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
|
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
|
||||||
// Allow for the possibility of zero matches of this sequence.
|
// Allow for the possibility of zero matches of this sequence.
|
||||||
let mut new_mp = mp.clone();
|
self.cur_mps.push(box MatcherPos::empty_sequence(
|
||||||
new_mp.match_cur += seq.num_captures;
|
&*mp,
|
||||||
new_mp.idx += 1;
|
&seq,
|
||||||
for idx in mp.match_cur..mp.match_cur + seq.num_captures {
|
self.empty_matches.clone(),
|
||||||
new_mp.push_match(idx, MatchedSeq(self.empty_matches.clone()));
|
));
|
||||||
}
|
|
||||||
self.cur_mps.push(new_mp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allow for the possibility of one or more matches of this sequence.
|
// Allow for the possibility of one or more matches of this sequence.
|
||||||
|
@ -509,16 +514,17 @@ impl<'tt> TtParser<'tt> {
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenTree::Delimited(_, delimited) => {
|
TokenTree::Delimited(_, delimited) => {
|
||||||
// To descend into a delimited submatcher, we push the current matcher onto
|
// To descend into a delimited submatcher, we update `mp` appropriately,
|
||||||
// a stack and push a new mp containing the submatcher onto `cur_mps`.
|
// including enough information to re-ascend afterwards, and push it onto
|
||||||
//
|
// `cur_mps`. Later, when we reach the closing delimiter, we will recover
|
||||||
// At the beginning of the loop, if we reach the end of the delimited
|
// the parent matcher position to ascend. Note that we use `all_tts` to
|
||||||
// submatcher, we pop the stack to backtrack out of the descent. Note that
|
// include the open and close delimiter tokens.
|
||||||
// we use `all_tts` to include the open and close delimiter tokens.
|
let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
|
||||||
let tts = mem::replace(&mut mp.tts, &delimited.all_tts);
|
parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
|
||||||
let idx = mp.idx;
|
});
|
||||||
mp.stack.push(MatcherPosFrame { tts, idx });
|
mp.tts = &delimited.all_tts;
|
||||||
mp.idx = 0;
|
mp.idx = 0;
|
||||||
|
mp.kind = kind;
|
||||||
self.cur_mps.push(mp);
|
self.cur_mps.push(mp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -536,6 +542,18 @@ impl<'tt> TtParser<'tt> {
|
||||||
mp.idx += 1;
|
mp.idx += 1;
|
||||||
self.cur_mps.push(mp);
|
self.cur_mps.push(mp);
|
||||||
} else if token_name_eq(&t, token) {
|
} else if token_name_eq(&t, token) {
|
||||||
|
if let TokenKind::CloseDelim(_) = token.kind {
|
||||||
|
// Ascend out of the delimited submatcher.
|
||||||
|
debug_assert_eq!(idx, len - 1);
|
||||||
|
match mp.kind {
|
||||||
|
MatcherKind::Delimited(submatcher) => {
|
||||||
|
mp.tts = submatcher.parent.tts;
|
||||||
|
mp.idx = submatcher.parent.idx;
|
||||||
|
mp.kind = submatcher.parent.kind;
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
mp.idx += 1;
|
mp.idx += 1;
|
||||||
self.next_mps.push(mp);
|
self.next_mps.push(mp);
|
||||||
}
|
}
|
||||||
|
@ -544,38 +562,44 @@ impl<'tt> TtParser<'tt> {
|
||||||
// These cannot appear in a matcher.
|
// These cannot appear in a matcher.
|
||||||
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
|
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
|
||||||
}
|
}
|
||||||
} else if let Some(sequence) = &mp.sequence {
|
} else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
|
||||||
// We are past the end of a sequence.
|
// We are past the end of a sequence.
|
||||||
debug_assert!(idx <= len + 1);
|
// - If it has no separator, we must be only one past the end.
|
||||||
|
// - If it has a separator, we may be one past the end, in which case we must
|
||||||
|
// look for a separator. Or we may be two past the end, in which case we have
|
||||||
|
// already dealt with the separator.
|
||||||
|
debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());
|
||||||
|
|
||||||
if idx == len {
|
if idx == len {
|
||||||
// Add all matches from the sequence to `parent`, and move the "dot" past the
|
// Sequence matching may have finished: move the "dot" past the sequence in
|
||||||
// sequence in `parent`. This allows for the case where the sequence matching
|
// `parent`. This applies whether a separator is used or not. If sequence
|
||||||
// is finished.
|
// matching hasn't finished, this `new_mp` will fail quietly when it is
|
||||||
let mut new_mp = sequence.parent.clone();
|
// processed next time around the loop.
|
||||||
new_mp.matches = mp.matches.clone();
|
let new_mp = box MatcherPos {
|
||||||
new_mp.match_cur = mp.match_lo + sequence.seq.num_captures;
|
tts: parent.tts,
|
||||||
new_mp.idx += 1;
|
idx: parent.idx + 1,
|
||||||
|
matches: mp.matches.clone(), // a cheap clone
|
||||||
|
seq_depth: mp.seq_depth - 1,
|
||||||
|
match_cur: mp.match_cur,
|
||||||
|
kind: parent.kind.clone(), // an expensive clone
|
||||||
|
};
|
||||||
self.cur_mps.push(new_mp);
|
self.cur_mps.push(new_mp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if idx == len && sequence.seq.separator.is_some() {
|
if seq.separator.is_some() && idx == len {
|
||||||
if sequence
|
// Look for the separator.
|
||||||
.seq
|
if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
|
||||||
.separator
|
|
||||||
.as_ref()
|
|
||||||
.map_or(false, |sep| token_name_eq(token, sep))
|
|
||||||
{
|
|
||||||
// The matcher has a separator, and it matches the current token. We can
|
// The matcher has a separator, and it matches the current token. We can
|
||||||
// advance past the separator token.
|
// advance past the separator token.
|
||||||
mp.idx += 1;
|
mp.idx += 1;
|
||||||
self.next_mps.push(mp);
|
self.next_mps.push(mp);
|
||||||
}
|
}
|
||||||
} else if sequence.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
|
} else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
|
||||||
// We don't need a separator. Move the "dot" back to the beginning of the
|
// We don't need to look for a separator: either this sequence doesn't have
|
||||||
// matcher and try to match again UNLESS we are only allowed to have _one_
|
// one, or it does and we've already handled it. Also, we are allowed to have
|
||||||
// repetition.
|
// more than one repetition. Move the "dot" back to the beginning of the
|
||||||
mp.match_cur = mp.match_lo;
|
// matcher and try to match again.
|
||||||
|
mp.match_cur -= seq.num_captures;
|
||||||
mp.idx = 0;
|
mp.idx = 0;
|
||||||
self.cur_mps.push(mp);
|
self.cur_mps.push(mp);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue