Rollup merge of #39118 - jseyfried:token_tree_based_parser, r=nrc
Refactor the parser to consume token trees

This is groundwork for efficiently parsing attribute proc macro invocations, bang macro invocations, and `TokenStream`-based attributes and fragment matchers. It improves parsing performance by 8-15% and expansion performance by 0-5% on a sampling of the compiler's crates.

r? @nrc
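To make the shape of the change concrete, here is a minimal, self-contained sketch of the idea, using toy types invented for illustration (none of the names below are rustc's actual API): instead of asking a lexer `Reader` for each token, the parser keeps a stack of (token-tree list, next-index) frames and pulls tokens by walking that stack, descending into delimited groups and falling back to `Eof` when the stack is empty.

```rust
// Toy stand-ins for syntax::parse::token types; illustration only.
#[derive(Clone, Debug, PartialEq)]
enum Token {
    Ident(&'static str),
    OpenDelim(char),
    CloseDelim(char),
    Eof,
}

#[derive(Clone, Debug)]
enum TokenTree {
    Token(Token),
    // A delimited group such as `( ... )`, owning its sub-trees.
    // Delimiters are named by their opening character.
    Delimited(char, Vec<TokenTree>),
}

struct Parser {
    // Stack of frames: the tree lists being traversed, each with the index
    // of the next tree to yield (cf. `tts: Vec<(TokenTree, usize)>` below).
    frames: Vec<(Vec<TokenTree>, usize)>,
}

impl Parser {
    fn new(tokens: Vec<TokenTree>) -> Self {
        Parser { frames: vec![(tokens, 0)] }
    }

    // Yield the next token: pop the top frame, take its `i`-th tree, push
    // the frame back if siblings remain, and either return a plain token
    // or enter a delimited group (emitting its open delimiter).
    fn next_tok(&mut self) -> Token {
        loop {
            let (trees, i) = match self.frames.pop() {
                Some(frame) => frame,
                None => return Token::Eof, // no lexer to fall back on
            };
            if i >= trees.len() {
                continue; // exhausted frame (e.g. empty input)
            }
            let tt = trees[i].clone();
            if i + 1 < trees.len() {
                self.frames.push((trees, i + 1));
            }
            match tt {
                TokenTree::Token(tok) => return tok,
                TokenTree::Delimited(delim, mut inner) => {
                    // Queue the close delimiter after the group's contents,
                    // then enter the group.
                    inner.push(TokenTree::Token(Token::CloseDelim(delim)));
                    self.frames.push((inner, 0));
                    return Token::OpenDelim(delim);
                }
            }
        }
    }

    // Best-effort lookahead with no token buffer: peek `dist - 1` siblings
    // ahead in the current frame, like the rewritten `look_ahead` in the
    // diff; it does not descend across frame boundaries.
    fn look_ahead(&self, dist: usize) -> Token {
        assert!(dist > 0, "dist == 0 would be the current token");
        match self.frames.last() {
            Some(&(ref trees, i)) => match trees.get(i + dist - 1) {
                Some(&TokenTree::Token(ref tok)) => tok.clone(),
                Some(&TokenTree::Delimited(delim, _)) => Token::OpenDelim(delim),
                None => Token::Eof,
            },
            None => Token::Eof,
        }
    }
}

fn main() {
    // `foo ( bar )` as token trees.
    let tokens = vec![
        TokenTree::Token(Token::Ident("foo")),
        TokenTree::Delimited('(', vec![TokenTree::Token(Token::Ident("bar"))]),
    ];
    let mut parser = Parser::new(tokens);
    println!("first: {:?}", parser.next_tok()); // Ident("foo")
    println!("peek:  {:?}", parser.look_ahead(1)); // OpenDelim('(')
    loop {
        let tok = parser.next_tok();
        println!("next:  {:?}", tok);
        if tok == Token::Eof {
            break;
        }
    }
}
```

The claimed speedups are consistent with this shape: macro expansion re-feeds existing token trees to the parser in place, with no re-lexing and, per the diff below, no copying circular lookahead buffer.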
commit 17294d98b9

20 changed files with 368 additions and 472 deletions
@@ -46,7 +46,7 @@ use ext::tt::macro_parser;
 use parse;
 use parse::classify;
 use parse::common::SeqSep;
-use parse::lexer::{Reader, TokenAndSpan};
+use parse::lexer::TokenAndSpan;
 use parse::obsolete::ObsoleteSyntax;
 use parse::token::{self, MatchNt, SubstNt};
 use parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership};
@@ -156,22 +156,6 @@ enum PrevTokenKind {
     Other,
 }
 
-// Simple circular buffer used for keeping few next tokens.
-#[derive(Default)]
-struct LookaheadBuffer {
-    buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
-    start: usize,
-    end: usize,
-}
-
-const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;
-
-impl LookaheadBuffer {
-    fn len(&self) -> usize {
-        (LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY
-    }
-}
-
 /* ident is handled by common.rs */
 
 pub struct Parser<'a> {
@@ -184,19 +168,14 @@ pub struct Parser<'a> {
     pub prev_span: Span,
     /// the previous token kind
     prev_token_kind: PrevTokenKind,
-    lookahead_buffer: LookaheadBuffer,
-    pub tokens_consumed: usize,
     pub restrictions: Restrictions,
     pub quote_depth: usize, // not (yet) related to the quasiquoter
     parsing_token_tree: bool,
-    pub reader: Box<Reader+'a>,
     /// The set of seen errors about obsolete syntax. Used to suppress
     /// extra detail when the same error is seen twice
     pub obsolete_set: HashSet<ObsoleteSyntax>,
     /// Used to determine the path to externally loaded source files
     pub directory: Directory,
-    /// Stack of open delimiters and their spans. Used for error message.
-    pub open_braces: Vec<(token::DelimToken, Span)>,
     /// Name of the root module this parser originated from. If `None`, then the
     /// name is not known. This does not change while the parser is descending
     /// into modules, and sub-parsers have new values for this name.
@@ -204,7 +183,6 @@ pub struct Parser<'a> {
     pub expected_tokens: Vec<TokenType>,
     pub tts: Vec<(TokenTree, usize)>,
     pub desugar_doc_comments: bool,
-    pub allow_interpolated_tts: bool,
 }
 
 #[derive(PartialEq, Eq, Clone)]
@@ -270,30 +248,31 @@ impl From<P<Expr>> for LhsExpr {
 
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess,
-               rdr: Box<Reader+'a>,
+               tokens: Vec<TokenTree>,
                directory: Option<Directory>,
                desugar_doc_comments: bool)
                -> Self {
+        let tt = TokenTree::Delimited(syntax_pos::DUMMY_SP, Rc::new(Delimited {
+            delim: token::NoDelim,
+            open_span: syntax_pos::DUMMY_SP,
+            tts: tokens,
+            close_span: syntax_pos::DUMMY_SP,
+        }));
         let mut parser = Parser {
-            reader: rdr,
             sess: sess,
             token: token::Underscore,
             span: syntax_pos::DUMMY_SP,
             prev_span: syntax_pos::DUMMY_SP,
             prev_token_kind: PrevTokenKind::Other,
-            lookahead_buffer: Default::default(),
-            tokens_consumed: 0,
             restrictions: Restrictions::empty(),
             quote_depth: 0,
             parsing_token_tree: false,
             obsolete_set: HashSet::new(),
             directory: Directory { path: PathBuf::new(), ownership: DirectoryOwnership::Owned },
-            open_braces: Vec::new(),
             root_module_name: None,
             expected_tokens: Vec::new(),
-            tts: Vec::new(),
+            tts: if tt.len() > 0 { vec![(tt, 0)] } else { Vec::new() },
            desugar_doc_comments: desugar_doc_comments,
-            allow_interpolated_tts: true,
         };
 
         let tok = parser.next_tok();
@@ -309,8 +288,8 @@ impl<'a> Parser<'a> {
     }
 
     fn next_tok(&mut self) -> TokenAndSpan {
-        'outer: loop {
-            let mut tok = if let Some((tts, i)) = self.tts.pop() {
+        loop {
+            let tok = if let Some((tts, i)) = self.tts.pop() {
                 let tt = tts.get_tt(i);
                 if i + 1 < tts.len() {
                     self.tts.push((tts, i + 1));
@@ -322,28 +301,14 @@ impl<'a> Parser<'a> {
                     continue
                 }
             } else {
-                self.reader.real_token()
+                TokenAndSpan { tok: token::Eof, sp: self.span }
             };
 
-            loop {
-                let nt = match tok.tok {
-                    token::Interpolated(ref nt) => nt.clone(),
-                    token::DocComment(name) if self.desugar_doc_comments => {
-                        self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0));
-                        continue 'outer
-                    }
-                    _ => return tok,
-                };
-                match *nt {
-                    token::NtTT(TokenTree::Token(sp, ref t)) => {
-                        tok = TokenAndSpan { tok: t.clone(), sp: sp };
-                    }
-                    token::NtTT(ref tt) => {
-                        self.tts.push((tt.clone(), 0));
-                        continue 'outer
-                    }
-                    _ => return tok,
-                }
-            }
+            match tok.tok {
+                token::DocComment(name) if self.desugar_doc_comments => {
+                    self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0));
+                }
+                _ => return tok,
+            }
         }
     }
@@ -892,17 +857,9 @@ impl<'a> Parser<'a> {
             _ => PrevTokenKind::Other,
         };
 
-        let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
-            self.next_tok()
-        } else {
-            // Avoid token copies with `replace`.
-            let old_start = self.lookahead_buffer.start;
-            self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
-        };
+        let next = self.next_tok();
         self.span = next.sp;
         self.token = next.tok;
-        self.tokens_consumed += 1;
         self.expected_tokens.clear();
         // check after each token
         self.check_unknown_macro_variable();
@@ -935,18 +892,20 @@ impl<'a> Parser<'a> {
         F: FnOnce(&token::Token) -> R,
     {
         if dist == 0 {
-            f(&self.token)
-        } else if dist < LOOKAHEAD_BUFFER_CAPACITY {
-            while self.lookahead_buffer.len() < dist {
-                self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.next_tok();
-                self.lookahead_buffer.end =
-                    (self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            }
-            let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            f(&self.lookahead_buffer.buffer[index].tok)
-        } else {
-            self.bug("lookahead distance is too large");
+            return f(&self.token);
         }
+        let mut tok = token::Eof;
+        if let Some(&(ref tts, mut i)) = self.tts.last() {
+            i += dist - 1;
+            if i < tts.len() {
+                tok = match tts.get_tt(i) {
+                    TokenTree::Token(_, tok) => tok,
+                    TokenTree::Delimited(_, delimited) => token::OpenDelim(delimited.delim),
+                    TokenTree::Sequence(..) => token::Dollar,
+                };
+            }
+        }
+        f(&tok)
     }
     pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
         self.sess.span_diagnostic.struct_span_fatal(self.span, m)
@@ -2743,94 +2702,28 @@ impl<'a> Parser<'a> {
         // whether something will be a nonterminal or a seq
         // yet.
         match self.token {
-            token::Eof => {
-                let mut err: DiagnosticBuilder<'a> =
-                    self.diagnostic().struct_span_err(self.span,
-                                                      "this file contains an un-closed delimiter");
-                for &(_, sp) in &self.open_braces {
-                    err.span_help(sp, "did you mean to close this delimiter?");
-                }
-
-                Err(err)
-            },
             token::OpenDelim(delim) => {
-                if self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
+                if self.quote_depth == 0 && self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
                     let tt = self.tts.pop().unwrap().0;
                     self.bump();
-                    return Ok(if self.allow_interpolated_tts {
-                        // avoid needlessly reparsing token trees in recursive macro expansions
-                        TokenTree::Token(tt.span(), token::Interpolated(Rc::new(token::NtTT(tt))))
-                    } else {
-                        tt
-                    });
+                    return Ok(tt);
                 }
 
                 let parsing_token_tree = ::std::mem::replace(&mut self.parsing_token_tree, true);
-                // The span for beginning of the delimited section
-                let pre_span = self.span;
-
-                // Parse the open delimiter.
-                self.open_braces.push((delim, self.span));
+                let open_span = self.span;
                 self.bump();
 
-                // Parse the token trees within the delimiters.
-                // We stop at any delimiter so we can try to recover if the user
-                // uses an incorrect delimiter.
                 let tts = self.parse_seq_to_before_tokens(&[&token::CloseDelim(token::Brace),
                                                             &token::CloseDelim(token::Paren),
                                                             &token::CloseDelim(token::Bracket)],
                                                           SeqSep::none(),
                                                           |p| p.parse_token_tree(),
                                                           |mut e| e.emit());
-                self.parsing_token_tree = parsing_token_tree;
-
                 let close_span = self.span;
-                // Expand to cover the entire delimited token tree
-                let span = Span { hi: close_span.hi, ..pre_span };
                 self.bump();
 
-                match self.token {
-                    // Correct delimiter.
-                    token::CloseDelim(d) if d == delim => {
-                        self.open_braces.pop().unwrap();
-
-                        // Parse the close delimiter.
-                        self.bump();
-                    }
-                    // Incorrect delimiter.
-                    token::CloseDelim(other) => {
-                        let token_str = self.this_token_to_string();
-                        let mut err = self.diagnostic().struct_span_err(self.span,
-                            &format!("incorrect close delimiter: `{}`", token_str));
-                        // This is a conservative error: only report the last unclosed delimiter.
-                        // The previous unclosed delimiters could actually be closed! The parser
-                        // just hasn't gotten to them yet.
-                        if let Some(&(_, sp)) = self.open_braces.last() {
-                            err.span_note(sp, "unclosed delimiter");
-                        };
-                        err.emit();
-
-                        self.open_braces.pop().unwrap();
-
-                        // If the incorrect delimiter matches an earlier opening
-                        // delimiter, then don't consume it (it can be used to
-                        // close the earlier one). Otherwise, consume it.
-                        // E.g., we try to recover from:
-                        //     fn foo() {
-                        //         bar(baz(
-                        //     }  // Incorrect delimiter but matches the earlier `{`
-                        if !self.open_braces.iter().any(|&(b, _)| b == other) {
-                            self.bump();
-                        }
-                    }
-                    token::Eof => {
-                        // Silently recover, the EOF token will be seen again
-                        // and an error emitted then. Thus we don't pop from
-                        // self.open_braces here.
-                    },
-                    _ => {}
-                }
-
+                self.parsing_token_tree = parsing_token_tree;
+                let span = Span { lo: open_span.lo, ..close_span };
                 Ok(TokenTree::Delimited(span, Rc::new(Delimited {
                     delim: delim,
                     open_span: open_span,
@@ -2838,21 +2731,9 @@ impl<'a> Parser<'a> {
                     close_span: close_span,
                 })))
             },
-            token::CloseDelim(_) => {
-                // An unexpected closing delimiter (i.e., there is no
-                // matching opening delimiter).
-                let token_str = self.this_token_to_string();
-                let err = self.diagnostic().struct_span_err(self.span,
-                    &format!("unexpected close delimiter: `{}`", token_str));
-                Err(err)
-            },
-            /* we ought to allow different depths of unquotation */
-            token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => {
-                self.parse_unquoted()
-            }
-            _ => {
-                Ok(TokenTree::Token(self.span, self.bump_and_get()))
-            }
+            token::CloseDelim(_) | token::Eof => unreachable!(),
+            token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => self.parse_unquoted(),
+            _ => Ok(TokenTree::Token(self.span, self.bump_and_get())),
         }
     }