Speed up Parser::expected_token_types.

The parser pushes a `TokenType` to `Parser::expected_token_types` on
every call to the various `check`/`eat` methods, and clears it on every
call to `bump`. Some of those `TokenType` values are full tokens that
require cloning and dropping. This is a *lot* of work for something
that is only used in error messages, and it accounts for a significant
fraction of parsing execution time.

This commit overhauls `TokenType` so that `Parser::expected_token_types`
can be implemented as a bitset. This requires changing `TokenType` to a
C-style parameterless enum, and adding `TokenTypeSet`, which uses a
`u128` for the bits. (The new `TokenType` has 105 variants, so one bit
per variant fits within the 128 available bits.)

The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to
the `check`/`eat` methods. This is for maximum speed. The elements in
the pairs are always statically known; e.g. a
`token::BinOp(token::Star)` is always paired with a `TokenType::Star`.
So we now compute `TokenType`s in advance and pass them in to
`check`/`eat` rather than the current approach of constructing them on
insertion into `expected_token_types`.

Values of these pair types can be produced by the new `exp!` macro,
which is used at every `check`/`eat` call site. The macro is for
convenience, allowing any pair to be generated from a single identifier.

The ident/keyword filtering in `expected_one_of_not_found` is no longer
necessary. It was there to account for some sloppiness in
`TokenKind`/`TokenType` comparisons.

The existing `TokenType` is moved to a new file `token_type.rs`, and all
its new infrastructure is added to that file. There is more boilerplate
code than I would like, but I can't see how to make it shorter.
This commit is contained in:
Nicholas Nethercote 2024-12-04 15:55:06 +11:00
parent d5370d981f
commit b9bf0b4b10
22 changed files with 1357 additions and 793 deletions

View file

@ -24,7 +24,7 @@ use super::{
Trailing, UsePreAttrPos,
};
use crate::errors::MalformedLoopLabel;
use crate::{errors, maybe_whole};
use crate::{errors, exp, maybe_whole};
impl<'a> Parser<'a> {
/// Parses a statement. This stops just before trailing semicolons on everything but items.
@ -71,7 +71,7 @@ impl<'a> Parser<'a> {
let stmt = if self.token.is_keyword(kw::Let) {
self.collect_tokens(None, attrs, force_collect, |this, attrs| {
this.expect_keyword(kw::Let)?;
this.expect_keyword(exp!(Let))?;
let local = this.parse_local(attrs)?;
let trailing = Trailing::from(capture_semi && this.token == token::Semi);
Ok((
@ -140,7 +140,7 @@ impl<'a> Parser<'a> {
force_collect,
)? {
self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
} else if self.eat(&token::Semi) {
} else if self.eat(exp!(Semi)) {
// Do not attempt to parse an expression if we're done here.
self.error_outer_attrs(attrs);
self.mk_stmt(lo, StmtKind::Empty)
@ -156,7 +156,7 @@ impl<'a> Parser<'a> {
Ok((expr, Trailing::No, UsePreAttrPos::Yes))
},
)?;
if matches!(e.kind, ExprKind::Assign(..)) && self.eat_keyword(kw::Else) {
if matches!(e.kind, ExprKind::Assign(..)) && self.eat_keyword(exp!(Else)) {
let bl = self.parse_block()?;
// Destructuring assignment ... else.
// This is not allowed, but point it out in a nice way.
@ -176,7 +176,7 @@ impl<'a> Parser<'a> {
let stmt = self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
let path = this.parse_path(PathStyle::Expr)?;
if this.eat(&token::Not) {
if this.eat(exp!(Not)) {
let stmt_mac = this.parse_stmt_mac(lo, attrs, path)?;
return Ok((
stmt_mac,
@ -185,7 +185,7 @@ impl<'a> Parser<'a> {
));
}
let expr = if this.eat(&token::OpenDelim(Delimiter::Brace)) {
let expr = if this.eat(exp!(OpenBrace)) {
this.parse_expr_struct(None, path, true)?
} else {
let hi = this.prev_token.span;
@ -370,7 +370,7 @@ impl<'a> Parser<'a> {
let kind = match init {
None => LocalKind::Decl,
Some(init) => {
if self.eat_keyword(kw::Else) {
if self.eat_keyword(exp!(Else)) {
if self.token.is_keyword(kw::If) {
// `let...else if`. Emit the same error that `parse_block()` would,
// but explicitly point out that this pattern is not allowed.
@ -449,7 +449,7 @@ impl<'a> Parser<'a> {
self.bump();
true
}
_ => self.eat(&token::Eq),
_ => self.eat(exp!(Eq)),
};
Ok(if eq_consumed || eq_optional { Some(self.parse_expr()?) } else { None })
@ -509,7 +509,7 @@ impl<'a> Parser<'a> {
Ok(Some(Stmt { kind: StmtKind::Empty, .. })) => {}
Ok(Some(stmt)) => {
let stmt_own_line = self.psess.source_map().is_line_before_span_empty(sp);
let stmt_span = if stmt_own_line && self.eat(&token::Semi) {
let stmt_span = if stmt_own_line && self.eat(exp!(Semi)) {
// Expand the span to include the semicolon.
stmt.span.with_hi(self.prev_token.span.hi())
} else {
@ -651,7 +651,7 @@ impl<'a> Parser<'a> {
let maybe_ident = self.prev_token.clone();
self.maybe_recover_unexpected_block_label();
if !self.eat(&token::OpenDelim(Delimiter::Brace)) {
if !self.eat(exp!(OpenBrace)) {
return self.error_block_no_opening_brace();
}
@ -678,7 +678,7 @@ impl<'a> Parser<'a> {
) -> PResult<'a, P<Block>> {
let mut stmts = ThinVec::new();
let mut snapshot = None;
while !self.eat(&token::CloseDelim(Delimiter::Brace)) {
while !self.eat(exp!(CloseBrace)) {
if self.token == token::Eof {
break;
}
@ -781,8 +781,7 @@ impl<'a> Parser<'a> {
{
// Just check for errors and recover; do not eat semicolon yet.
let expect_result =
self.expect_one_of(&[], &[token::Semi, token::CloseDelim(Delimiter::Brace)]);
let expect_result = self.expect_one_of(&[], &[exp!(Semi), exp!(CloseBrace)]);
// Try to both emit a better diagnostic, and avoid further errors by replacing
// the `expr` with `ExprKind::Err`.
@ -930,7 +929,7 @@ impl<'a> Parser<'a> {
}
}
if add_semi_to_stmt || (eat_semi && self.eat(&token::Semi)) {
if add_semi_to_stmt || (eat_semi && self.eat(exp!(Semi))) {
stmt = stmt.add_trailing_semicolon();
}