Speed up `Parser::expected_token_types`.

The parser pushes a `TokenType` to `Parser::expected_token_types` on every call to the various `check`/`eat` methods, and clears it on every call to `bump`. Some of those `TokenType` values are full tokens that require cloning and dropping. This is a *lot* of work for something that is only used in error messages and it accounts for a significant fraction of parsing execution time. This commit overhauls `TokenType` so that `Parser::expected_token_types` can be implemented as a bitset. This requires changing `TokenType` to a C-style parameterless enum, and adding `TokenTypeSet` which uses a `u128` for the bits. (The new `TokenType` has 105 variants.) The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to the `check`/`eat` methods. This is for maximum speed. The elements in the pairs are always statically known; e.g. a `token::BinOp(token::Star)` is always paired with a `TokenType::Star`. So we now compute `TokenType`s in advance and pass them in to `check`/`eat` rather than the current approach of constructing them on insertion into `expected_token_types`. Values of these pair types can be produced by the new `exp!` macro, which is used at every `check`/`eat` call site. The macro is for convenience, allowing any pair to be generated from a single identifier. The ident/keyword filtering in `expected_one_of_not_found` is no longer necessary. It was there to account for some sloppiness in `TokenKind`/`TokenType` comparisons. The existing `TokenType` is moved to a new file `token_type.rs`, and all its new infrastructure is added to that file. There is more boilerplate code than I would like, but I can't see how to make it shorter.
2024-12-04 15:55:06 +11:00 · 2024-12-04 15:55:06 +11:00 · b9bf0b4b10
commit b9bf0b4b10
parent d5370d981f
22 changed files with 1357 additions and 793 deletions
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@@ -24,7 +24,7 @@ use super::{
    Trailing, UsePreAttrPos,
 };
 use crate::errors::MalformedLoopLabel;
-use crate::{errors, maybe_whole};
+use crate::{errors, exp, maybe_whole};

 impl<'a> Parser<'a> {
    /// Parses a statement. This stops just before trailing semicolons on everything but items.
@@ -71,7 +71,7 @@ impl<'a> Parser<'a> {

        let stmt = if self.token.is_keyword(kw::Let) {
            self.collect_tokens(None, attrs, force_collect, |this, attrs| {
-                this.expect_keyword(kw::Let)?;
+                this.expect_keyword(exp!(Let))?;
                let local = this.parse_local(attrs)?;
                let trailing = Trailing::from(capture_semi && this.token == token::Semi);
                Ok((
@@ -140,7 +140,7 @@ impl<'a> Parser<'a> {
            force_collect,
        )? {
            self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
-        } else if self.eat(&token::Semi) {
+        } else if self.eat(exp!(Semi)) {
            // Do not attempt to parse an expression if we're done here.
            self.error_outer_attrs(attrs);
            self.mk_stmt(lo, StmtKind::Empty)
@@ -156,7 +156,7 @@ impl<'a> Parser<'a> {
                    Ok((expr, Trailing::No, UsePreAttrPos::Yes))
                },
            )?;
-            if matches!(e.kind, ExprKind::Assign(..)) && self.eat_keyword(kw::Else) {
+            if matches!(e.kind, ExprKind::Assign(..)) && self.eat_keyword(exp!(Else)) {
                let bl = self.parse_block()?;
                // Destructuring assignment ... else.
                // This is not allowed, but point it out in a nice way.
@@ -176,7 +176,7 @@ impl<'a> Parser<'a> {
        let stmt = self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| {
            let path = this.parse_path(PathStyle::Expr)?;

-            if this.eat(&token::Not) {
+            if this.eat(exp!(Not)) {
                let stmt_mac = this.parse_stmt_mac(lo, attrs, path)?;
                return Ok((
                    stmt_mac,
@@ -185,7 +185,7 @@ impl<'a> Parser<'a> {
                ));
            }

-            let expr = if this.eat(&token::OpenDelim(Delimiter::Brace)) {
+            let expr = if this.eat(exp!(OpenBrace)) {
                this.parse_expr_struct(None, path, true)?
            } else {
                let hi = this.prev_token.span;
@@ -370,7 +370,7 @@ impl<'a> Parser<'a> {
        let kind = match init {
            None => LocalKind::Decl,
            Some(init) => {
-                if self.eat_keyword(kw::Else) {
+                if self.eat_keyword(exp!(Else)) {
                    if self.token.is_keyword(kw::If) {
                        // `let...else if`. Emit the same error that `parse_block()` would,
                        // but explicitly point out that this pattern is not allowed.
@@ -449,7 +449,7 @@ impl<'a> Parser<'a> {
                self.bump();
                true
            }
-            _ => self.eat(&token::Eq),
+            _ => self.eat(exp!(Eq)),
        };

        Ok(if eq_consumed || eq_optional { Some(self.parse_expr()?) } else { None })
@@ -509,7 +509,7 @@ impl<'a> Parser<'a> {
            Ok(Some(Stmt { kind: StmtKind::Empty, .. })) => {}
            Ok(Some(stmt)) => {
                let stmt_own_line = self.psess.source_map().is_line_before_span_empty(sp);
-                let stmt_span = if stmt_own_line && self.eat(&token::Semi) {
+                let stmt_span = if stmt_own_line && self.eat(exp!(Semi)) {
                    // Expand the span to include the semicolon.
                    stmt.span.with_hi(self.prev_token.span.hi())
                } else {
@@ -651,7 +651,7 @@ impl<'a> Parser<'a> {

        let maybe_ident = self.prev_token.clone();
        self.maybe_recover_unexpected_block_label();
-        if !self.eat(&token::OpenDelim(Delimiter::Brace)) {
+        if !self.eat(exp!(OpenBrace)) {
            return self.error_block_no_opening_brace();
        }

@@ -678,7 +678,7 @@ impl<'a> Parser<'a> {
    ) -> PResult<'a, P<Block>> {
        let mut stmts = ThinVec::new();
        let mut snapshot = None;
-        while !self.eat(&token::CloseDelim(Delimiter::Brace)) {
+        while !self.eat(exp!(CloseBrace)) {
            if self.token == token::Eof {
                break;
            }
@@ -781,8 +781,7 @@ impl<'a> Parser<'a> {
            {
                // Just check for errors and recover; do not eat semicolon yet.

-                let expect_result =
-                    self.expect_one_of(&[], &[token::Semi, token::CloseDelim(Delimiter::Brace)]);
+                let expect_result = self.expect_one_of(&[], &[exp!(Semi), exp!(CloseBrace)]);

                // Try to both emit a better diagnostic, and avoid further errors by replacing
                // the `expr` with `ExprKind::Err`.
@@ -930,7 +929,7 @@ impl<'a> Parser<'a> {
            }
        }

-        if add_semi_to_stmt || (eat_semi && self.eat(&token::Semi)) {
+        if add_semi_to_stmt || (eat_semi && self.eat(exp!(Semi))) {
            stmt = stmt.add_trailing_semicolon();
        }