Remove the bogus special case from Parser::look_ahead.

The general case at the bottom of `look_ahead` is slow, because it clones the token cursor. Above it there is a special case for performance that is hit most of the time and avoids the cloning. Unfortunately, its behaviour differs from the general case in two ways. - When within a pair of delimiters, if you look any distance past the closing delimiter you get the closing delimiter instead of what comes after the closing delimiter. - It uses `tree_cursor.look_ahead(dist - 1)` which totally confuses tokens with token trees. This means that only the first token in a token tree will be seen. E.g. in a sequence like `{ a }` the `a` and `}` will be skipped over. Bad! It's likely that these differences weren't noticed before now because the use of `look_ahead` in the parser is limited to small distances and relatively few contexts. Removing the special case causes slowdowns up of to 2% on a range of benchmarks. The next commit will add a new, correct special case to regain that lost performance.
2024-07-12 12:56:58 +10:00 · 2024-07-12 12:56:58 +10:00 · ebe1305b1e
commit ebe1305b1e
parent dad95578b0
2 changed files with 28 additions and 56 deletions
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -1118,41 +1118,8 @@ impl<'a> Parser<'a> {
            return looker(&self.token);
        }
-        if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
+        // Just clone the token cursor and use `next`, skipping delimiters as
-            && delim != Delimiter::Invisible
+        // necessary. Slow but simple.
        {
            // We are not in the outermost token stream, and the token stream
            // we are in has non-skipped delimiters. Look for skipped
            // delimiters in the lookahead range.
            let tree_cursor = &self.token_cursor.tree_cursor;
            let all_normal = (0..dist).all(|i| {
                let token = tree_cursor.look_ahead(i);
                !matches!(token, Some(TokenTree::Delimited(.., Delimiter::Invisible, _)))
            });
            if all_normal {
                // There were no skipped delimiters. Do lookahead by plain indexing.
                return match tree_cursor.look_ahead(dist - 1) {
                    Some(tree) => {
                        // Indexing stayed within the current token stream.
                        match tree {
                            TokenTree::Token(token, _) => looker(token),
                            TokenTree::Delimited(dspan, _, delim, _) => {
                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                            }
                        }
                    }
                    None => {
                        // Indexing went past the end of the current token
                        // stream. Use the close delimiter, no matter how far
                        // ahead `dist` went.
                        looker(&Token::new(token::CloseDelim(delim), span.close))
                    }
                };
            }
        }
        // We are in a more complex case. Just clone the token cursor and use
        // `next`, skipping delimiters as necessary. Slow but simple.
        let mut cursor = self.token_cursor.clone();
        let mut i = 0;
        let mut token = Token::dummy();
--- a/compiler/rustc_parse/src/parser/tests.rs
+++ b/compiler/rustc_parse/src/parser/tests.rs
@ -1424,12 +1424,15 @@ fn look_ahead() {
        look!(p, 1, token::Colon);
        look!(p, 2, token::Ident(sym::u32, raw_no));
        look!(p, 3, token::CloseDelim(Delimiter::Parenthesis));
-        // FIXME(nnethercote) If we lookahead any distance past a close delim
+        look!(p, 4, token::OpenDelim(Delimiter::Brace));
-        // we currently return that close delim.
+        look!(p, 5, token::Ident(sym_x, raw_no));
-        look!(p, 4, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 6, token::CloseDelim(Delimiter::Brace));
-        look!(p, 5, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 7, token::Ident(kw::Struct, raw_no));
-        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 8, token::Ident(sym_S, raw_no));
-        look!(p, 100, token::CloseDelim(Delimiter::Parenthesis));
+        look!(p, 9, token::Semi);
        look!(p, 10, token::Eof);
        look!(p, 11, token::Eof);
        look!(p, 100, token::Eof);
        // Move forward to the `;`.
        for _ in 0..9 {
@ -1454,12 +1457,13 @@ fn look_ahead() {
    });
 }
-/// FIXME(nnethercote) Currently there is some buggy behaviour when using
+/// There used to be some buggy behaviour when using `look_ahead` not within
-/// `look_ahead` not within the outermost token stream, as this test shows.
+/// the outermost token stream, which this test covers.
 #[test]
 fn look_ahead_non_outermost_stream() {
    create_default_session_globals_then(|| {
        let sym_f = Symbol::intern("f");
        let sym_x = Symbol::intern("x");
        #[allow(non_snake_case)]
        let sym_S = Symbol::intern("S");
        let raw_no = IdentIsRaw::No;
@ -1475,20 +1479,21 @@ fn look_ahead_non_outermost_stream() {
        look!(p, 0, token::Ident(kw::Fn, raw_no));
        look!(p, 1, token::Ident(sym_f, raw_no));
        look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
-        // FIXME(nnethercote) The current code incorrectly skips the `x: u32)`
+        look!(p, 3, token::Ident(sym_x, raw_no));
-        // to the next token tree.
+        look!(p, 4, token::Colon);
-        look!(p, 3, token::OpenDelim(Delimiter::Brace));
+        look!(p, 5, token::Ident(sym::u32, raw_no));
-        // FIXME(nnethercote) The current code incorrectly skips the `x }`
+        look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
-        // to the next token tree.
+        look!(p, 7, token::OpenDelim(Delimiter::Brace));
-        look!(p, 4, token::Ident(kw::Struct, raw_no));
+        look!(p, 8, token::Ident(sym_x, raw_no));
        look!(p, 5, token::Ident(sym_S, raw_no));
        look!(p, 6, token::Semi);
        // FIXME(nnethercote) If we lookahead any distance past a close delim
        // we currently return that close delim.
        look!(p, 7, token::CloseDelim(Delimiter::Brace));
        look!(p, 8, token::CloseDelim(Delimiter::Brace));
        look!(p, 9, token::CloseDelim(Delimiter::Brace));
-        look!(p, 100, token::CloseDelim(Delimiter::Brace));
+        look!(p, 10, token::Ident(kw::Struct, raw_no));
        look!(p, 11, token::Ident(sym_S, raw_no));
        look!(p, 12, token::Semi);
        look!(p, 13, token::CloseDelim(Delimiter::Brace));
        // Any lookahead past the end of the token stream returns `Eof`.
        look!(p, 14, token::Eof);
        look!(p, 15, token::Eof);
        look!(p, 100, token::Eof);
    });
 }