1
Fork 0

Overhaul TokenTreeCursor.

- Move it to `rustc_parse`, which is the only crate that uses it. This
  lets us remove all the `pub` markers from it.

- Change `next_ref` and `look_ahead` to `curr` and `bump`, which work
  better for the `rustc_parse` uses.

- This requires adding a `TokenStream::get` method, which is simple.

- In `TokenCursor`, we currently duplicate the
  `DelimSpan`/`DelimSpacing`/`Delimiter` from the surrounding
  `TokenTree::Delimited` in the stack. This isn't necessary so long as
  we don't prematurely move past the `Delimited`, and is a small perf
  win on a very hot code path.

- In `parse_token_tree`, we clone the relevant `TokenTree::Delimited`
  instead of constructing an identical one from pieces.
This commit is contained in:
Nicholas Nethercote 2024-12-10 19:18:44 +11:00
parent fd83954d66
commit 2903356b2e
3 changed files with 66 additions and 69 deletions

View file

@@ -423,12 +423,12 @@ impl TokenStream {
self.0.len() self.0.len()
} }
pub fn iter(&self) -> TokenStreamIter<'_> { pub fn get(&self, index: usize) -> Option<&TokenTree> {
TokenStreamIter::new(self) self.0.get(index)
} }
pub fn into_trees(self) -> TokenTreeCursor { pub fn iter(&self) -> TokenStreamIter<'_> {
TokenTreeCursor::new(self) TokenStreamIter::new(self)
} }
/// Compares two `TokenStream`s, checking equality without regarding span information. /// Compares two `TokenStream`s, checking equality without regarding span information.
@@ -695,39 +695,6 @@ impl<'t> Iterator for TokenStreamIter<'t> {
} }
} }
/// Owning by-value iterator over a [`TokenStream`], that produces `&TokenTree`
/// items.
///
/// Doesn't impl `Iterator` because Rust doesn't permit an owning iterator to
/// return `&T` from `next`; the need for an explicit lifetime in the `Item`
/// associated type gets in the way. Instead, use `next_ref` (which doesn't
/// involve associated types) for getting individual elements, or
/// `TokenStreamIter` if you really want an `Iterator`, e.g. in a `for`
/// loop.
#[derive(Clone, Debug)]
pub struct TokenTreeCursor {
pub stream: TokenStream,
index: usize,
}
impl TokenTreeCursor {
fn new(stream: TokenStream) -> Self {
TokenTreeCursor { stream, index: 0 }
}
#[inline]
pub fn next_ref(&mut self) -> Option<&TokenTree> {
self.stream.0.get(self.index).map(|tree| {
self.index += 1;
tree
})
}
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.0.get(self.index + n)
}
}
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpan { pub struct DelimSpan {
pub open: Span, pub open: Span,

View file

@@ -8,6 +8,7 @@ use ast::token::IdentIsRaw;
use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered}; use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered};
use rustc_ast::ptr::P; use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenTree;
use rustc_ast::util::case::Case; use rustc_ast::util::case::Case;
use rustc_ast::util::classify; use rustc_ast::util::classify;
use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par}; use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par};
@@ -2393,7 +2394,8 @@ impl<'a> Parser<'a> {
} }
if self.token == TokenKind::Semi if self.token == TokenKind::Semi
&& matches!(self.token_cursor.stack.last(), Some((.., Delimiter::Parenthesis))) && let Some(last) = self.token_cursor.stack.last()
&& let Some(TokenTree::Delimited(_, _, Delimiter::Parenthesis, _)) = last.curr()
&& self.may_recover() && self.may_recover()
{ {
// It is likely that the closure body is a block but where the // It is likely that the closure body is a block but where the

View file

@@ -24,9 +24,7 @@ use rustc_ast::ptr::P;
use rustc_ast::token::{ use rustc_ast::token::{
self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind, self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind,
}; };
use rustc_ast::tokenstream::{ use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor,
};
use rustc_ast::util::case::Case; use rustc_ast::util::case::Case;
use rustc_ast::{ use rustc_ast::{
self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID, self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID,
@@ -273,21 +271,48 @@ struct CaptureState {
seen_attrs: IntervalSet<AttrId>, seen_attrs: IntervalSet<AttrId>,
} }
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that #[derive(Clone, Debug)]
struct TokenTreeCursor {
stream: TokenStream,
/// Points to the current token tree in the stream. In `TokenCursor::curr`,
/// this can be any token tree. In `TokenCursor::stack`, this is always a
/// `TokenTree::Delimited`.
index: usize,
}
impl TokenTreeCursor {
#[inline]
fn new(stream: TokenStream) -> Self {
TokenTreeCursor { stream, index: 0 }
}
#[inline]
fn curr(&self) -> Option<&TokenTree> {
self.stream.get(self.index)
}
#[inline]
fn bump(&mut self) {
self.index += 1;
}
}
/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) /// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is /// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part. /// what the parser expects, for the most part.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct TokenCursor { struct TokenCursor {
// Cursor for the current (innermost) token stream. The delimiters for this // Cursor for the current (innermost) token stream. The index within the
// token stream are found in `self.stack.last()`; when that is `None` then // cursor can point to any token tree in the stream (or one past the end).
// we are in the outermost token stream which never has delimiters. // The delimiters for this token stream are found in `self.stack.last()`;
tree_cursor: TokenTreeCursor, // if that is `None` we are in the outermost token stream which never has
// delimiters.
curr: TokenTreeCursor,
// Token streams surrounding the current one. The delimiters for stack[n]'s // Token streams surrounding the current one. The index within each cursor
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters // always points to a `TokenTree::Delimited`.
// because it's the outermost token stream which never has delimiters. stack: Vec<TokenTreeCursor>,
stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
} }
impl TokenCursor { impl TokenCursor {
@@ -302,32 +327,33 @@ impl TokenCursor {
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
// below can be removed. // below can be removed.
if let Some(tree) = self.tree_cursor.next_ref() { if let Some(tree) = self.curr.curr() {
match tree { match tree {
&TokenTree::Token(ref token, spacing) => { &TokenTree::Token(ref token, spacing) => {
debug_assert!(!matches!( debug_assert!(!matches!(
token.kind, token.kind,
token::OpenDelim(_) | token::CloseDelim(_) token::OpenDelim(_) | token::CloseDelim(_)
)); ));
return (token.clone(), spacing); let res = (token.clone(), spacing);
self.curr.bump();
return res;
} }
&TokenTree::Delimited(sp, spacing, delim, ref tts) => { &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
let trees = tts.clone().into_trees(); let trees = TokenTreeCursor::new(tts.clone());
self.stack.push(( self.stack.push(mem::replace(&mut self.curr, trees));
mem::replace(&mut self.tree_cursor, trees),
sp,
spacing,
delim,
));
if !delim.skip() { if !delim.skip() {
return (Token::new(token::OpenDelim(delim), sp.open), spacing.open); return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
} }
// No open delimiter to return; continue on to the next iteration. // No open delimiter to return; continue on to the next iteration.
} }
}; };
} else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { } else if let Some(parent) = self.stack.pop() {
// We have exhausted this token stream. Move back to its parent token stream. // We have exhausted this token stream. Move back to its parent token stream.
self.tree_cursor = tree_cursor; let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
panic!("parent should be Delimited")
};
self.curr = parent;
self.curr.bump(); // move past the `Delimited`
if !delim.skip() { if !delim.skip() {
return (Token::new(token::CloseDelim(delim), span.close), spacing.close); return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
} }
@@ -466,7 +492,7 @@ impl<'a> Parser<'a> {
capture_cfg: false, capture_cfg: false,
restrictions: Restrictions::empty(), restrictions: Restrictions::empty(),
expected_tokens: Vec::new(), expected_tokens: Vec::new(),
token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() }, token_cursor: TokenCursor { curr: TokenTreeCursor::new(stream), stack: Vec::new() },
num_bump_calls: 0, num_bump_calls: 0,
break_last_token: 0, break_last_token: 0,
unmatched_angle_bracket_count: 0, unmatched_angle_bracket_count: 0,
@@ -1192,7 +1218,7 @@ impl<'a> Parser<'a> {
if dist == 1 { if dist == 1 {
// The index is zero because the tree cursor's index always points // The index is zero because the tree cursor's index always points
// to the next token to be gotten. // to the next token to be gotten.
match self.token_cursor.tree_cursor.look_ahead(0) { match self.token_cursor.curr.curr() {
Some(tree) => { Some(tree) => {
// Indexing stayed within the current token tree. // Indexing stayed within the current token tree.
match tree { match tree {
@@ -1202,12 +1228,13 @@ impl<'a> Parser<'a> {
return looker(&Token::new(token::OpenDelim(delim), dspan.open)); return looker(&Token::new(token::OpenDelim(delim), dspan.open));
} }
} }
}; }
} }
None => { None => {
// The tree cursor lookahead went (one) past the end of the // The tree cursor lookahead went (one) past the end of the
// current token tree. Try to return a close delimiter. // current token tree. Try to return a close delimiter.
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last() if let Some(last) = self.token_cursor.stack.last()
&& let Some(&TokenTree::Delimited(span, _, delim, _)) = last.curr()
&& !delim.skip() && !delim.skip()
{ {
// We are not in the outermost token stream, so we have // We are not in the outermost token stream, so we have
@@ -1399,9 +1426,10 @@ impl<'a> Parser<'a> {
pub fn parse_token_tree(&mut self) -> TokenTree { pub fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind { match self.token.kind {
token::OpenDelim(..) => { token::OpenDelim(..) => {
// Grab the tokens within the delimiters. // Clone the `TokenTree::Delimited` that we are currently
let stream = self.token_cursor.tree_cursor.stream.clone(); // within. That's what we are going to return.
let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap(); let tree = self.token_cursor.stack.last().unwrap().curr().unwrap().clone();
debug_assert_matches!(tree, TokenTree::Delimited(..));
// Advance the token cursor through the entire delimited // Advance the token cursor through the entire delimited
// sequence. After getting the `OpenDelim` we are *within* the // sequence. After getting the `OpenDelim` we are *within* the
@@ -1421,7 +1449,7 @@ impl<'a> Parser<'a> {
// Consume close delimiter // Consume close delimiter
self.bump(); self.bump();
TokenTree::Delimited(span, spacing, delim, stream) tree
} }
token::CloseDelim(_) | token::Eof => unreachable!(), token::CloseDelim(_) | token::Eof => unreachable!(),
_ => { _ => {