Rollup merge of #107544 - nnethercote:improve-TokenCursor, r=petrochenkov

Improve `TokenCursor`. Some small improvements, for things that were bugging me. Best reviewed one commit at a time. r? ``@petrochenkov``
2023-02-03 23:04:51 +05:30 · 2023-02-03 23:04:51 +05:30 · 815dc9c480
commit 815dc9c480
parent d9db35785d a86fc727fa
6 changed files with 98 additions and 91 deletions
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@ -41,7 +41,8 @@ use std::{fmt, iter};
 /// Nothing special happens to misnamed or misplaced `SubstNt`s.
 #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub enum TokenTree {
-    /// A single token.
+    /// A single token. Should never be `OpenDelim` or `CloseDelim`, because
    /// delimiters are implicitly represented by `Delimited`.
    Token(Token, Spacing),
    /// A delimited sequence of token trees.
    Delimited(DelimSpan, Delimiter, TokenStream),
@ -388,12 +389,12 @@ impl TokenStream {
        self.0.len()
    }
-    pub fn trees(&self) -> CursorRef<'_> {
+    pub fn trees(&self) -> RefTokenTreeCursor<'_> {
-        CursorRef::new(self)
+        RefTokenTreeCursor::new(self)
    }
-    pub fn into_trees(self) -> Cursor {
+    pub fn into_trees(self) -> TokenTreeCursor {
-        Cursor::new(self)
+        TokenTreeCursor::new(self)
    }
    /// Compares two `TokenStream`s, checking equality without regarding span information.
@ -551,16 +552,17 @@ impl TokenStream {
    }
 }
-/// By-reference iterator over a [`TokenStream`].
+/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
 /// items.
 #[derive(Clone)]
-pub struct CursorRef<'t> {
+pub struct RefTokenTreeCursor<'t> {
    stream: &'t TokenStream,
    index: usize,
 }
-impl<'t> CursorRef<'t> {
+impl<'t> RefTokenTreeCursor<'t> {
    fn new(stream: &'t TokenStream) -> Self {
-        CursorRef { stream, index: 0 }
+        RefTokenTreeCursor { stream, index: 0 }
    }
    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
@ -568,7 +570,7 @@ impl<'t> CursorRef<'t> {
    }
 }
-impl<'t> Iterator for CursorRef<'t> {
+impl<'t> Iterator for RefTokenTreeCursor<'t> {
    type Item = &'t TokenTree;
    fn next(&mut self) -> Option<&'t TokenTree> {
@ -579,15 +581,16 @@ impl<'t> Iterator for CursorRef<'t> {
    }
 }
-/// Owning by-value iterator over a [`TokenStream`].
+/// Owning by-value iterator over a [`TokenStream`], that produces `TokenTree`
 /// items.
 // FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones.
 #[derive(Clone)]
-pub struct Cursor {
+pub struct TokenTreeCursor {
    pub stream: TokenStream,
    index: usize,
 }
-impl Iterator for Cursor {
+impl Iterator for TokenTreeCursor {
    type Item = TokenTree;
    fn next(&mut self) -> Option<TokenTree> {
@ -598,9 +601,9 @@ impl Iterator for Cursor {
    }
 }
-impl Cursor {
+impl TokenTreeCursor {
    fn new(stream: TokenStream) -> Self {
-        Cursor { stream, index: 0 }
+        TokenTreeCursor { stream, index: 0 }
    }
    #[inline]
@ -614,6 +617,15 @@ impl Cursor {
    /// Peeks at the token tree `n` positions past the cursor's current index
    /// without advancing the cursor; `look_ahead(0)` is the tree at the
    /// current index. Returns `None` if that position is past the end of the
    /// stream.
    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
        self.stream.0.get(self.index + n)
    }
    // Replace the previously obtained token tree with `tts`, and rewind to
    // just before them, so that the first tree of `tts` (if any) is now at
    // the cursor's current index.
    //
    // Panics if `self.index` is 0 (there is nothing before the cursor to
    // replace).
    pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
        assert!(self.index > 0);
        // Step back onto the token tree that is being replaced.
        self.index -= 1;
        // NOTE(review): `Lrc::make_mut` presumably clones the underlying
        // vector first if it is shared with other streams -- confirm.
        let stream = Lrc::make_mut(&mut self.stream.0);
        // Swap the single tree at `self.index` for all of `tts`; the index
        // itself is not moved, so it now refers to the start of `tts`.
        stream.splice(self.index..self.index + 1, tts);
    }
 }
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
--- a/compiler/rustc_expand/src/mbe/metavar_expr.rs
+++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs
@ -1,5 +1,5 @@
 use rustc_ast::token::{self, Delimiter};
-use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{RefTokenTreeCursor, TokenStream, TokenTree};
 use rustc_ast::{LitIntType, LitKind};
 use rustc_ast_pretty::pprust;
 use rustc_errors::{Applicability, PResult};
@ -72,7 +72,7 @@ impl MetaVarExpr {
 // Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
 fn check_trailing_token<'sess>(
-    iter: &mut CursorRef<'_>,
+    iter: &mut RefTokenTreeCursor<'_>,
    sess: &'sess ParseSess,
 ) -> PResult<'sess, ()> {
    if let Some(tt) = iter.next() {
@ -88,7 +88,7 @@ fn check_trailing_token<'sess>(
 /// Parse a meta-variable `count` expression: `count(ident[, depth])`
 fn parse_count<'sess>(
-    iter: &mut CursorRef<'_>,
+    iter: &mut RefTokenTreeCursor<'_>,
    sess: &'sess ParseSess,
    span: Span,
 ) -> PResult<'sess, MetaVarExpr> {
@ -99,7 +99,7 @@ fn parse_count<'sess>(
 /// Parses the depth used by index(depth) and length(depth).
 fn parse_depth<'sess>(
-    iter: &mut CursorRef<'_>,
+    iter: &mut RefTokenTreeCursor<'_>,
    sess: &'sess ParseSess,
    span: Span,
 ) -> PResult<'sess, usize> {
@ -126,7 +126,7 @@ fn parse_depth<'sess>(
 /// Parses a generic ident
 fn parse_ident<'sess>(
-    iter: &mut CursorRef<'_>,
+    iter: &mut RefTokenTreeCursor<'_>,
    sess: &'sess ParseSess,
    span: Span,
 ) -> PResult<'sess, Ident> {
@ -152,7 +152,7 @@ fn parse_ident<'sess>(
 /// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
 /// iterator is not modified and the result is `false`.
-fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool {
+fn try_eat_comma(iter: &mut RefTokenTreeCursor<'_>) -> bool {
    if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) {
        let _ = iter.next();
        return true;
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@ -469,6 +469,6 @@ mod size_asserts {
    use rustc_data_structures::static_assert_size;
    // tidy-alphabetical-start
    static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 144);
+    static_assert_size!(LazyAttrTokenStreamImpl, 120);
    // tidy-alphabetical-end
 }
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> {
        }
        if self.token.kind == TokenKind::Semi
-            && matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _)))
+            && matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _)))
            && self.may_recover()
        {
            // It is likely that the closure body is a block but where the
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -19,9 +19,8 @@ pub use path::PathStyle;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::AttributesData;
+use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing};
-use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
+use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::tokenstream::{TokenStream, TokenTree};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
@ -168,7 +167,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 336);
+rustc_data_structures::static_assert_size!(Parser<'_>, 312);
 /// Stores span information about a closure.
 #[derive(Clone)]
@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> {
    }
 }
 /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
 /// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
 /// use this type to emit them as a linear sequence. But a linear sequence is
 /// what the parser expects, for the most part.
 #[derive(Clone)]
 struct TokenCursor {
-    // The current (innermost) frame. `frame` and `stack` could be combined,
+    // Cursor for the current (innermost) token stream. The delimiters for this
-    // but it's faster to have them separately to access `frame` directly
+    // token stream are found in `self.stack.last()`; when that is `None` then
-    // rather than via something like `stack.last().unwrap()` or
+    // we are in the outermost token stream which never has delimiters.
-    // `stack[stack.len() - 1]`.
+    tree_cursor: TokenTreeCursor,
-    frame: TokenCursorFrame,
+
-    // Additional frames that enclose `frame`.
+    // Token streams surrounding the current one. The delimiters for stack[n]'s
-    stack: Vec<TokenCursorFrame>,
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
    // because it's the outermost token stream which never has delimiters.
    stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
    desugar_doc_comments: bool,
    // Counts the number of calls to `{,inlined_}next`.
    num_next_calls: usize,
    // During parsing, we may sometimes need to 'unglue' a
    // glued token into two component tokens
    // (e.g. '>>' into '>' and '>'), so that the parser
@ -257,18 +265,6 @@ struct TokenCursor {
    break_last_token: bool,
 }
 #[derive(Clone)]
 struct TokenCursorFrame {
    delim_sp: Option<(Delimiter, DelimSpan)>,
    tree_cursor: tokenstream::Cursor,
 }
 impl TokenCursorFrame {
    fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
        TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
    }
 }
 impl TokenCursor {
    fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
        self.inlined_next(desugar_doc_comments)
@ -281,38 +277,47 @@ impl TokenCursor {
            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
            // removed.
-            if let Some(tree) = self.frame.tree_cursor.next_ref() {
+            if let Some(tree) = self.tree_cursor.next_ref() {
                match tree {
                    &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
                        (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
-                            return self.desugar(attr_style, data, span);
+                            let desugared = self.desugar(attr_style, data, span);
                            self.tree_cursor.replace_prev_and_rewind(desugared);
                            // Continue to get the first token of the desugared doc comment.
                        }
                        _ => {
                            debug_assert!(!matches!(
                                token.kind,
                                token::OpenDelim(_) | token::CloseDelim(_)
                            ));
                            return (token.clone(), spacing);
                        }
                        _ => return (token.clone(), spacing),
                    },
                    &TokenTree::Delimited(sp, delim, ref tts) => {
-                        // Set `open_delim` to true here because we deal with it immediately.
+                        let trees = tts.clone().into_trees();
-                        let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
+                        self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
                        self.stack.push(mem::replace(&mut self.frame, frame));
                        if delim != Delimiter::Invisible {
                            return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
                        }
                        // No open delimiter to return; continue on to the next iteration.
                    }
                };
-            } else if let Some(frame) = self.stack.pop() {
+            } else if let Some((tree_cursor, delim, span)) = self.stack.pop() {
-                if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible {
+                // We have exhausted this token stream. Move back to its parent token stream.
-                    self.frame = frame;
+                self.tree_cursor = tree_cursor;
                if delim != Delimiter::Invisible {
                    return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
                }
                self.frame = frame;
                // No close delimiter to return; continue on to the next iteration.
            } else {
                // We have exhausted the outermost token stream.
                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
            }
        }
    }
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
+    // Desugar a doc comment into something like `#[doc = r"foo"]`.
    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
        // required to wrap the text. E.g.
        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
@ -346,27 +351,15 @@ impl TokenCursor {
            .collect::<TokenStream>(),
        );
        self.stack.push(mem::replace(
            &mut self.frame,
            TokenCursorFrame::new(
                None,
        if attr_style == AttrStyle::Inner {
-                    [
+            vec![
                TokenTree::token_alone(token::Pound, span),
                TokenTree::token_alone(token::Not, span),
                body,
            ]
                    .into_iter()
                    .collect::<TokenStream>()
        } else {
-                    [TokenTree::token_alone(token::Pound, span), body]
+            vec![TokenTree::token_alone(token::Pound, span), body]
-                        .into_iter()
+        }
                        .collect::<TokenStream>()
                },
            ),
        ));
        self.next(/* desugar_doc_comments */ false)
    }
 }
@ -475,7 +468,7 @@ impl<'a> Parser<'a> {
            restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
            token_cursor: TokenCursor {
-                frame: TokenCursorFrame::new(None, tokens),
+                tree_cursor: tokens.into_trees(),
                stack: Vec::new(),
                num_next_calls: 0,
                desugar_doc_comments,
@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> {
            return looker(&self.token);
        }
-        let frame = &self.token_cursor.frame;
+        let tree_cursor = &self.token_cursor.tree_cursor;
-        if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible {
+        if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
            && delim != Delimiter::Invisible
        {
            let all_normal = (0..dist).all(|i| {
-                let token = frame.tree_cursor.look_ahead(i);
+                let token = tree_cursor.look_ahead(i);
                !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
            });
            if all_normal {
-                return match frame.tree_cursor.look_ahead(dist - 1) {
+                return match tree_cursor.look_ahead(dist - 1) {
                    Some(tree) => match tree {
                        TokenTree::Token(token, _) => looker(token),
                        TokenTree::Delimited(dspan, delim, _) => {
@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> {
    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
        match self.token.kind {
            token::OpenDelim(..) => {
-                // Grab the tokens from this frame.
+                // Grab the tokens within the delimiters.
-                let frame = &self.token_cursor.frame;
+                let tree_cursor = &self.token_cursor.tree_cursor;
-                let stream = frame.tree_cursor.stream.clone();
+                let stream = tree_cursor.stream.clone();
-                let (delim, span) = frame.delim_sp.unwrap();
+                let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
                // Advance the token cursor through the entire delimited
                // sequence. After getting the `OpenDelim` we are *within* the
--- a/src/tools/rustfmt/src/macros.rs
+++ b/src/tools/rustfmt/src/macros.rs
@ -13,7 +13,7 @@ use std::collections::HashMap;
 use std::panic::{catch_unwind, AssertUnwindSafe};
 use rustc_ast::token::{BinOpToken, Delimiter, Token, TokenKind};
-use rustc_ast::tokenstream::{Cursor, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::{ast, ptr};
 use rustc_ast_pretty::pprust;
 use rustc_span::{
@ -736,7 +736,7 @@ impl MacroArgParser {
        self.buf.clear();
    }
-    fn add_meta_variable(&mut self, iter: &mut Cursor) -> Option<()> {
+    fn add_meta_variable(&mut self, iter: &mut TokenTreeCursor) -> Option<()> {
        match iter.next() {
            Some(TokenTree::Token(
                Token {
@ -768,7 +768,7 @@ impl MacroArgParser {
        &mut self,
        inner: Vec<ParsedMacroArg>,
        delim: Delimiter,
-        iter: &mut Cursor,
+        iter: &mut TokenTreeCursor,
    ) -> Option<()> {
        let mut buffer = String::new();
        let mut first = true;
@ -1121,7 +1121,7 @@ pub(crate) fn macro_style(mac: &ast::MacCall, context: &RewriteContext<'_>) -> D
 // Currently we do not attempt to parse any further than that.
 #[derive(new)]
 struct MacroParser {
-    toks: Cursor,
+    toks: TokenTreeCursor,
 }
 impl MacroParser {