Move doc comment desugaring out of TokenCursor
`TokenCursor` currently does doc comment desugaring on the fly, if the `desugar_doc_comments` field is set. This requires also modifying the token stream on the fly with `replace_prev_and_rewind`.

This commit moves the doc comment desugaring out of `TokenCursor`, by introducing a new `TokenStream::desugar_doc_comments` method. This separation of desugaring and iterating makes the code nicer.
parent c70c8b7196
commit ff7d5ba65e

2 changed files with 102 additions and 76 deletions
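As context for the diff below: the desugaring is purely syntactic. Each doc comment token becomes an equivalent `doc` attribute whose value is a raw string literal, with just enough `#`s to contain the comment text. A sketch of the mapping (illustrative only; the real code also carries spans and spacing through):

    /// foo       =>  #[doc = r"foo"]           (outer doc comment)
    //! foo       =>  #![doc = r"foo"]          (inner doc comment, AttrStyle::Inner)
    /// say "hi"  =>  #[doc = r#"say "hi""#]    (one `#` so the quote can appear)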
compiler/rustc_ast/src/tokenstream.rs

@@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
     pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
         self.0.chunks(chunk_size)
     }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
+    /// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
+    /// as possible.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+
+                    &TokenTree::Token(..) => i += 1,
+
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }
 
 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
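The `num_of_hashes` scan in `desugared_tts` above deserves a worked example: it tracks the longest run of a `"` followed by `#`s, because that is exactly the character sequence that could prematurely close a raw string `r##"…"##`. A minimal standalone re-implementation of the scan (a sketch; the free-function form and `u32` width are mine, not rustc's):

    // Minimum number of `#`s needed so `data` fits inside a raw string literal.
    fn num_of_hashes(data: &str) -> u32 {
        let mut num_of_hashes = 0;
        let mut count = 0;
        for ch in data.chars() {
            count = match ch {
                '"' => 1,                      // a quote opens a potential `"##...` run
                '#' if count > 0 => count + 1, // hashes only count right after a quote
                _ => 0,                        // anything else resets the run
            };
            num_of_hashes = num_of_hashes.max(count);
        }
        num_of_hashes
    }

    fn main() {
        assert_eq!(num_of_hashes("abc d"), 0);     // no quote: r"abc d" suffices
        assert_eq!(num_of_hashes("abc \"d\""), 1); // bare quote: needs r#"..."#
        // Longest run is `"##`: `"` -> 1, `#` -> 2, `#` -> 3.
        assert_eq!(num_of_hashes("abc \"##d##\""), 3);
    }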
compiler/rustc_parse/src/parser/mod.rs

@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
 
@@ -271,23 +266,11 @@ impl TokenCursor {
         if let Some(tree) = self.tree_cursor.next_ref() {
             match tree {
                 &TokenTree::Token(ref token, spacing) => {
-                    match (self.desugar_doc_comments, token) {
-                        (
-                            true,
-                            &Token { kind: token::DocComment(_, attr_style, data), span },
-                        ) => {
-                            let desugared = self.desugar(attr_style, data, span);
-                            self.tree_cursor.replace_prev_and_rewind(desugared);
-                            // Continue to get the first token of the desugared doc comment.
-                        }
-                        _ => {
-                            debug_assert!(!matches!(
-                                token.kind,
-                                token::OpenDelim(_) | token::CloseDelim(_)
-                            ));
-                            return (token.clone(), spacing);
-                        }
-                    }
+                    debug_assert!(!matches!(
+                        token.kind,
+                        token::OpenDelim(_) | token::CloseDelim(_)
+                    ));
+                    return (token.clone(), spacing);
                 }
                 &TokenTree::Delimited(sp, delim, ref tts) => {
                     let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@ impl TokenCursor {
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,10 +388,14 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
+        mut stream: TokenStream,
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
+        if desugar_doc_comments {
+            stream.desugar_doc_comments();
+        }
+
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
@@ -464,10 +405,9 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
             },
             unmatched_angle_bracket_count: 0,
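A note on why desugaring eagerly in `Parser::new` stays cheap: `desugar_doc_comments` clones only the stream handle, and a deep copy happens only where `Lrc::make_mut` is reached, so a stream containing no doc comments is scanned but never copied. A small sketch of that copy-on-write behaviour, using std's `Rc` as a stand-in for rustc's `Lrc` (an assumption for illustration; `Lrc` is the compiler's reference-counted pointer alias):

    use std::rc::Rc;

    fn main() {
        // Two handles to one vector, like a shared `TokenStream`.
        let a: Rc<Vec<u32>> = Rc::new(vec![1, 2, 3]);
        let mut b = Rc::clone(&a);

        // `make_mut` deep-copies only because the data is shared; a sole
        // owner would be mutated in place with no copy at all.
        Rc::make_mut(&mut b).push(4);

        assert_eq!(*a, vec![1, 2, 3]); // the original handle is untouched
        assert_eq!(*b, vec![1, 2, 3, 4]);
    }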