Improve print_tts by changing tokenstream::Spacing.

`tokenstream::Spacing` appears on all `TokenTree::Token` instances, both punct and non-punct. Its current usage: - `Joint` means "can join with the next token *and* that token is a punct". - `Alone` means "cannot join with the next token *or* can join with the next token but that token is not a punct". The fact that `Alone` is used for two different cases is awkward. This commit augments `tokenstream::Spacing` with a new variant `JointHidden`, resulting in: - `Joint` means "can join with the next token *and* that token is a punct". - `JointHidden` means "can join with the next token *and* that token is a not a punct". - `Alone` means "cannot join with the next token". This *drastically* improves the output of `print_tts`. For example, this: ``` stringify!(let a: Vec<u32> = vec![];) ``` currently produces this string: ``` let a : Vec < u32 > = vec! [] ; ``` With this PR, it now produces this string: ``` let a: Vec<u32> = vec![] ; ``` (The space after the `]` is because `TokenTree::Delimited` currently doesn't have spacing information. The subsequent commit fixes this.) The new `print_tts` doesn't replicate original code perfectly. E.g. multiple space characters will be condensed into a single space character. But it's much improved. `print_tts` still produces the old, uglier output for code produced by proc macros. Because we have to translate the generated code from `proc_macro::Spacing` to the more expressive `token::Spacing`, which results in too much `proc_macro::Along` usage and no `proc_macro::JointHidden` usage. So `space_between` still exists and is used by `print_tts` in conjunction with the `Spacing` field. This change will also help with the removal of `Token::Interpolated`. Currently interpolated tokens are pretty-printed nicely via AST pretty printing. `Token::Interpolated` removal will mean they get printed with `print_tts`. Without this change, that would result in much uglier output for code produced by decl macro expansions. With this change, AST pretty printing and `print_tts` produce similar results. The commit also tweaks the comments on `proc_macro::Spacing`. In particular, it refers to "compound tokens" rather than "multi-char operators" because lifetimes aren't operators.
2023-08-08 11:43:44 +10:00 · 2023-08-08 11:43:44 +10:00 · 925f7fad57
commit 925f7fad57
parent 7e452c123c
56 changed files with 567 additions and 356 deletions
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@ -99,6 +99,11 @@ impl TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::Joint)
    }

+    /// Create a `TokenTree::Token` with joint-hidden spacing.
+    pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
+        TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
+    }
+
    pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
        match self {
            TokenTree::Token(token, spacing) => match token.uninterpolate() {
@ -303,21 +308,64 @@ pub struct AttributesData {
 #[derive(Clone, Debug, Default, Encodable, Decodable)]
 pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);

-/// Similar to `proc_macro::Spacing`, but for tokens.
-///
-/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
-/// we convert to `proc_macro::TokenTree` for proc macros only `Punct`
-/// `TokenTree`s have a `proc_macro::Spacing`.
+/// Indicates whether a token can join with the following token to form a
+/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
+/// guide pretty-printing, which is where the `JointHidden` value (which isn't
+/// part of `proc_macro::Spacing`) comes in useful.
 #[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub enum Spacing {
-    /// The token is not immediately followed by an operator token (as
-    /// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
-    /// `+/*foo*/=`, `+ident`, and `+()`.
+    /// The token cannot join with the following token to form a compound
+    /// token.
+    ///
+    /// In token streams parsed from source code, the compiler will use `Alone`
+    /// for any token immediately followed by whitespace, a non-doc comment, or
+    /// EOF.
+    ///
+    /// When constructing token streams within the compiler, use this for each
+    /// token that (a) should be pretty-printed with a space after it, or (b)
+    /// is the last token in the stream. (In the latter case the choice of
+    /// spacing doesn't matter because it is never used for the last token. We
+    /// arbitrarily use `Alone`.)
+    ///
+    /// Converts to `proc_macro::Spacing::Alone`, and
+    /// `proc_macro::Spacing::Alone` converts back to this.
    Alone,

-    /// The token is immediately followed by an operator token. E.g. a `+`
-    /// token is `Joint` in `+=` and `++`.
+    /// The token can join with the following token to form a compound token.
+    ///
+    /// In token streams parsed from source code, the compiler will use `Joint`
+    /// for any token immediately followed by punctuation (as determined by
+    /// `Token::is_punct`).
+    ///
+    /// When constructing token streams within the compiler, use this for each
+    /// token that (a) should be pretty-printed without a space after it, and
+    /// (b) is followed by a punctuation token.
+    ///
+    /// Converts to `proc_macro::Spacing::Joint`, and
+    /// `proc_macro::Spacing::Joint` converts back to this.
    Joint,
+
+    /// The token can join with the following token to form a compound token,
+    /// but this will not be visible at the proc macro level. (This is what the
+    /// `Hidden` means; see below.)
+    ///
+    /// In token streams parsed from source code, the compiler will use
+    /// `JointHidden` for any token immediately followed by anything not
+    /// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
+    /// literal, delimiter, doc comment.
+    ///
+    /// When constructing token streams, use this for each token that (a)
+    /// should be pretty-printed without a space after it, and (b) is followed
+    /// by a non-punctuation token.
+    ///
+    /// Converts to `proc_macro::Spacing::Alone`, but
+    /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
+    /// Because of that, pretty-printing of `TokenStream`s produced by proc
+    /// macros is unavoidably uglier (with more whitespace between tokens) than
+    /// pretty-printing of `TokenStream`'s produced by other means (i.e. parsed
+    /// source code, internally constructed token streams, and token streams
+    /// produced by declarative macros).
+    JointHidden,
 }

 impl TokenStream {
@ -421,16 +469,14 @@ impl TokenStream {
        self
    }

-    /// Create a token stream containing a single token with alone spacing.
+    /// Create a token stream containing a single token with alone spacing. The
+    /// spacing used for the final token in a constructed stream doesn't matter
+    /// because it's never used. In practice we arbitrarily use
+    /// `Spacing::Alone`.
    pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
        TokenStream::new(vec![TokenTree::token_alone(kind, span)])
    }

-    /// Create a token stream containing a single token with joint spacing.
-    pub fn token_joint(kind: TokenKind, span: Span) -> TokenStream {
-        TokenStream::new(vec![TokenTree::token_joint(kind, span)])
-    }
-
    /// Create a token stream containing a single `Delimited`.
    pub fn delimited(span: DelimSpan, delim: Delimiter, tts: TokenStream) -> TokenStream {
        TokenStream::new(vec![TokenTree::Delimited(span, delim, tts)])
@ -517,7 +563,7 @@ impl TokenStream {
    // If `vec` is not empty, try to glue `tt` onto its last token. The return
    // value indicates if gluing took place.
    fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
-        if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
+        if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
            && let TokenTree::Token(tok, spacing) = tt
            && let Some(glued_tok) = last_tok.glue(tok)
        {
@ -641,7 +687,7 @@ impl TokenStream {

            if attr_style == AttrStyle::Inner {
                vec![
-                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_joint(token::Pound, span),
                    TokenTree::token_alone(token::Not, span),
                    body,
                ]