Auto merge of #96282 - petrochenkov:unindent, r=GuillaumeGomez

rustdoc: Unindent doc fragments on `Attributes` construction `Attributes` can be constructed at arbitrary points, even after the `unindent_comments` pass. `Attributes` that are constructed too late end up unindented. All doc fragments need to be eventually indented before use, so there are no reasons to not do this immediately during their construction. Fixes https://rust-lang.zulipchat.com/#narrow/stream/266220-rustdoc/topic/.60unindent_comments.60.20cannot.20work.20as.20a.20separate.20pass. I'm not sure how to make a minimized reproduction, but unindenting the fragments during their construction should fix the issue.. by construction, and I also verified that all doc strings now hit the `resolver_caches.markdown_links` cache in https://github.com/rust-lang/rust/pull/94857.
2022-04-22 02:30:27 +00:00 · 2022-04-22 02:30:27 +00:00 · 8b2393086f
commit 8b2393086f
parent 0b3404b01b 7803a41151
7 changed files with 89 additions and 133 deletions
--- a/src/librustdoc/clean/types.rs
+++ b/src/librustdoc/clean/types.rs
@ -1,13 +1,11 @@
 use std::cell::RefCell;
 use std::default::Default;
 use std::fmt;
 use std::hash::Hash;
 use std::iter;
 use std::lazy::SyncOnceCell as OnceCell;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
-use std::vec;
+use std::{cmp, fmt, iter};
 use arrayvec::ArrayVec;
@ -55,6 +53,9 @@ crate use self::Type::{
 };
 crate use self::Visibility::{Inherited, Public};
 #[cfg(test)]
 mod tests;
 crate type ItemIdSet = FxHashSet<ItemId>;
 #[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
@ -1028,6 +1029,86 @@ crate fn collapse_doc_fragments(doc_strings: &[DocFragment]) -> String {
    acc
 }
 /// Removes excess indentation on comments in order for the Markdown
 /// to be parsed correctly. This is necessary because the convention for
 /// writing documentation is to provide a space between the /// or //! marker
 /// and the doc text, but Markdown is whitespace-sensitive. For example,
 /// a block of text with four-space indentation is parsed as a code block,
 /// so if we didn't unindent comments, these list items
 ///
 /// /// A list:
 /// ///
 /// ///    - Foo
 /// ///    - Bar
 ///
 /// would be parsed as if they were in a code block, which is likely not what the user intended.
 fn unindent_doc_fragments(docs: &mut Vec<DocFragment>) {
    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
    // fragments kind's lines are never starting with a whitespace unless they are using some
    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
    // we need to take into account the fact that the minimum indent minus one (to take this
    // whitespace into account).
    //
    // For example:
    //
    // /// hello!
    // #[doc = "another"]
    //
    // In this case, you want "hello! another" and not "hello!  another".
    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
    {
        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
        // "decide" how much the minimum indent will be.
        1
    } else {
        0
    };
    // `min_indent` is used to know how much whitespaces from the start of each lines must be
    // removed. Example:
    //
    // ///     hello!
    // #[doc = "another"]
    //
    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
    // (5 - 1) whitespaces.
    let Some(min_indent) = docs
        .iter()
        .map(|fragment| {
            fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
                if line.chars().all(|c| c.is_whitespace()) {
                    min_indent
                } else {
                    // Compare against either space or tab, ignoring whether they are
                    // mixed or not.
                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
                    cmp::min(min_indent, whitespace)
                        + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
                }
            })
        })
        .min()
    else {
        return;
    };
    for fragment in docs {
        if fragment.doc == kw::Empty {
            continue;
        }
        let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
            min_indent - add
        } else {
            min_indent
        };
        fragment.indent = min_indent;
    }
 }
 /// A link that has not yet been rendered.
 ///
 /// This link will be turned into a rendered link by [`Item::links`].
@ -1119,6 +1200,8 @@ impl Attributes {
            }
        }
        unindent_doc_fragments(&mut doc_strings);
        Attributes { doc_strings, other_attrs }
    }
--- a/src/librustdoc/passes/unindent_comments/tests.rs
+++ b/src/librustdoc/passes/unindent_comments/tests.rs
@ -20,7 +20,7 @@ fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
 fn run_test(input: &str, expected: &str) {
    create_default_session_globals_then(|| {
        let mut s = create_doc_fragment(input);
-        unindent_fragments(&mut s);
+        unindent_doc_fragments(&mut s);
        assert_eq!(collapse_doc_fragments(&s), expected);
    });
 }
--- a/src/librustdoc/doctest.rs
+++ b/src/librustdoc/doctest.rs
@ -1174,8 +1174,6 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
        nested: F,
    ) {
        let ast_attrs = self.tcx.hir().attrs(hir_id);
        let mut attrs = Attributes::from_ast(ast_attrs, None);
        if let Some(ref cfg) = ast_attrs.cfg(self.tcx, &FxHashSet::default()) {
            if !cfg.matches(&self.sess.parse_sess, Some(self.sess.features_untracked())) {
                return;
@ -1187,9 +1185,9 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
            self.collector.names.push(name);
        }
        attrs.unindent_doc_comments();
        // The collapse-docs pass won't combine sugared/raw doc attributes, or included files with
        // anything else, this will combine them for us.
        let attrs = Attributes::from_ast(ast_attrs, None);
        if let Some(doc) = attrs.collapsed_doc_value() {
            // Use the outermost invocation, so that doctest names come from where the docs were written.
            let span = ast_attrs
--- a/src/librustdoc/passes/collect_intra_doc_links/early.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs
@ -66,9 +66,7 @@ crate fn early_resolve_intra_doc_links(
 }
 fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes {
-    let mut attrs = Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true);
+    Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true)
    attrs.unindent_doc_comments();
    attrs
 }
 struct EarlyDocLinkResolver<'r, 'ra> {
--- a/src/librustdoc/passes/mod.rs
+++ b/src/librustdoc/passes/mod.rs
@ -24,9 +24,6 @@ crate use self::strip_private::STRIP_PRIVATE;
 mod strip_priv_imports;
 crate use self::strip_priv_imports::STRIP_PRIV_IMPORTS;
 mod unindent_comments;
 crate use self::unindent_comments::UNINDENT_COMMENTS;
 mod propagate_doc_cfg;
 crate use self::propagate_doc_cfg::PROPAGATE_DOC_CFG;
@ -81,7 +78,6 @@ crate enum Condition {
 crate const PASSES: &[Pass] = &[
    CHECK_DOC_TEST_VISIBILITY,
    STRIP_HIDDEN,
    UNINDENT_COMMENTS,
    STRIP_PRIVATE,
    STRIP_PRIV_IMPORTS,
    PROPAGATE_DOC_CFG,
@ -96,7 +92,6 @@ crate const PASSES: &[Pass] = &[
 /// The list of passes run by default.
 crate const DEFAULT_PASSES: &[ConditionalPass] = &[
    ConditionalPass::always(COLLECT_TRAIT_IMPLS),
    ConditionalPass::always(UNINDENT_COMMENTS),
    ConditionalPass::always(CHECK_DOC_TEST_VISIBILITY),
    ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden),
    ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate),
--- a/src/librustdoc/passes/unindent_comments.rs
+++ b/src/librustdoc/passes/unindent_comments.rs
@ -1,116 +0,0 @@
 //! Removes excess indentation on comments in order for the Markdown
 //! to be parsed correctly. This is necessary because the convention for
 //! writing documentation is to provide a space between the /// or //! marker
 //! and the doc text, but Markdown is whitespace-sensitive. For example,
 //! a block of text with four-space indentation is parsed as a code block,
 //! so if we didn't unindent comments, these list items
 //!
 //! /// A list:
 //! ///
 //! ///    - Foo
 //! ///    - Bar
 //!
 //! would be parsed as if they were in a code block, which is likely not what the user intended.
 use std::cmp;
 use rustc_span::symbol::kw;
 use crate::clean::{self, DocFragment, DocFragmentKind, Item};
 use crate::core::DocContext;
 use crate::fold::{self, DocFolder};
 use crate::passes::Pass;
 #[cfg(test)]
 mod tests;
 crate const UNINDENT_COMMENTS: Pass = Pass {
    name: "unindent-comments",
    run: unindent_comments,
    description: "removes excess indentation on comments in order for markdown to like it",
 };
 crate fn unindent_comments(krate: clean::Crate, _: &mut DocContext<'_>) -> clean::Crate {
    CommentCleaner.fold_crate(krate)
 }
 struct CommentCleaner;
 impl fold::DocFolder for CommentCleaner {
    fn fold_item(&mut self, mut i: Item) -> Option<Item> {
        i.attrs.unindent_doc_comments();
        Some(self.fold_item_recur(i))
    }
 }
 impl clean::Attributes {
    crate fn unindent_doc_comments(&mut self) {
        unindent_fragments(&mut self.doc_strings);
    }
 }
 fn unindent_fragments(docs: &mut Vec<DocFragment>) {
    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
    // fragments kind's lines are never starting with a whitespace unless they are using some
    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
    // we need to take into account the fact that the minimum indent minus one (to take this
    // whitespace into account).
    //
    // For example:
    //
    // /// hello!
    // #[doc = "another"]
    //
    // In this case, you want "hello! another" and not "hello!  another".
    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
    {
        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
        // "decide" how much the minimum indent will be.
        1
    } else {
        0
    };
    // `min_indent` is used to know how much whitespaces from the start of each lines must be
    // removed. Example:
    //
    // ///     hello!
    // #[doc = "another"]
    //
    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
    // (5 - 1) whitespaces.
    let Some(min_indent) = docs
        .iter()
        .map(|fragment| {
            fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
                if line.chars().all(|c| c.is_whitespace()) {
                    min_indent
                } else {
                    // Compare against either space or tab, ignoring whether they are
                    // mixed or not.
                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
                    cmp::min(min_indent, whitespace)
                        + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
                }
            })
        })
        .min()
    else {
        return;
    };
    for fragment in docs {
        if fragment.doc == kw::Empty {
            continue;
        }
        let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
            min_indent - add
        } else {
            min_indent
        };
        fragment.indent = min_indent;
    }
 }
--- a/src/test/rustdoc-ui/issue-91713.stdout
+++ b/src/test/rustdoc-ui/issue-91713.stdout
@ -1,7 +1,6 @@
 Available passes for running rustdoc:
 check_doc_test_visibility - run various visibility-related lints on doctests
        strip-hidden - strips all `#[doc(hidden)]` items from the output
   unindent-comments - removes excess indentation on comments in order for markdown to like it
       strip-private - strips all private items from a crate which cannot be seen externally, implies strip-priv-imports
  strip-priv-imports - strips all private import statements (`use`, `extern crate`) from a crate
   propagate-doc-cfg - propagates `#[doc(cfg(...))]` to child items
@ -14,7 +13,6 @@ check-invalid-html-tags - detects invalid HTML tags in doc comments
 Default passes for rustdoc:
 collect-trait-impls
   unindent-comments
 check_doc_test_visibility
        strip-hidden  (when not --document-hidden-items)
       strip-private  (when not --document-private-items)