1
Fork 0

Auto merge of #96282 - petrochenkov:unindent, r=GuillaumeGomez

rustdoc: Unindent doc fragments on `Attributes` construction

`Attributes` can be constructed at arbitrary points, even after the `unindent_comments` pass.
`Attributes` that are constructed too late end up unindented.

All doc fragments need to be eventually indented before use, so there are no reasons to not do this immediately during their construction.

Fixes https://rust-lang.zulipchat.com/#narrow/stream/266220-rustdoc/topic/.60unindent_comments.60.20cannot.20work.20as.20a.20separate.20pass.
I'm not sure how to make a minimized reproduction, but unindenting the fragments during their construction should fix the issue.. by construction, and I also verified that all doc strings now hit the `resolver_caches.markdown_links` cache in https://github.com/rust-lang/rust/pull/94857.
This commit is contained in:
bors 2022-04-22 02:30:27 +00:00
commit 8b2393086f
7 changed files with 89 additions and 133 deletions

View file

@ -1,13 +1,11 @@
use std::cell::RefCell;
use std::default::Default;
use std::fmt;
use std::hash::Hash;
use std::iter;
use std::lazy::SyncOnceCell as OnceCell;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use std::vec;
use std::{cmp, fmt, iter};
use arrayvec::ArrayVec;
@ -55,6 +53,9 @@ crate use self::Type::{
};
crate use self::Visibility::{Inherited, Public};
#[cfg(test)]
mod tests;
crate type ItemIdSet = FxHashSet<ItemId>;
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
@ -1028,6 +1029,86 @@ crate fn collapse_doc_fragments(doc_strings: &[DocFragment]) -> String {
acc
}
/// Removes excess indentation on comments in order for the Markdown
/// to be parsed correctly. This is necessary because the convention for
/// writing documentation is to provide a space between the /// or //! marker
/// and the doc text, but Markdown is whitespace-sensitive. For example,
/// a block of text with four-space indentation is parsed as a code block,
/// so if we didn't unindent comments, these list items
///
/// /// A list:
/// ///
/// /// - Foo
/// /// - Bar
///
/// would be parsed as if they were in a code block, which is likely not what the user intended.
fn unindent_doc_fragments(docs: &mut Vec<DocFragment>) {
// `add` is used in case the most common sugared doc syntax is used ("/// "). The other
// fragments kind's lines are never starting with a whitespace unless they are using some
// markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
// we need to take into account the fact that the minimum indent minus one (to take this
// whitespace into account).
//
// For example:
//
// /// hello!
// #[doc = "another"]
//
// In this case, you want "hello! another" and not "hello! another".
let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
&& docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
{
// In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
// "decide" how much the minimum indent will be.
1
} else {
0
};
// `min_indent` is used to know how much whitespaces from the start of each lines must be
// removed. Example:
//
// /// hello!
// #[doc = "another"]
//
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
// (5 - 1) whitespaces.
let Some(min_indent) = docs
.iter()
.map(|fragment| {
fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
if line.chars().all(|c| c.is_whitespace()) {
min_indent
} else {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
cmp::min(min_indent, whitespace)
+ if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
}
})
})
.min()
else {
return;
};
for fragment in docs {
if fragment.doc == kw::Empty {
continue;
}
let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
min_indent - add
} else {
min_indent
};
fragment.indent = min_indent;
}
}
/// A link that has not yet been rendered.
///
/// This link will be turned into a rendered link by [`Item::links`].
@ -1119,6 +1200,8 @@ impl Attributes {
}
}
unindent_doc_fragments(&mut doc_strings);
Attributes { doc_strings, other_attrs }
}

View file

@ -20,7 +20,7 @@ fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
fn run_test(input: &str, expected: &str) {
create_default_session_globals_then(|| {
let mut s = create_doc_fragment(input);
unindent_fragments(&mut s);
unindent_doc_fragments(&mut s);
assert_eq!(collapse_doc_fragments(&s), expected);
});
}

View file

@ -1174,8 +1174,6 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
nested: F,
) {
let ast_attrs = self.tcx.hir().attrs(hir_id);
let mut attrs = Attributes::from_ast(ast_attrs, None);
if let Some(ref cfg) = ast_attrs.cfg(self.tcx, &FxHashSet::default()) {
if !cfg.matches(&self.sess.parse_sess, Some(self.sess.features_untracked())) {
return;
@ -1187,9 +1185,9 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
self.collector.names.push(name);
}
attrs.unindent_doc_comments();
// The collapse-docs pass won't combine sugared/raw doc attributes, or included files with
// anything else, this will combine them for us.
let attrs = Attributes::from_ast(ast_attrs, None);
if let Some(doc) = attrs.collapsed_doc_value() {
// Use the outermost invocation, so that doctest names come from where the docs were written.
let span = ast_attrs

View file

@ -66,9 +66,7 @@ crate fn early_resolve_intra_doc_links(
}
fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes {
let mut attrs = Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true);
attrs.unindent_doc_comments();
attrs
Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true)
}
struct EarlyDocLinkResolver<'r, 'ra> {

View file

@ -24,9 +24,6 @@ crate use self::strip_private::STRIP_PRIVATE;
mod strip_priv_imports;
crate use self::strip_priv_imports::STRIP_PRIV_IMPORTS;
mod unindent_comments;
crate use self::unindent_comments::UNINDENT_COMMENTS;
mod propagate_doc_cfg;
crate use self::propagate_doc_cfg::PROPAGATE_DOC_CFG;
@ -81,7 +78,6 @@ crate enum Condition {
crate const PASSES: &[Pass] = &[
CHECK_DOC_TEST_VISIBILITY,
STRIP_HIDDEN,
UNINDENT_COMMENTS,
STRIP_PRIVATE,
STRIP_PRIV_IMPORTS,
PROPAGATE_DOC_CFG,
@ -96,7 +92,6 @@ crate const PASSES: &[Pass] = &[
/// The list of passes run by default.
crate const DEFAULT_PASSES: &[ConditionalPass] = &[
ConditionalPass::always(COLLECT_TRAIT_IMPLS),
ConditionalPass::always(UNINDENT_COMMENTS),
ConditionalPass::always(CHECK_DOC_TEST_VISIBILITY),
ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden),
ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate),

View file

@ -1,116 +0,0 @@
//! Removes excess indentation on comments in order for the Markdown
//! to be parsed correctly. This is necessary because the convention for
//! writing documentation is to provide a space between the /// or //! marker
//! and the doc text, but Markdown is whitespace-sensitive. For example,
//! a block of text with four-space indentation is parsed as a code block,
//! so if we didn't unindent comments, these list items
//!
//! /// A list:
//! ///
//! /// - Foo
//! /// - Bar
//!
//! would be parsed as if they were in a code block, which is likely not what the user intended.
use std::cmp;
use rustc_span::symbol::kw;
use crate::clean::{self, DocFragment, DocFragmentKind, Item};
use crate::core::DocContext;
use crate::fold::{self, DocFolder};
use crate::passes::Pass;
#[cfg(test)]
mod tests;
crate const UNINDENT_COMMENTS: Pass = Pass {
name: "unindent-comments",
run: unindent_comments,
description: "removes excess indentation on comments in order for markdown to like it",
};
crate fn unindent_comments(krate: clean::Crate, _: &mut DocContext<'_>) -> clean::Crate {
CommentCleaner.fold_crate(krate)
}
struct CommentCleaner;
impl fold::DocFolder for CommentCleaner {
fn fold_item(&mut self, mut i: Item) -> Option<Item> {
i.attrs.unindent_doc_comments();
Some(self.fold_item_recur(i))
}
}
impl clean::Attributes {
crate fn unindent_doc_comments(&mut self) {
unindent_fragments(&mut self.doc_strings);
}
}
fn unindent_fragments(docs: &mut Vec<DocFragment>) {
// `add` is used in case the most common sugared doc syntax is used ("/// "). The other
// fragments kind's lines are never starting with a whitespace unless they are using some
// markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
// we need to take into account the fact that the minimum indent minus one (to take this
// whitespace into account).
//
// For example:
//
// /// hello!
// #[doc = "another"]
//
// In this case, you want "hello! another" and not "hello! another".
let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
&& docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
{
// In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
// "decide" how much the minimum indent will be.
1
} else {
0
};
// `min_indent` is used to know how much whitespaces from the start of each lines must be
// removed. Example:
//
// /// hello!
// #[doc = "another"]
//
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
// (5 - 1) whitespaces.
let Some(min_indent) = docs
.iter()
.map(|fragment| {
fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
if line.chars().all(|c| c.is_whitespace()) {
min_indent
} else {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
cmp::min(min_indent, whitespace)
+ if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
}
})
})
.min()
else {
return;
};
for fragment in docs {
if fragment.doc == kw::Empty {
continue;
}
let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
min_indent - add
} else {
min_indent
};
fragment.indent = min_indent;
}
}

View file

@ -1,7 +1,6 @@
Available passes for running rustdoc:
check_doc_test_visibility - run various visibility-related lints on doctests
strip-hidden - strips all `#[doc(hidden)]` items from the output
unindent-comments - removes excess indentation on comments in order for markdown to like it
strip-private - strips all private items from a crate which cannot be seen externally, implies strip-priv-imports
strip-priv-imports - strips all private import statements (`use`, `extern crate`) from a crate
propagate-doc-cfg - propagates `#[doc(cfg(...))]` to child items
@ -14,7 +13,6 @@ check-invalid-html-tags - detects invalid HTML tags in doc comments
Default passes for rustdoc:
collect-trait-impls
unindent-comments
check_doc_test_visibility
strip-hidden (when not --document-hidden-items)
strip-private (when not --document-private-items)