
rustdoc: check code block syntax in early pass

Andy Russell 2018-12-15 16:25:50 -05:00
parent ee10d99b9a
commit 8c93798e9f
10 changed files with 474 additions and 71 deletions

View file

@@ -25,40 +25,51 @@ pub fn render_with_highlighting(
tooltip: Option<(&str, &str)>,
) -> String {
debug!("highlighting: ================\n{}\n==============", src);
let sess = parse::ParseSess::new(FilePathMapping::empty());
let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
src.to_string());
let mut out = Vec::new();
if let Some((tooltip, class)) = tooltip {
write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
class='tooltiptext'>{}</span></div></div>",
class, tooltip).unwrap();
}
write_header(class, &mut out).unwrap();
let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
Ok(l) => l,
Err(_) => {
let first_line = src.lines().next().unwrap_or_else(|| "");
let mut err = sess.span_diagnostic
.struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
(Ignoring this codeblock)",
first_line));
err.emit();
return String::new();
}
};
let sess = parse::ParseSess::new(FilePathMapping::empty());
let fm = sess.source_map().new_source_file(
FileName::Custom(String::from("rustdoc-highlighting")),
src.to_owned(),
);
let highlight_result =
lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
let mut classifier = Classifier::new(lexer, sess.source_map());
if classifier.write_source(&mut out).is_err() {
classifier.lexer.emit_fatal_errors();
return format!("<pre>{}</pre>", src);
}
let mut highlighted_source = vec![];
if classifier.write_source(&mut highlighted_source).is_err() {
Err(classifier.lexer.buffer_fatal_errors())
} else {
Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
}
});
match highlight_result {
Ok(highlighted_source) => {
write_header(class, &mut out).unwrap();
write!(out, "{}", highlighted_source).unwrap();
if let Some(extension) = extension {
write!(out, "{}", extension).unwrap();
}
write_footer(&mut out).unwrap();
}
Err(errors) => {
// If errors are encountered while trying to highlight, cancel the errors and just emit
// the unhighlighted source. The errors will have already been reported in the
// `check-code-block-syntax` pass.
for mut error in errors {
error.cancel();
}
write!(out, "<pre><code>{}</code></pre>", src).unwrap();
}
}
String::from_utf8_lossy(&out[..]).into_owned()
}
@@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
}
}
enum HighlightError {
LexError,
IoError(io::Error),
}
impl From<io::Error> for HighlightError {
fn from(err: io::Error) -> Self {
HighlightError::IoError(err)
}
}
impl<'a> Classifier<'a> {
fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
Classifier {
@@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
}
}
/// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
/// Gets the next token out of the lexer.
fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
match self.lexer.try_next_token() {
Ok(tas) => Ok(tas),
Err(_) => {
let mut err = self.lexer.sess.span_diagnostic
.struct_warn("Backing out of syntax highlighting");
err.note("You probably did not intend to render this as a rust code-block");
err.emit();
Err(io::Error::new(io::ErrorKind::Other, ""))
}
Err(_) => Err(HighlightError::LexError),
}
}
@@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
/// source.
fn write_source<W: Writer>(&mut self,
out: &mut W)
-> io::Result<()> {
-> Result<(), HighlightError> {
loop {
let next = self.try_next_token()?;
if next.tok == token::Eof {
@@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
fn write_token<W: Writer>(&mut self,
out: &mut W,
tas: TokenAndSpan)
-> io::Result<()> {
-> Result<(), HighlightError> {
let klass = match tas.tok {
token::Shebang(s) => {
out.string(Escape(&s.as_str()), Class::None)?;
@@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {
// Anything that didn't return above is the simple case where the
// class just spans a single token, so we can use the `string` method.
out.string(Escape(&self.snip(tas.sp)), klass)
out.string(Escape(&self.snip(tas.sp)), klass)?;
Ok(())
}
// Helper function to get a snippet from the source_map.
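
Two points in the highlighter changes above are easy to miss: lexing failures are now buffered as diagnostics instead of being emitted on the spot (the HighlightError enum and the From<io::Error> impl let write_source and write_token use `?` for both I/O and lex errors), and on failure the caller cancels the buffered diagnostics and falls back to the unhighlighted source, because the new check-code-block-syntax pass has already reported the problem. The sketch below is a minimal, self-contained illustration of that buffer-then-fall-back shape; Diagnostic, highlight, and render here are stand-ins, not the rustc-internal types.

```rust
// Minimal sketch of the "buffer errors, then fall back" pattern.
// `Diagnostic`, `highlight`, and `render` are illustrative stand-ins.
struct Diagnostic {
    message: String,
}

impl Diagnostic {
    fn cancel(self) {
        // Drop silently: the same problem was already reported elsewhere
        // (in rustdoc, by the early `check-code-block-syntax` pass).
        let _ = self.message;
    }
}

// Pretend a backslash is a lex error, mirroring the `\_` test cases.
fn highlight(src: &str) -> Result<String, Vec<Diagnostic>> {
    if src.contains('\\') {
        Err(vec![Diagnostic {
            message: format!("unknown start of token in `{}`", src),
        }])
    } else {
        Ok(format!("<span class=\"ident\">{}</span>", src))
    }
}

fn render(src: &str) -> String {
    match highlight(src) {
        Ok(html) => format!("<pre>{}</pre>", html),
        Err(errors) => {
            for error in errors {
                error.cancel();
            }
            // Emit the raw source instead of highlighted HTML.
            format!("<pre><code>{}</code></pre>", src)
        }
    }
}

fn main() {
    assert!(render("let x = 1;").contains("ident"));
    assert!(render(r"\_").contains("<code>"));
    println!("ok");
}
```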

View file

@@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
links
}
#[derive(Debug)]
crate struct RustCodeBlock {
/// The range in the markdown that the code block occupies. Note that this includes the fences
/// for fenced code blocks.
pub range: Range<usize>,
/// The range in the markdown that the code within the code block occupies.
pub code: Range<usize>,
pub is_fenced: bool,
pub syntax: Option<String>,
}
/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
/// untagged (and assumed to be rust).
crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
let mut code_blocks = vec![];
if md.is_empty() {
return code_blocks;
}
let mut opts = Options::empty();
opts.insert(OPTION_ENABLE_TABLES);
opts.insert(OPTION_ENABLE_FOOTNOTES);
let mut p = Parser::new_ext(md, opts);
let mut code_block_start = 0;
let mut code_start = 0;
let mut is_fenced = false;
let mut previous_offset = 0;
let mut in_rust_code_block = false;
while let Some(event) = p.next() {
let offset = p.get_offset();
match event {
Event::Start(Tag::CodeBlock(syntax)) => {
let lang_string = if syntax.is_empty() {
LangString::all_false()
} else {
LangString::parse(&*syntax, ErrorCodes::Yes)
};
if lang_string.rust {
in_rust_code_block = true;
code_start = offset;
code_block_start = match md[previous_offset..offset].find("```") {
Some(fence_idx) => {
is_fenced = true;
previous_offset + fence_idx
}
None => offset,
};
}
}
Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
in_rust_code_block = false;
let code_block_end = if is_fenced {
let fence_str = &md[previous_offset..offset]
.chars()
.rev()
.collect::<String>();
fence_str
.find("```")
.map(|fence_idx| offset - fence_idx)
.unwrap_or_else(|| offset)
} else if md
.as_bytes()
.get(offset)
.map(|b| *b == b'\n')
.unwrap_or_default()
{
offset - 1
} else {
offset
};
let code_end = if is_fenced {
previous_offset
} else {
code_block_end
};
code_blocks.push(RustCodeBlock {
is_fenced,
range: Range {
start: code_block_start,
end: code_block_end,
},
code: Range {
start: code_start,
end: code_end,
},
syntax: if !syntax.is_empty() {
Some(syntax.into_owned())
} else {
None
},
});
}
_ => (),
}
previous_offset = offset;
}
code_blocks
}
#[derive(Clone, Default, Debug)]
pub struct IdMap {
map: FxHashMap<String, usize>,
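
A detail worth calling out in rust_code_blocks above: each RustCodeBlock carries two byte ranges, `range` (which, for fenced blocks, includes the fences themselves) and `code` (only the text between them). The sketch below reproduces that distinction for a single fenced block with a hand-rolled scan; it is illustrative only and sidesteps the pulldown-cmark event loop the real function uses.

```rust
use std::ops::Range;

// Illustrative only: compute `range` (fences included) and `code` (body
// only) for the first fenced block in `md`, mirroring the two ranges a
// `RustCodeBlock` records. Not the real parser.
fn fenced_block_ranges(md: &str) -> Option<(Range<usize>, Range<usize>)> {
    let open = md.find("```")?;
    // The body starts after the rest of the opening fence line
    // (which may carry a language tag such as `rust`).
    let code_start = open + md[open..].find('\n')? + 1;
    let code_end = code_start + md[code_start..].find("```")?;
    // The closing fence is part of the block's overall range.
    Some((open..code_end + 3, code_start..code_end))
}

fn main() {
    let md = "Intro text\n\n```rust\nlet x = 1;\n```\n";
    let (range, code) = fenced_block_ranges(md).unwrap();
    println!("range = {:?}, code = {:?}", range, code);
    assert_eq!(&md[code], "let x = 1;\n");
    assert!(md[range].starts_with("```rust"));
}
```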

View file

@@ -3,6 +3,7 @@
html_root_url = "https://doc.rust-lang.org/nightly/",
html_playground_url = "https://play.rust-lang.org/")]
#![feature(bind_by_move_pattern_guards)]
#![feature(rustc_private)]
#![feature(box_patterns)]
#![feature(box_syntax)]

View file

@@ -0,0 +1,109 @@
use errors::Applicability;
use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
use syntax::parse::{ParseSess, token};
use syntax::source_map::FilePathMapping;
use syntax_pos::FileName;
use clean;
use core::DocContext;
use fold::DocFolder;
use html::markdown::{self, RustCodeBlock};
use passes::Pass;
pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
Pass::early("check-code-block-syntax", check_code_block_syntax,
"validates syntax inside Rust code blocks");
pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
SyntaxChecker { cx }.fold_crate(krate)
}
struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
cx: &'a DocContext<'a, 'tcx, 'rcx>,
}
impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
let sess = ParseSess::new(FilePathMapping::empty());
let source_file = sess.source_map().new_source_file(
FileName::Custom(String::from("doctest")),
dox[code_block.code].to_owned(),
);
let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
if tok == token::Eof {
break;
}
}
let errors = lexer.buffer_fatal_errors();
if !errors.is_empty() {
Err(errors)
} else {
Ok(())
}
});
if let Err(errors) = errors {
let mut diag = if let Some(sp) =
super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
{
let mut diag = self
.cx
.sess()
.struct_span_warn(sp, "could not parse code block as Rust code");
for mut err in errors {
diag.note(&format!("error from rustc: {}", err.message()));
err.cancel();
}
if code_block.syntax.is_none() && code_block.is_fenced {
let sp = sp.from_inner_byte_pos(0, 3);
diag.span_suggestion_with_applicability(
sp,
"mark blocks that do not contain Rust code as text",
String::from("```text"),
Applicability::MachineApplicable,
);
}
diag
} else {
// We couldn't calculate the span of the markdown block that had the error, so our
// diagnostics are going to be a bit lacking.
let mut diag = self.cx.sess().struct_span_warn(
super::span_of_attrs(&item.attrs),
"doc comment contains an invalid Rust code block",
);
for mut err in errors {
// Don't bother reporting the error, because we can't show where it happened.
err.cancel();
}
if code_block.syntax.is_none() && code_block.is_fenced {
diag.help("mark blocks that do not contain Rust code as text: ```text");
}
diag
};
diag.emit();
}
}
}
impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
if let Some(dox) = &item.attrs.collapsed_doc_value() {
for code_block in markdown::rust_code_blocks(&dox) {
self.check_rust_syntax(&item, &dox, code_block);
}
}
self.fold_item_recur(item)
}
}
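
For a quick sense of what the new pass reports, a file like the following (hypothetical name, modeled on the invalid-syntax test added below) is enough to trigger it:

```rust
// bad_block.rs -- hypothetical file name, modeled on the tests below.

/// An untagged fenced block is assumed to be Rust and gets checked:
///
/// ```
/// \_
/// ```
pub fn demo() {}
```

On a toolchain containing this change, rustdoc warns `could not parse code block as Rust code` on that block, notes the underlying lexer error from rustc, and, because the block is fenced and carries no language tag, offers the machine-applicable suggestion to mark it as text by changing the opening fence to ```text, matching the updated expected output for invalid-syntax.rs shown further down.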

View file

@@ -6,7 +6,7 @@ use syntax;
use syntax::ast::{self, Ident, NodeId};
use syntax::feature_gate::UnstableFeatures;
use syntax::symbol::Symbol;
use syntax_pos::{self, DUMMY_SP};
use syntax_pos::DUMMY_SP;
use std::ops::Range;
@@ -16,6 +16,7 @@ use html::markdown::markdown_links;
use clean::*;
use passes::{look_for_tests, Pass};
use super::span_of_attrs;
pub const COLLECT_INTRA_DOC_LINKS: Pass =
Pass::early("collect-intra-doc-links", collect_intra_doc_links,
@@ -440,15 +441,6 @@ fn macro_resolve(cx: &DocContext, path_str: &str) -> Option<Def> {
None
}
pub fn span_of_attrs(attrs: &Attributes) -> syntax_pos::Span {
if attrs.doc_strings.is_empty() {
return DUMMY_SP;
}
let start = attrs.doc_strings[0].span();
let end = attrs.doc_strings.last().expect("No doc strings provided").span();
start.to(end)
}
/// Reports a resolution failure diagnostic.
///
/// If we cannot find the exact source span of the resolution failure, we use the span of the

View file

@@ -8,7 +8,7 @@ use rustc::util::nodemap::DefIdSet;
use std::mem;
use std::fmt;
use syntax::ast::NodeId;
use syntax_pos::Span;
use syntax_pos::{DUMMY_SP, Span};
use std::ops::Range;
use clean::{self, GetDefId, Item};
@@ -18,8 +18,6 @@ use fold::StripItem;
use html::markdown::{find_testable_code, ErrorCodes, LangString};
use self::collect_intra_doc_links::span_of_attrs;
mod collapse_docs;
pub use self::collapse_docs::COLLAPSE_DOCS;
@@ -47,6 +45,9 @@ pub use self::private_items_doc_tests::CHECK_PRIVATE_ITEMS_DOC_TESTS;
mod collect_trait_impls;
pub use self::collect_trait_impls::COLLECT_TRAIT_IMPLS;
mod check_code_block_syntax;
pub use self::check_code_block_syntax::CHECK_CODE_BLOCK_SYNTAX;
/// Represents a single pass.
#[derive(Copy, Clone)]
pub enum Pass {
@@ -137,6 +138,7 @@ pub const PASSES: &'static [Pass] = &[
STRIP_PRIV_IMPORTS,
PROPAGATE_DOC_CFG,
COLLECT_INTRA_DOC_LINKS,
CHECK_CODE_BLOCK_SYNTAX,
COLLECT_TRAIT_IMPLS,
];
@@ -147,6 +149,7 @@ pub const DEFAULT_PASSES: &'static [&'static str] = &[
"strip-hidden",
"strip-private",
"collect-intra-doc-links",
"check-code-block-syntax",
"collapse-docs",
"unindent-comments",
"propagate-doc-cfg",
@@ -158,6 +161,7 @@ pub const DEFAULT_PRIVATE_PASSES: &'static [&'static str] = &[
"check-private-items-doc-tests",
"strip-priv-imports",
"collect-intra-doc-links",
"check-code-block-syntax",
"collapse-docs",
"unindent-comments",
"propagate-doc-cfg",
@@ -399,6 +403,16 @@ pub fn look_for_tests<'a, 'tcx: 'a, 'rcx: 'a>(
}
}
/// Return a span encompassing all the given attributes.
crate fn span_of_attrs(attrs: &clean::Attributes) -> Span {
if attrs.doc_strings.is_empty() {
return DUMMY_SP;
}
let start = attrs.doc_strings[0].span();
let end = attrs.doc_strings.last().expect("No doc strings provided").span();
start.to(end)
}
/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
///
/// This method will return `None` if we cannot construct a span from the source map or if the

View file

@@ -238,19 +238,6 @@ impl<'a> StringReader<'a> {
sr
}
pub fn new_without_err(sess: &'a ParseSess,
source_file: Lrc<syntax_pos::SourceFile>,
override_span: Option<Span>,
prepend_error_text: &str) -> Result<Self, ()> {
let mut sr = StringReader::new_raw(sess, source_file, override_span);
if sr.advance_token().is_err() {
eprintln!("{}", prepend_error_text);
sr.emit_fatal_errors();
return Err(());
}
Ok(sr)
}
pub fn new_or_buffered_errs(sess: &'a ParseSess,
source_file: Lrc<syntax_pos::SourceFile>,
override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {

View file

@@ -1,7 +1,66 @@
// compile-pass
// compile-flags: --error-format=human
/// ```
/// \__________pkt->size___________/ \_result->size_/ \__pkt->size__/
/// ```
pub fn foo() {}
/// ```
/// |
/// LL | use foobar::Baz;
/// | ^^^^^^ did you mean `baz::foobar`?
/// ```
pub fn bar() {}
/// ```
/// valid
/// ```
///
/// ```
/// \_
/// ```
///
/// ```text
/// "invalid
/// ```
pub fn valid_and_invalid() {}
/// This is a normal doc comment, but...
///
/// There's a code block with bad syntax in it:
///
/// ```rust
/// \_
/// ```
///
/// Good thing we tested it!
pub fn baz() {}
/// Indented block start
///
/// code with bad syntax
/// \_
///
/// Indented block end
pub fn quux() {}
/// Unclosed fence
///
/// ```
/// slkdjf
pub fn xyzzy() {}
/// Indented code that contains a fence
///
/// ```
pub fn blah() {}
/// ```edition2018
/// \_
/// ```
pub fn blargh() {}
#[doc = "```"]
/// \_
#[doc = "```"]
pub fn crazy_attrs() {}

View file

@@ -1,10 +1,97 @@
Output from rustc:
error: unknown start of token: /
--> <stdin>:1:1
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:3:5
|
1 | /__________pkt->size___________/ /_result->size_/ /__pkt->size__/
| ^
LL | /// ```
| _____^
LL | | /// /__________pkt->size___________/ /_result->size_/ /__pkt->size__/
LL | | /// ```
| |_______^
|
= note: error from rustc: unknown start of token: /
help: mark blocks that do not contain Rust code as text
|
LL | /// ```text
| ^^^^^^^
warning: Invalid doc comment starting with: `/__________pkt->size___________/ /_result->size_/ /__pkt->size__/`
(Ignoring this codeblock)
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:8:5
|
LL | /// ```
| _____^
LL | | /// |
LL | | /// LL | use foobar::Baz;
LL | | /// | ^^^^^^ did you mean `baz::foobar`?
LL | | /// ```
| |_______^
|
= note: error from rustc: unknown start of token: `
help: mark blocks that do not contain Rust code as text
|
LL | /// ```text
| ^^^^^^^
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:19:5
|
LL | /// ```
| _____^
LL | | /// /_
LL | | /// ```
| |_______^
|
= note: error from rustc: unknown start of token: /
help: mark blocks that do not contain Rust code as text
|
LL | /// ```text
| ^^^^^^^
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:32:5
|
LL | /// ```rust
| _____^
LL | | /// /_
LL | | /// ```
| |_______^
|
= note: error from rustc: unknown start of token: /
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:41:9
|
LL | /// code with bad syntax
| _________^
LL | | /// /_
| |__________^
|
= note: error from rustc: unknown start of token: /
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:55:9
|
LL | /// ```
| ^^^
|
= note: error from rustc: unknown start of token: `
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:58:5
|
LL | /// ```edition2018
| _____^
LL | | /// /_
LL | | /// ```
| |_______^
|
= note: error from rustc: unknown start of token: /
warning: doc comment contains an invalid Rust code block
--> $DIR/invalid-syntax.rs:63:1
|
LL | / #[doc = "```"]
LL | | /// /_
LL | | #[doc = "```"]
| |______________^
|
= help: mark blocks that do not contain Rust code as text: ```text

View file

@@ -0,0 +1,27 @@
// @has bad_codeblock_syntax/fn.foo.html
// @has - '//*[@class="docblock"]/pre/code' '\_'
/// ```
/// \_
/// ```
pub fn foo() {}
// @has bad_codeblock_syntax/fn.bar.html
// @has - '//*[@class="docblock"]/pre/code' '`baz::foobar`'
/// ```
/// `baz::foobar`
/// ```
pub fn bar() {}
// @has bad_codeblock_syntax/fn.quux.html
// @has - '//*[@class="docblock"]/pre/code' '\_'
/// ```rust
/// \_
/// ```
pub fn quux() {}
// @has bad_codeblock_syntax/fn.ok.html
// @has - '//*[@class="docblock"]/pre/code[@class="language-text"]' '\_'
/// ```text
/// \_
/// ```
pub fn ok() {}