1
Fork 0

Allow generic parameters in intra-doc links

The contents of the generics will be mostly ignored (except for warning
if fully-qualified syntax is used, which is currently unsupported in
intra-doc links - see issue #74563).

* Allow links like `Vec<T>`, `Result<T, E>`, and `Option<Box<T>>`
* Allow links like `Vec::<T>::new()`
* Warn on
  * Unbalanced angle brackets (e.g. `Vec<T` or `Vec<T>>`)
  * Missing type to apply generics to (`<T>` or `<Box<T>>`)
  * Use of fully-qualified syntax (`<Vec as IntoIterator>::into_iter`)
  * Invalid path separator (`Vec:<T>:new`)
  * Too many angle brackets (`Vec<<T>>`)
  * Empty angle brackets (`Vec<>`)

Note that this implementation *does* allow some constructs that aren't
valid in the actual Rust syntax, for example `Box::<T>new()`. That may
not be supported in rustdoc in the future; it is an implementation
detail.
This commit is contained in:
Camelid 2020-10-08 22:24:34 -07:00
parent 9ba1d21868
commit 4c765f66a4
6 changed files with 381 additions and 2 deletions

View file

@ -3,11 +3,13 @@
html_playground_url = "https://play.rust-lang.org/"
)]
#![feature(rustc_private)]
#![feature(array_methods)]
#![feature(box_patterns)]
#![feature(box_syntax)]
#![feature(in_band_lifetimes)]
#![feature(nll)]
#![feature(or_patterns)]
#![feature(peekable_next_if)]
#![feature(test)]
#![feature(crate_visibility_modifier)]
#![feature(never_type)]

View file

@ -23,6 +23,7 @@ use smallvec::{smallvec, SmallVec};
use std::borrow::Cow;
use std::cell::Cell;
use std::mem;
use std::ops::Range;
use crate::clean::*;
@ -65,10 +66,53 @@ enum ResolutionFailure<'a> {
NotResolved { module_id: DefId, partial_res: Option<Res>, unresolved: Cow<'a, str> },
/// should not ever happen
NoParentItem,
/// This link has malformed generic parameters; e.g., the angle brackets are unbalanced.
MalformedGenerics(MalformedGenerics),
/// used to communicate that this should be ignored, but shouldn't be reported to the user
Dummy,
}
#[derive(Debug)]
enum MalformedGenerics {
/// This link has unbalanced angle brackets.
///
/// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
UnbalancedAngleBrackets,
/// The generics are not attached to a type.
///
/// For example, `<T>` should trigger this.
///
/// This is detected by checking if the path is empty after the generics are stripped.
MissingType,
/// The link uses fully-qualified syntax, which is currently unsupported.
///
/// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
///
/// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
/// angle brackets.
HasFullyQualifiedSyntax,
/// The link has an invalid path separator.
///
/// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
/// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
/// called.
///
/// Note that this will also **not** be triggered if the invalid path separator is inside angle
/// brackets because rustdoc mostly ignores what's inside angle brackets (except for
/// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
///
/// This is detected by checking if there is a colon followed by a non-colon in the link.
InvalidPathSeparator,
/// The link has too many angle brackets.
///
/// For example, `Vec<<T>>` should trigger this.
TooManyAngleBrackets,
/// The link has empty angle brackets.
///
/// For example, `Vec<>` should trigger this.
EmptyAngleBrackets,
}
impl ResolutionFailure<'a> {
// This resolved fully (not just partially) but is erroneous for some other reason
fn full_res(&self) -> Option<Res> {
@ -912,6 +956,7 @@ impl LinkCollector<'_, '_> {
let link_text;
let mut path_str;
let disambiguator;
let stripped_path_string;
let (mut res, mut fragment) = {
path_str = if let Ok((d, path)) = Disambiguator::from_str(&link) {
disambiguator = Some(d);
@ -922,7 +967,7 @@ impl LinkCollector<'_, '_> {
}
.trim();
if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ch == ':' || ch == '_')) {
if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, ".contains(ch))) {
return None;
}
@ -985,6 +1030,36 @@ impl LinkCollector<'_, '_> {
module_id = DefId { krate, index: CRATE_DEF_INDEX };
}
// Strip generics from the path.
if path_str.contains(['<', '>'].as_slice()) {
stripped_path_string = match strip_generics_from_path(path_str) {
Ok(path) => path,
Err(err_kind) => {
debug!("link has malformed generics: {}", path_str);
resolution_failure(
self,
&item,
path_str,
disambiguator,
dox,
link_range,
smallvec![err_kind],
);
return None;
}
};
path_str = &stripped_path_string;
}
// Sanity check to make sure we don't have any angle brackets after stripping generics.
assert!(!path_str.contains(['<', '>'].as_slice()));
// The link is not an intra-doc link if it still contains commas or spaces after
// stripping generics.
if path_str.contains([',', ' '].as_slice()) {
return None;
}
match self.resolve_with_disambiguator(
disambiguator,
item,
@ -1718,6 +1793,27 @@ fn resolution_failure(
diag.level = rustc_errors::Level::Bug;
"all intra doc links should have a parent item".to_owned()
}
ResolutionFailure::MalformedGenerics(variant) => match variant {
MalformedGenerics::UnbalancedAngleBrackets => {
String::from("unbalanced angle brackets")
}
MalformedGenerics::MissingType => {
String::from("missing type for generic parameters")
}
MalformedGenerics::HasFullyQualifiedSyntax => {
diag.note("see https://github.com/rust-lang/rust/issues/74563 for more information");
String::from("fully-qualified syntax is unsupported")
}
MalformedGenerics::InvalidPathSeparator => {
String::from("has invalid path separator")
}
MalformedGenerics::TooManyAngleBrackets => {
String::from("too many angle brackets")
}
MalformedGenerics::EmptyAngleBrackets => {
String::from("empty angle brackets")
}
},
};
if let Some(span) = sp {
diag.span_label(span, &note);
@ -1908,3 +2004,108 @@ fn is_primitive(path_str: &str, ns: Namespace) -> Option<(&'static str, Res)> {
fn primitive_impl(cx: &DocContext<'_>, path_str: &str) -> Option<&'static SmallVec<[DefId; 4]>> {
Some(PrimitiveType::from_symbol(Symbol::intern(path_str))?.impls(cx.tcx))
}
fn strip_generics_from_path(path_str: &str) -> Result<String, ResolutionFailure<'static>> {
let mut stripped_segments = vec![];
let mut path = path_str.chars().peekable();
let mut segment = Vec::new();
while let Some(chr) = path.next() {
match chr {
':' => {
if path.next_if_eq(&':').is_some() {
let stripped_segment =
strip_generics_from_path_segment(mem::take(&mut segment))?;
if !stripped_segment.is_empty() {
stripped_segments.push(stripped_segment);
}
} else {
return Err(ResolutionFailure::MalformedGenerics(
MalformedGenerics::InvalidPathSeparator,
));
}
}
'<' => {
segment.push(chr);
match path.peek() {
Some('<') => {
return Err(ResolutionFailure::MalformedGenerics(
MalformedGenerics::TooManyAngleBrackets,
));
}
Some('>') => {
return Err(ResolutionFailure::MalformedGenerics(
MalformedGenerics::EmptyAngleBrackets,
));
}
Some(_) => {
segment.push(path.next().unwrap());
while let Some(chr) = path.next_if(|c| *c != '>') {
segment.push(chr);
}
}
None => break,
}
}
_ => segment.push(chr),
}
debug!("raw segment: {:?}", segment);
}
if !segment.is_empty() {
let stripped_segment = strip_generics_from_path_segment(segment)?;
if !stripped_segment.is_empty() {
stripped_segments.push(stripped_segment);
}
}
debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
let stripped_path = stripped_segments.join("::");
if !stripped_path.is_empty() {
Ok(stripped_path)
} else {
Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::MissingType))
}
}
fn strip_generics_from_path_segment(
segment: Vec<char>,
) -> Result<String, ResolutionFailure<'static>> {
let mut stripped_segment = String::new();
let mut param_depth = 0;
let mut latest_generics_chunk = String::new();
for c in segment {
if c == '<' {
param_depth += 1;
latest_generics_chunk.clear();
} else if c == '>' {
param_depth -= 1;
if latest_generics_chunk.contains(" as ") {
// The segment tries to use fully-qualified syntax, which is currently unsupported.
// Give a helpful error message instead of completely ignoring the angle brackets.
return Err(ResolutionFailure::MalformedGenerics(
MalformedGenerics::HasFullyQualifiedSyntax,
));
}
} else {
if param_depth == 0 {
stripped_segment.push(c);
} else {
latest_generics_chunk.push(c);
}
}
}
if param_depth == 0 {
Ok(stripped_segment)
} else {
// The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::UnbalancedAngleBrackets))
}
}

View file

@ -2,7 +2,7 @@
//~^ NOTE lint level is defined
// FIXME: this should say that it was skipped (maybe an allowed by default lint?)
/// [<invalid syntax>]
/// [invalid intra-doc syntax!!]
/// [path::to::nonexistent::module]
//~^ ERROR unresolved link

View file

@ -0,0 +1,19 @@
#![deny(broken_intra_doc_links)]
//! [Vec<] //~ ERROR
//! [Vec<Box<T] //~ ERROR
//! [Vec<Box<T>] //~ ERROR
//! [Vec<Box<T>>>] //~ ERROR
//! [Vec<T>>>] //~ ERROR
//! [<Vec] //~ ERROR
//! [Vec::<] //~ ERROR
//! [<T>] //~ ERROR
//! [<invalid syntax>] //~ ERROR
//! [Vec:<T>:new()] //~ ERROR
//! [Vec<<T>>] //~ ERROR
//! [Vec<>] //~ ERROR
//! [Vec<<>>] //~ ERROR
// FIXME(#74563) support UFCS
//! [<Vec as IntoIterator>::into_iter] //~ ERROR
//! [<Vec<T> as IntoIterator>::iter] //~ ERROR

View file

@ -0,0 +1,102 @@
error: unresolved link to `Vec<`
--> $DIR/intra-link-malformed-generics.rs:3:6
|
LL | //! [Vec<]
| ^^^^ unbalanced angle brackets
|
note: the lint level is defined here
--> $DIR/intra-link-malformed-generics.rs:1:9
|
LL | #![deny(broken_intra_doc_links)]
| ^^^^^^^^^^^^^^^^^^^^^^
error: unresolved link to `Vec<Box<T`
--> $DIR/intra-link-malformed-generics.rs:4:6
|
LL | //! [Vec<Box<T]
| ^^^^^^^^^ unbalanced angle brackets
error: unresolved link to `Vec<Box<T>`
--> $DIR/intra-link-malformed-generics.rs:5:6
|
LL | //! [Vec<Box<T>]
| ^^^^^^^^^^ unbalanced angle brackets
error: unresolved link to `Vec<Box<T>>>`
--> $DIR/intra-link-malformed-generics.rs:6:6
|
LL | //! [Vec<Box<T>>>]
| ^^^^^^^^^^^^ unbalanced angle brackets
error: unresolved link to `Vec<T>>>`
--> $DIR/intra-link-malformed-generics.rs:7:6
|
LL | //! [Vec<T>>>]
| ^^^^^^^^ unbalanced angle brackets
error: unresolved link to `<Vec`
--> $DIR/intra-link-malformed-generics.rs:8:6
|
LL | //! [<Vec]
| ^^^^ unbalanced angle brackets
error: unresolved link to `Vec::<`
--> $DIR/intra-link-malformed-generics.rs:9:6
|
LL | //! [Vec::<]
| ^^^^^^ unbalanced angle brackets
error: unresolved link to `<T>`
--> $DIR/intra-link-malformed-generics.rs:10:6
|
LL | //! [<T>]
| ^^^ missing type for generic parameters
error: unresolved link to `<invalid syntax>`
--> $DIR/intra-link-malformed-generics.rs:11:6
|
LL | //! [<invalid syntax>]
| ^^^^^^^^^^^^^^^^ missing type for generic parameters
error: unresolved link to `Vec:<T>:new`
--> $DIR/intra-link-malformed-generics.rs:12:6
|
LL | //! [Vec:<T>:new()]
| ^^^^^^^^^^^^^ has invalid path separator
error: unresolved link to `Vec<<T>>`
--> $DIR/intra-link-malformed-generics.rs:13:6
|
LL | //! [Vec<<T>>]
| ^^^^^^^^ too many angle brackets
error: unresolved link to `Vec<>`
--> $DIR/intra-link-malformed-generics.rs:14:6
|
LL | //! [Vec<>]
| ^^^^^ empty angle brackets
error: unresolved link to `Vec<<>>`
--> $DIR/intra-link-malformed-generics.rs:15:6
|
LL | //! [Vec<<>>]
| ^^^^^^^ too many angle brackets
error: unresolved link to `<Vec as IntoIterator>::into_iter`
--> $DIR/intra-link-malformed-generics.rs:18:6
|
LL | //! [<Vec as IntoIterator>::into_iter]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported
|
= note: see https://github.com/rust-lang/rust/issues/74563 for more information
error: unresolved link to `<Vec<T> as IntoIterator>::iter`
--> $DIR/intra-link-malformed-generics.rs:19:6
|
LL | //! [<Vec<T> as IntoIterator>::iter]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported
|
= note: see https://github.com/rust-lang/rust/issues/74563 for more information
error: aborting due to 15 previous errors

View file

@ -0,0 +1,55 @@
// ignore-tidy-linelength
#![crate_name = "foo"]
//! Here's a link to [`Vec<T>`] and one to [`Box<Vec<Option<T>>>`].
//! Here's a link to [`Iterator<Box<T>>::Item`].
//!
//! And what about a link to [just `Option`](Option) and, [with the generic, `Option<T>`](Option<T>)?
//!
//! We should also try linking to [`Result<T, E>`]; it has *two* generics!
//!
//! Now let's test a trickier case: [`Vec::<T>::new`], or you could write it
//! [with parentheses as `Vec::<T>::new()`][Vec::<T>::new()].
//! And what about something even harder? That would be [`Vec::<Box<T>>::new()`].
//!
//! This is also pretty tricky: [`TypeId::of::<String>()`].
//! And this too: [`Vec::<std::error::Error>::len`].
//!
//! We unofficially and implicitly support things that aren't valid in the actual Rust syntax, like
//! [`Box::<T>new()`]. We may not support them in the future!
//!
//! These will be resolved as regular links:
//! - [`this is <invalid syntax> first`](https://www.rust-lang.org)
//! - [`this is <invalid syntax> twice`]
//! - [`<invalid syntax> thrice`](https://www.rust-lang.org)
//! - [`<invalid syntax> four times`][rlo]
//! - [a < b][rlo]
//! - [c > d]
//!
//! [`this is <invalid syntax> twice`]: https://www.rust-lang.org
//! [rlo]: https://www.rust-lang.org
//! [c > d]: https://www.rust-lang.org
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html"]' 'Vec<T>'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html"]' 'Box<Vec<Option<T>>>'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/iter/traits/iterator/trait.Iterator.html#associatedtype.Item"]' 'Iterator<Box<T>>::Item'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'just Option'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'with the generic, Option<T>'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/result/enum.Result.html"]' 'Result<T, E>'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<T>::new'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'with parentheses as Vec::<T>::new()'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<Box<T>>::new()'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/any/struct.TypeId.html#method.of"]' 'TypeId::of::<String>()'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.len"]' 'Vec::<std::error::Error>::len'
// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html#method.new"]' 'Box::<T>new()'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> first'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> twice'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> thrice'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> four times'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'a < b'
// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'c > d'
use std::any::TypeId;