2016-05-02 10:53:24 +12:00
|
|
|
//! Basic syntax highlighting functionality.
|
2014-02-20 01:14:51 -08:00
|
|
|
//!
|
2020-02-29 20:16:26 +03:00
|
|
|
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
|
2014-02-20 01:14:51 -08:00
|
|
|
//! the HTML documentation generated by rustdoc.
|
2016-05-02 10:53:24 +12:00
|
|
|
//!
|
2018-07-22 14:10:10 -06:00
|
|
|
//! Use `render_with_highlighting` to highlight some Rust code.
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2019-02-23 16:40:07 +09:00
|
|
|
use crate::html::escape::Escape;
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
use std::fmt::{Display, Write};
|
|
|
|
use std::iter::Peekable;
|
2016-05-02 10:53:24 +12:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
use rustc_lexer::{LiteralKind, TokenKind};
|
2020-12-05 17:31:31 +01:00
|
|
|
use rustc_span::edition::Edition;
|
2020-08-21 16:38:35 +02:00
|
|
|
use rustc_span::symbol::Ident;
|
|
|
|
use rustc_span::with_default_session_globals;
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2016-04-04 11:07:41 +12:00
|
|
|
/// Highlights `src`, returning the HTML output.
|
2020-11-14 17:59:58 -05:00
|
|
|
crate fn render_with_highlighting(
|
2020-06-27 13:55:15 -04:00
|
|
|
src: String,
|
2018-10-08 22:51:37 +02:00
|
|
|
class: Option<&str>,
|
2020-01-26 17:24:40 +01:00
|
|
|
playground_button: Option<&str>,
|
2020-12-05 17:31:31 +01:00
|
|
|
tooltip: Option<(Option<Edition>, &str)>,
|
2018-10-08 22:51:37 +02:00
|
|
|
) -> String {
|
2014-05-10 17:39:08 -07:00
|
|
|
debug!("highlighting: ================\n{}\n==============", src);
|
2020-08-21 16:38:35 +02:00
|
|
|
let mut out = String::with_capacity(src.len());
|
2020-12-05 17:31:31 +01:00
|
|
|
if let Some((edition_info, class)) = tooltip {
|
2019-12-22 17:42:04 -05:00
|
|
|
write!(
|
|
|
|
out,
|
2020-12-05 17:31:31 +01:00
|
|
|
"<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
|
|
|
|
class,
|
|
|
|
if let Some(edition_info) = edition_info {
|
|
|
|
format!(" edition=\"{}\"", edition_info)
|
|
|
|
} else {
|
|
|
|
String::new()
|
|
|
|
},
|
2019-12-22 17:42:04 -05:00
|
|
|
)
|
|
|
|
.unwrap();
|
2017-09-07 00:08:39 +02:00
|
|
|
}
|
2018-12-15 16:25:50 -05:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
write_header(&mut out, class);
|
|
|
|
write_code(&mut out, &src);
|
|
|
|
write_footer(&mut out, playground_button);
|
2020-06-27 13:55:15 -04:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
out
|
|
|
|
}
|
2018-12-15 16:25:50 -05:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Appends the opening `<div>` and `<pre>` tags of a code block to `out`.
///
/// `class` is placed after `rust` in the `<pre>` class list; when it is `None`
/// an empty string is used in its place.
fn write_header(out: &mut String, class: Option<&str>) {
    let extra_class = class.unwrap_or_default();
    write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", extra_class).unwrap()
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
fn write_code(out: &mut String, src: &str) {
|
2020-11-15 20:51:25 +01:00
|
|
|
// This replace allows to fix how the code source with DOS backline characters is displayed.
|
|
|
|
let src = src.replace("\r\n", "\n");
|
|
|
|
Classifier::new(&src).highlight(&mut |highlight| {
|
2020-08-21 16:38:35 +02:00
|
|
|
match highlight {
|
|
|
|
Highlight::Token { text, class } => string(out, Escape(text), class),
|
|
|
|
Highlight::EnterSpan { class } => enter_span(out, class),
|
|
|
|
Highlight::ExitSpan => exit_span(out),
|
|
|
|
};
|
|
|
|
});
|
|
|
|
}
|
2016-05-02 10:53:24 +12:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Appends the closing `</pre>` and `</div>` tags of a code block to `out`.
///
/// `playground_button` is inserted between the two closing tags; when it is
/// `None` nothing is inserted.
fn write_footer(out: &mut String, playground_button: Option<&str>) {
    let button = playground_button.unwrap_or_default();
    write!(out, "</pre>{}</div>\n", button).unwrap()
}
|
|
|
|
|
|
|
|
/// How a span of text is classified. Mostly corresponds to token kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Class {
    /// A line or block comment that is not a doc comment.
    Comment,
    /// A line or block comment that has a doc style.
    DocComment,
    /// The span wrapping an attribute (`#[...]` or `#![...]`).
    Attribute,
    /// A reserved identifier.
    KeyWord,
    // Keywords that do pointer/reference stuff.
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// A macro name and the `!` that follows it.
    Macro,
    /// A `$` that is directly followed by an identifier, and that identifier.
    MacroNonTerminal,
    /// A text literal (byte, char, string, and their raw/byte-string forms).
    String,
    /// An integer or float literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// Any other identifier.
    Ident,
    /// A lifetime.
    Lifetime,
    /// `Option` or `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` operator.
    QuestionMark,
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
impl Class {
|
|
|
|
/// Returns the css class expected by rustdoc for each `Class`.
|
|
|
|
fn as_html(self) -> &'static str {
|
|
|
|
match self {
|
|
|
|
Class::Comment => "comment",
|
|
|
|
Class::DocComment => "doccomment",
|
|
|
|
Class::Attribute => "attribute",
|
|
|
|
Class::KeyWord => "kw",
|
|
|
|
Class::RefKeyWord => "kw-2",
|
|
|
|
Class::Self_ => "self",
|
|
|
|
Class::Op => "op",
|
|
|
|
Class::Macro => "macro",
|
|
|
|
Class::MacroNonTerminal => "macro-nonterminal",
|
|
|
|
Class::String => "string",
|
|
|
|
Class::Number => "number",
|
|
|
|
Class::Bool => "bool-val",
|
|
|
|
Class::Ident => "ident",
|
|
|
|
Class::Lifetime => "lifetime",
|
|
|
|
Class::PreludeTy => "prelude-ty",
|
|
|
|
Class::PreludeVal => "prelude-val",
|
|
|
|
Class::QuestionMark => "question-mark",
|
2016-05-02 10:53:24 +12:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
enum Highlight<'a> {
|
2020-11-15 07:05:46 -05:00
|
|
|
Token { text: &'a str, class: Option<Class> },
|
2020-08-21 16:38:35 +02:00
|
|
|
EnterSpan { class: Class },
|
|
|
|
ExitSpan,
|
2018-12-15 16:25:50 -05:00
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Iterator over the tokens of a piece of source text.
struct TokenIter<'a> {
    /// The part of the source that has not been turned into tokens yet.
    src: &'a str,
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
impl Iterator for TokenIter<'a> {
|
|
|
|
type Item = (TokenKind, &'a str);
|
|
|
|
fn next(&mut self) -> Option<(TokenKind, &'a str)> {
|
|
|
|
if self.src.is_empty() {
|
|
|
|
return None;
|
2016-05-02 10:53:24 +12:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
let token = rustc_lexer::first_token(self.src);
|
|
|
|
let (text, rest) = self.src.split_at(token.len);
|
|
|
|
self.src = rest;
|
|
|
|
Some((token.kind, text))
|
2016-05-02 10:53:24 +12:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
}
|
2016-05-02 10:53:24 +12:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Processes program tokens, classifying strings of text by highlighting
|
|
|
|
/// category (`Class`).
|
|
|
|
struct Classifier<'a> {
|
|
|
|
tokens: Peekable<TokenIter<'a>>,
|
|
|
|
in_attribute: bool,
|
|
|
|
in_macro: bool,
|
|
|
|
in_macro_nonterminal: bool,
|
|
|
|
}
|
2019-07-02 13:44:38 +03:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
impl<'a> Classifier<'a> {
|
|
|
|
fn new(src: &str) -> Classifier<'_> {
|
|
|
|
let tokens = TokenIter { src }.peekable();
|
|
|
|
Classifier { tokens, in_attribute: false, in_macro: false, in_macro_nonterminal: false }
|
2017-08-16 20:08:27 -04:00
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Exhausts the `Classifier` writing the output into `sink`.
|
2016-05-02 10:53:24 +12:00
|
|
|
///
|
|
|
|
/// The general structure for this method is to iterate over each token,
|
2020-08-21 16:38:35 +02:00
|
|
|
/// possibly giving it an HTML span with a class specifying what flavor of
|
|
|
|
/// token is used.
|
|
|
|
fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
|
|
|
|
with_default_session_globals(|| {
|
|
|
|
while let Some((token, text)) = self.tokens.next() {
|
|
|
|
self.advance(token, text, sink);
|
2020-06-27 13:55:15 -04:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
})
|
2016-05-02 10:53:24 +12:00
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Single step of highlighting. This will classify `token`, but maybe also
|
|
|
|
/// a couple of following ones as well.
|
|
|
|
fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
|
|
|
|
let lookahead = self.peek();
|
2020-11-15 07:05:46 -05:00
|
|
|
let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
|
2020-08-21 16:38:35 +02:00
|
|
|
let class = match token {
|
2020-11-15 07:05:46 -05:00
|
|
|
TokenKind::Whitespace => return no_highlight(sink),
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
|
|
|
|
if doc_style.is_some() {
|
|
|
|
Class::DocComment
|
|
|
|
} else {
|
|
|
|
Class::Comment
|
|
|
|
}
|
2019-12-22 17:42:04 -05:00
|
|
|
}
|
2016-05-02 10:53:24 +12:00
|
|
|
// Consider this as part of a macro invocation if there was a
|
|
|
|
// leading identifier.
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Bang if self.in_macro => {
|
2016-05-02 10:53:24 +12:00
|
|
|
self.in_macro = false;
|
|
|
|
Class::Macro
|
|
|
|
}
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
// Assume that '&' or '*' is the reference or dereference operator
|
|
|
|
// or a reference or pointer type. Unless, of course, it looks like
|
|
|
|
// a logical and or a multiplication operator: `&&` or `* `.
|
|
|
|
TokenKind::Star => match lookahead {
|
|
|
|
Some(TokenKind::Whitespace) => Class::Op,
|
|
|
|
_ => Class::RefKeyWord,
|
|
|
|
},
|
|
|
|
TokenKind::And => match lookahead {
|
|
|
|
Some(TokenKind::And) => {
|
|
|
|
let _and = self.tokens.next();
|
2020-11-15 07:05:46 -05:00
|
|
|
sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
|
2020-08-21 16:38:35 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
Some(TokenKind::Eq) => {
|
|
|
|
let _eq = self.tokens.next();
|
2020-11-15 07:05:46 -05:00
|
|
|
sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
|
2020-08-21 16:38:35 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
Some(TokenKind::Whitespace) => Class::Op,
|
|
|
|
_ => Class::RefKeyWord,
|
|
|
|
},
|
|
|
|
|
2016-05-02 10:53:24 +12:00
|
|
|
// Operators.
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Minus
|
|
|
|
| TokenKind::Plus
|
|
|
|
| TokenKind::Or
|
|
|
|
| TokenKind::Slash
|
|
|
|
| TokenKind::Caret
|
|
|
|
| TokenKind::Percent
|
|
|
|
| TokenKind::Bang
|
|
|
|
| TokenKind::Eq
|
|
|
|
| TokenKind::Lt
|
|
|
|
| TokenKind::Gt => Class::Op,
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2016-05-02 10:53:24 +12:00
|
|
|
// Miscellaneous, no highlighting.
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Dot
|
|
|
|
| TokenKind::Semi
|
|
|
|
| TokenKind::Comma
|
|
|
|
| TokenKind::OpenParen
|
|
|
|
| TokenKind::CloseParen
|
|
|
|
| TokenKind::OpenBrace
|
|
|
|
| TokenKind::CloseBrace
|
|
|
|
| TokenKind::OpenBracket
|
|
|
|
| TokenKind::At
|
|
|
|
| TokenKind::Tilde
|
|
|
|
| TokenKind::Colon
|
2020-11-15 07:05:46 -05:00
|
|
|
| TokenKind::Unknown => return no_highlight(sink),
|
2020-08-21 16:38:35 +02:00
|
|
|
|
|
|
|
TokenKind::Question => Class::QuestionMark,
|
|
|
|
|
|
|
|
TokenKind::Dollar => match lookahead {
|
|
|
|
Some(TokenKind::Ident) => {
|
2016-05-02 10:53:24 +12:00
|
|
|
self.in_macro_nonterminal = true;
|
|
|
|
Class::MacroNonTerminal
|
2014-03-02 13:30:28 +11:00
|
|
|
}
|
2020-11-15 07:05:46 -05:00
|
|
|
_ => return no_highlight(sink),
|
2020-08-21 16:38:35 +02:00
|
|
|
},
|
2014-02-20 01:14:51 -08:00
|
|
|
|
2017-08-16 20:08:27 -04:00
|
|
|
// This might be the start of an attribute. We're going to want to
|
2014-02-20 01:14:51 -08:00
|
|
|
// continue highlighting it as an attribute until the ending ']' is
|
|
|
|
// seen, so skip out early. Down below we terminate the attribute
|
|
|
|
// span when we see the ']'.
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Pound => {
|
|
|
|
match lookahead {
|
|
|
|
// Case 1: #![inner_attribute]
|
|
|
|
Some(TokenKind::Bang) => {
|
|
|
|
let _not = self.tokens.next().unwrap();
|
|
|
|
if let Some(TokenKind::OpenBracket) = self.peek() {
|
|
|
|
self.in_attribute = true;
|
|
|
|
sink(Highlight::EnterSpan { class: Class::Attribute });
|
|
|
|
}
|
2020-11-15 07:05:46 -05:00
|
|
|
sink(Highlight::Token { text: "#", class: None });
|
|
|
|
sink(Highlight::Token { text: "!", class: None });
|
2020-08-21 16:38:35 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Case 2: #[outer_attribute]
|
|
|
|
Some(TokenKind::OpenBracket) => {
|
2017-08-16 20:08:27 -04:00
|
|
|
self.in_attribute = true;
|
2020-08-21 16:38:35 +02:00
|
|
|
sink(Highlight::EnterSpan { class: Class::Attribute });
|
2017-08-16 20:08:27 -04:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
_ => (),
|
2017-08-16 20:08:27 -04:00
|
|
|
}
|
2020-11-15 07:05:46 -05:00
|
|
|
return no_highlight(sink);
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::CloseBracket => {
|
2016-05-02 10:53:24 +12:00
|
|
|
if self.in_attribute {
|
|
|
|
self.in_attribute = false;
|
2020-11-15 07:05:46 -05:00
|
|
|
sink(Highlight::Token { text: "]", class: None });
|
2020-08-21 16:38:35 +02:00
|
|
|
sink(Highlight::ExitSpan);
|
|
|
|
return;
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
2020-11-15 07:05:46 -05:00
|
|
|
return no_highlight(sink);
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Literal { kind, .. } => match kind {
|
|
|
|
// Text literals.
|
|
|
|
LiteralKind::Byte { .. }
|
|
|
|
| LiteralKind::Char { .. }
|
|
|
|
| LiteralKind::Str { .. }
|
|
|
|
| LiteralKind::ByteStr { .. }
|
|
|
|
| LiteralKind::RawStr { .. }
|
|
|
|
| LiteralKind::RawByteStr { .. } => Class::String,
|
|
|
|
// Number literals.
|
|
|
|
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
|
|
|
|
},
|
|
|
|
TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
|
|
|
|
self.in_macro = true;
|
|
|
|
Class::Macro
|
2014-11-19 15:48:38 +11:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::Ident => match text {
|
|
|
|
"ref" | "mut" => Class::RefKeyWord,
|
|
|
|
"self" | "Self" => Class::Self_,
|
|
|
|
"false" | "true" => Class::Bool,
|
|
|
|
"Option" | "Result" => Class::PreludeTy,
|
|
|
|
"Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
|
|
|
|
// Keywords are also included in the identifier set.
|
|
|
|
_ if Ident::from_str(text).is_reserved() => Class::KeyWord,
|
|
|
|
_ if self.in_macro_nonterminal => {
|
|
|
|
self.in_macro_nonterminal = false;
|
|
|
|
Class::MacroNonTerminal
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
2020-08-21 16:38:35 +02:00
|
|
|
_ => Class::Ident,
|
2019-12-22 17:42:04 -05:00
|
|
|
},
|
2020-08-21 16:38:35 +02:00
|
|
|
TokenKind::RawIdent => Class::Ident,
|
|
|
|
TokenKind::Lifetime { .. } => Class::Lifetime,
|
2014-02-20 01:14:51 -08:00
|
|
|
};
|
2016-05-02 10:53:24 +12:00
|
|
|
// Anything that didn't return above is the simple case where we the
|
|
|
|
// class just spans a single token, so we can use the `string` method.
|
2020-11-15 07:05:46 -05:00
|
|
|
sink(Highlight::Token { text, class: Some(class) });
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
fn peek(&mut self) -> Option<TokenKind> {
|
|
|
|
self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
|
2016-05-02 10:53:24 +12:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Called when we start processing a span of text that should be highlighted.
|
|
|
|
/// The `Class` argument specifies how it should be highlighted.
|
|
|
|
fn enter_span(out: &mut String, klass: Class) {
|
|
|
|
write!(out, "<span class=\"{}\">", klass.as_html()).unwrap()
|
2016-04-04 11:07:41 +12:00
|
|
|
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Called at the end of a span of highlighted text.
///
/// Appends the closing `</span>` tag to `out`.
fn exit_span(out: &mut String) {
    out.push_str("</span>")
}
|
|
|
|
|
2020-08-21 16:38:35 +02:00
|
|
|
/// Called for a span of text. If the text should be highlighted differently
|
|
|
|
/// from the surrounding text, then the `Class` argument will be a value other
|
|
|
|
/// than `None`.
|
|
|
|
///
|
|
|
|
/// The following sequences of callbacks are equivalent:
|
|
|
|
/// ```plain
|
|
|
|
/// enter_span(Foo), string("text", None), exit_span()
|
|
|
|
/// string("text", Foo)
|
|
|
|
/// ```
|
|
|
|
/// The latter can be thought of as a shorthand for the former, which is more
|
|
|
|
/// flexible.
|
2020-11-15 07:05:46 -05:00
|
|
|
fn string<T: Display>(out: &mut String, text: T, klass: Option<Class>) {
|
2020-08-21 16:38:35 +02:00
|
|
|
match klass {
|
2020-11-15 07:05:46 -05:00
|
|
|
None => write!(out, "{}", text).unwrap(),
|
|
|
|
Some(klass) => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text).unwrap(),
|
2020-08-21 16:38:35 +02:00
|
|
|
}
|
2014-02-20 01:14:51 -08:00
|
|
|
}
|
2020-06-27 13:55:15 -04:00
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|