
Auto merge of #33199 - mitaa:tokenize-responsibly, r=nrc

Make some fatal lexer errors recoverable

I've kept the changes to a minimum since I'm not really sure whether this approach is acceptable.

fixes #12834

cc @nrc
bors 2016-04-27 13:49:45 -07:00
commit cda7c1cf24
5 changed files with 148 additions and 74 deletions
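
In outline: the panicking `next_token` becomes a defaulted wrapper over a new fallible `try_next_token`, and fatal diagnostics are buffered so the caller can decide whether to emit them and abort, or drop them and recover. A minimal sketch of that shape (`TokenAndSpan` here is a placeholder, not the real libsyntax type):

```rust
// Sketch of the new Reader shape, with a stand-in token type.
struct TokenAndSpan; // placeholder for the real libsyntax type

trait Reader {
    /// Fallible tokenization: on `Err(())`, fatal diagnostics have been
    /// buffered instead of being emitted and panicked on immediately.
    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
    /// Emit (and clear) any buffered fatal diagnostics.
    fn emit_fatal_errors(&mut self);
    /// The old entry point keeps its abort-on-error behaviour.
    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
        match self.try_next_token() {
            Ok(tok) => tok,
            Err(()) => {
                self.emit_fatal_errors();
                panic!("fatal lexer error");
            }
        }
    }
}
```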

View file

@@ -29,9 +29,11 @@ pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str>
     let mut out = Vec::new();
     write_header(class, id, &mut out).unwrap();
-    write_source(&sess,
-                 lexer::StringReader::new(&sess.span_diagnostic, fm),
-                 &mut out).unwrap();
+    if let Err(_) = write_source(&sess,
+                                 lexer::StringReader::new(&sess.span_diagnostic, fm),
+                                 &mut out) {
+        return format!("<pre>{}</pre>", src)
+    }
     write_footer(&mut out).unwrap();
     String::from_utf8_lossy(&out[..]).into_owned()
 }
@@ -39,15 +41,15 @@ pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str>
 /// Highlights `src`, returning the HTML output. Returns only the inner html to
 /// be inserted into an element. C.f., `render_with_highlighting` which includes
 /// an enclosing `<pre>` block.
-pub fn render_inner_with_highlighting(src: &str) -> String {
+pub fn render_inner_with_highlighting(src: &str) -> io::Result<String> {
     let sess = parse::ParseSess::new();
     let fm = sess.codemap().new_filemap("<stdin>".to_string(), src.to_string());
 
     let mut out = Vec::new();
     write_source(&sess,
                  lexer::StringReader::new(&sess.span_diagnostic, fm),
-                 &mut out).unwrap();
-    String::from_utf8_lossy(&out[..]).into_owned()
+                 &mut out)?;
+    Ok(String::from_utf8_lossy(&out[..]).into_owned())
 }
 
 /// Exhausts the `lexer` writing the output into `out`.
@@ -65,7 +67,17 @@ fn write_source(sess: &parse::ParseSess,
     let mut is_macro = false;
     let mut is_macro_nonterminal = false;
     loop {
-        let next = lexer.next_token();
+        let next = match lexer.try_next_token() {
+            Ok(tok) => tok,
+            Err(_) => {
+                lexer.emit_fatal_errors();
+                lexer.span_diagnostic.struct_warn("Backing out of syntax highlighting")
+                                     .note("You probably did not intend to render this \
+                                            as a rust code-block")
+                                     .emit();
+                return Err(io::Error::new(io::ErrorKind::Other, ""))
+            },
+        };
         let snip = |sp| sess.codemap().span_to_snippet(sp).unwrap();
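
The rustdoc change above is a plain try-then-fall-back: if the lexer errors out, back off and render the snippet unhighlighted instead of taking rustdoc down. A self-contained sketch of that pattern, with `try_highlight` as a hypothetical stand-in for the fallible `write_source` pass:

```rust
// Hypothetical stand-in for the fallible write_source pass.
fn try_highlight(src: &str) -> Result<String, ()> {
    // Pretend anything the lexer can't tokenize (here: '∀') fails.
    if src.contains('∀') {
        Err(())
    } else {
        Ok(format!("<pre class=\"rust\">{}</pre>", src))
    }
}

fn render_with_fallback(src: &str) -> String {
    match try_highlight(src) {
        Ok(html) => html,
        // Same shape as the diff: fall back to the unhighlighted source.
        Err(()) => format!("<pre>{}</pre>", src),
    }
}

fn main() {
    assert_eq!(render_with_fallback("a + b ∈ Self ∀ a, b ∈ Self"),
               "<pre>a + b ∈ Self ∀ a, b ∈ Self</pre>");
}
```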

View file

@@ -177,6 +177,7 @@ impl error::Error for ExplicitBug {
 /// Used for emitting structured error messages and other diagnostic information.
 #[must_use]
+#[derive(Clone)]
 pub struct DiagnosticBuilder<'a> {
     emitter: &'a RefCell<Box<Emitter>>,
     level: Level,
@@ -187,6 +188,7 @@ pub struct DiagnosticBuilder<'a> {
 }
 
 /// For example a note attached to an error.
+#[derive(Clone)]
 struct SubDiagnostic {
     level: Level,
     message: String,

View file

@@ -12,7 +12,7 @@ use self::LockstepIterSize::*;
 use ast;
 use ast::{TokenTree, Ident, Name};
 use codemap::{Span, DUMMY_SP};
-use errors::Handler;
+use errors::{Handler, DiagnosticBuilder};
 use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal};
 use parse::token::{DocComment, MatchNt, SubstNt};
 use parse::token::{Token, NtIdent, SpecialMacroVar};
@@ -50,6 +50,7 @@ pub struct TtReader<'a> {
     pub cur_span: Span,
     /// Transform doc comments. Only useful in macro invocations
     pub desugar_doc_comments: bool,
+    pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
 }
 
 /// This can do Macro-By-Example transcription. On the other hand, if
@@ -99,6 +100,7 @@ pub fn new_tt_reader_with_doc_flag(sp_diag: &Handler,
         /* dummy values, never read: */
         cur_tok: token::Eof,
         cur_span: DUMMY_SP,
+        fatal_errs: Vec::new(),
     };
     tt_next_token(&mut r); /* get cur_tok and cur_span set up */
     r

View file

@@ -29,24 +29,42 @@ mod unicode_chars;
 pub trait Reader {
     fn is_eof(&self) -> bool;
-    fn next_token(&mut self) -> TokenAndSpan;
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
+    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
+        let res = self.try_next_token();
+        self.unwrap_or_abort(res)
+    }
     /// Report a fatal error with the current span.
     fn fatal(&self, &str) -> FatalError;
     /// Report a non-fatal error with the current span.
     fn err(&self, &str);
+    fn emit_fatal_errors(&mut self);
+    fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
+        match res {
+            Ok(tok) => tok,
+            Err(_) => {
+                self.emit_fatal_errors();
+                panic!(FatalError);
+            }
+        }
+    }
     fn peek(&self) -> TokenAndSpan;
     /// Get a token the parser cares about.
-    fn real_token(&mut self) -> TokenAndSpan {
-        let mut t = self.next_token();
+    fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
+        let mut t = self.try_next_token()?;
         loop {
             match t.tok {
                 token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.next_token();
+                    t = self.try_next_token()?;
                 }
                 _ => break,
             }
         }
-        t
+        Ok(t)
+    }
+    fn real_token(&mut self) -> TokenAndSpan {
+        let res = self.try_real_token();
+        self.unwrap_or_abort(res)
     }
 }
@@ -70,7 +88,7 @@ pub struct StringReader<'a> {
     // cached:
     pub peek_tok: token::Token,
     pub peek_span: Span,
+    pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
     source_text: Rc<String>,
@@ -81,13 +99,14 @@ impl<'a> Reader for StringReader<'a> {
         self.curr.is_none()
     }
     /// Return the next token. EFFECT: advances the string_reader.
-    fn next_token(&mut self) -> TokenAndSpan {
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
+        assert!(self.fatal_errs.is_empty());
         let ret_val = TokenAndSpan {
             tok: replace(&mut self.peek_tok, token::Underscore),
             sp: self.peek_span,
         };
-        self.advance_token();
-        ret_val
+        self.advance_token()?;
+        Ok(ret_val)
     }
     fn fatal(&self, m: &str) -> FatalError {
         self.fatal_span(self.peek_span, m)
@@ -95,6 +114,12 @@ impl<'a> Reader for StringReader<'a> {
     fn err(&self, m: &str) {
         self.err_span(self.peek_span, m)
     }
+    fn emit_fatal_errors(&mut self) {
+        for err in &mut self.fatal_errs {
+            err.emit();
+        }
+        self.fatal_errs.clear();
+    }
     fn peek(&self) -> TokenAndSpan {
         // FIXME(pcwalton): Bad copy!
         TokenAndSpan {
@@ -108,10 +133,11 @@ impl<'a> Reader for TtReader<'a> {
     fn is_eof(&self) -> bool {
         self.cur_tok == token::Eof
     }
-    fn next_token(&mut self) -> TokenAndSpan {
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
+        assert!(self.fatal_errs.is_empty());
         let r = tt_next_token(self);
         debug!("TtReader: r={:?}", r);
-        r
+        Ok(r)
     }
     fn fatal(&self, m: &str) -> FatalError {
         self.sp_diag.span_fatal(self.cur_span, m)
@@ -119,6 +145,12 @@ impl<'a> Reader for TtReader<'a> {
     fn err(&self, m: &str) {
         self.sp_diag.span_err(self.cur_span, m);
     }
+    fn emit_fatal_errors(&mut self) {
+        for err in &mut self.fatal_errs {
+            err.emit();
+        }
+        self.fatal_errs.clear();
+    }
     fn peek(&self) -> TokenAndSpan {
         TokenAndSpan {
             tok: self.cur_tok.clone(),
@@ -151,6 +183,7 @@ impl<'a> StringReader<'a> {
             peek_tok: token::Eof,
             peek_span: codemap::DUMMY_SP,
             source_text: source_text,
+            fatal_errs: Vec::new(),
         };
         sr.bump();
         sr
@@ -160,7 +193,10 @@ impl<'a> StringReader<'a> {
                    filemap: Rc<codemap::FileMap>)
                    -> StringReader<'b> {
         let mut sr = StringReader::new_raw(span_diagnostic, filemap);
-        sr.advance_token();
+        if let Err(_) = sr.advance_token() {
+            sr.emit_fatal_errors();
+            panic!(FatalError);
+        }
         sr
     }
@@ -249,7 +285,7 @@ impl<'a> StringReader<'a> {
     /// Advance peek_tok and peek_span to refer to the next token, and
     /// possibly update the interner.
-    fn advance_token(&mut self) {
+    fn advance_token(&mut self) -> Result<(), ()> {
         match self.scan_whitespace_or_comment() {
             Some(comment) => {
                 self.peek_span = comment.sp;
@@ -261,11 +297,12 @@ impl<'a> StringReader<'a> {
                     self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
                 } else {
                     let start_bytepos = self.last_pos;
-                    self.peek_tok = self.next_token_inner();
+                    self.peek_tok = self.next_token_inner()?;
                     self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos);
                 };
             }
         }
+        Ok(())
     }
 
     fn byte_offset(&self, pos: BytePos) -> BytePos {
@@ -1013,7 +1050,7 @@ impl<'a> StringReader<'a> {
     /// Return the next token from the string, advances the input past that
     /// token, and updates the interner
-    fn next_token_inner(&mut self) -> token::Token {
+    fn next_token_inner(&mut self) -> Result<token::Token, ()> {
         let c = self.curr;
         if ident_start(c) &&
            match (c.unwrap(), self.nextch(), self.nextnextch()) {
@@ -1033,32 +1070,32 @@ impl<'a> StringReader<'a> {
                 self.bump();
             }
-            return self.with_str_from(start, |string| {
+            return Ok(self.with_str_from(start, |string| {
                 if string == "_" {
                     token::Underscore
                 } else {
                     // FIXME: perform NFKC normalization here. (Issue #2253)
                     token::Ident(str_to_ident(string))
                 }
-            });
+            }));
         }
 
         if is_dec_digit(c) {
             let num = self.scan_number(c.unwrap());
             let suffix = self.scan_optional_raw_name();
             debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
-            return token::Literal(num, suffix);
+            return Ok(token::Literal(num, suffix));
         }
 
         match c.expect("next_token_inner called at EOF") {
             // One-byte tokens.
             ';' => {
                 self.bump();
-                return token::Semi;
+                return Ok(token::Semi);
             }
             ',' => {
                 self.bump();
-                return token::Comma;
+                return Ok(token::Comma);
             }
             '.' => {
                 self.bump();
@@ -1066,67 +1103,67 @@ impl<'a> StringReader<'a> {
                 return if self.curr_is('.') {
                     self.bump();
                     if self.curr_is('.') {
                         self.bump();
-                        token::DotDotDot
+                        Ok(token::DotDotDot)
                     } else {
-                        token::DotDot
+                        Ok(token::DotDot)
                     }
                 } else {
-                    token::Dot
+                    Ok(token::Dot)
                 };
             }
             '(' => {
                 self.bump();
-                return token::OpenDelim(token::Paren);
+                return Ok(token::OpenDelim(token::Paren));
             }
             ')' => {
                 self.bump();
-                return token::CloseDelim(token::Paren);
+                return Ok(token::CloseDelim(token::Paren));
             }
             '{' => {
                 self.bump();
-                return token::OpenDelim(token::Brace);
+                return Ok(token::OpenDelim(token::Brace));
             }
             '}' => {
                 self.bump();
-                return token::CloseDelim(token::Brace);
+                return Ok(token::CloseDelim(token::Brace));
             }
             '[' => {
                 self.bump();
-                return token::OpenDelim(token::Bracket);
+                return Ok(token::OpenDelim(token::Bracket));
             }
             ']' => {
                 self.bump();
-                return token::CloseDelim(token::Bracket);
+                return Ok(token::CloseDelim(token::Bracket));
             }
             '@' => {
                 self.bump();
-                return token::At;
+                return Ok(token::At);
             }
             '#' => {
                 self.bump();
-                return token::Pound;
+                return Ok(token::Pound);
             }
             '~' => {
                 self.bump();
-                return token::Tilde;
+                return Ok(token::Tilde);
             }
             '?' => {
                 self.bump();
-                return token::Question;
+                return Ok(token::Question);
             }
             ':' => {
                 self.bump();
                 if self.curr_is(':') {
                     self.bump();
-                    return token::ModSep;
+                    return Ok(token::ModSep);
                 } else {
-                    return token::Colon;
+                    return Ok(token::Colon);
                 }
             }
             '$' => {
                 self.bump();
-                return token::Dollar;
+                return Ok(token::Dollar);
             }
 
             // Multi-byte tokens.
@@ -1134,21 +1171,21 @@ impl<'a> StringReader<'a> {
                 self.bump();
                 if self.curr_is('=') {
                     self.bump();
-                    return token::EqEq;
+                    return Ok(token::EqEq);
                 } else if self.curr_is('>') {
                     self.bump();
-                    return token::FatArrow;
+                    return Ok(token::FatArrow);
                 } else {
-                    return token::Eq;
+                    return Ok(token::Eq);
                 }
             }
             '!' => {
                 self.bump();
                 if self.curr_is('=') {
                     self.bump();
-                    return token::Ne;
+                    return Ok(token::Ne);
                 } else {
-                    return token::Not;
+                    return Ok(token::Not);
                 }
             }
             '<' => {
@@ -1156,21 +1193,21 @@ impl<'a> StringReader<'a> {
                 match self.curr.unwrap_or('\x00') {
                     '=' => {
                         self.bump();
-                        return token::Le;
+                        return Ok(token::Le);
                     }
                     '<' => {
-                        return self.binop(token::Shl);
+                        return Ok(self.binop(token::Shl));
                     }
                     '-' => {
                         self.bump();
                         match self.curr.unwrap_or('\x00') {
                             _ => {
-                                return token::LArrow;
+                                return Ok(token::LArrow);
                             }
                         }
                     }
                     _ => {
-                        return token::Lt;
+                        return Ok(token::Lt);
                     }
                 }
             }
@@ -1179,13 +1216,13 @@ impl<'a> StringReader<'a> {
                 match self.curr.unwrap_or('\x00') {
                     '=' => {
                         self.bump();
-                        return token::Ge;
+                        return Ok(token::Ge);
                     }
                     '>' => {
-                        return self.binop(token::Shr);
+                        return Ok(self.binop(token::Shr));
                     }
                     _ => {
-                        return token::Gt;
+                        return Ok(token::Gt);
                     }
                 }
             }
@@ -1233,7 +1270,7 @@ impl<'a> StringReader<'a> {
                     self.err_span_(start, last_bpos, "lifetimes cannot use keyword names");
                 }
-                return token::Lifetime(ident);
+                return Ok(token::Lifetime(ident));
             }
 
             let valid = self.scan_char_or_byte(start,
@@ -1255,7 +1292,7 @@ impl<'a> StringReader<'a> {
                 };
                 self.bump(); // advance curr past token
                 let suffix = self.scan_optional_raw_name();
-                return token::Literal(token::Char(id), suffix);
+                return Ok(token::Literal(token::Char(id), suffix));
             }
             'b' => {
                 self.bump();
@@ -1266,7 +1303,7 @@ impl<'a> StringReader<'a> {
                     _ => unreachable!(),  // Should have been a token::Ident above.
                 };
                 let suffix = self.scan_optional_raw_name();
-                return token::Literal(lit, suffix);
+                return Ok(token::Literal(lit, suffix));
             }
             '"' => {
                 let start_bpos = self.last_pos;
@@ -1297,7 +1334,7 @@ impl<'a> StringReader<'a> {
                 };
                 self.bump();
                 let suffix = self.scan_optional_raw_name();
-                return token::Literal(token::Str_(id), suffix);
+                return Ok(token::Literal(token::Str_(id), suffix));
             }
             'r' => {
                 let start_bpos = self.last_pos;
@@ -1368,24 +1405,24 @@ impl<'a> StringReader<'a> {
                     token::intern("??")
                 };
                 let suffix = self.scan_optional_raw_name();
-                return token::Literal(token::StrRaw(id, hash_count), suffix);
+                return Ok(token::Literal(token::StrRaw(id, hash_count), suffix));
             }
             '-' => {
                 if self.nextch_is('>') {
                     self.bump();
                     self.bump();
-                    return token::RArrow;
+                    return Ok(token::RArrow);
                 } else {
-                    return self.binop(token::Minus);
+                    return Ok(self.binop(token::Minus));
                }
             }
             '&' => {
                 if self.nextch_is('&') {
                     self.bump();
                     self.bump();
-                    return token::AndAnd;
+                    return Ok(token::AndAnd);
                 } else {
-                    return self.binop(token::And);
+                    return Ok(self.binop(token::And));
                 }
             }
             '|' => {
@@ -1393,27 +1430,27 @@ impl<'a> StringReader<'a> {
                 match self.nextch() {
                     Some('|') => {
                         self.bump();
                         self.bump();
-                        return token::OrOr;
+                        return Ok(token::OrOr);
                     }
                     _ => {
-                        return self.binop(token::Or);
+                        return Ok(self.binop(token::Or));
                     }
                 }
             }
             '+' => {
-                return self.binop(token::Plus);
+                return Ok(self.binop(token::Plus));
             }
             '*' => {
-                return self.binop(token::Star);
+                return Ok(self.binop(token::Star));
             }
             '/' => {
-                return self.binop(token::Slash);
+                return Ok(self.binop(token::Slash));
             }
             '^' => {
-                return self.binop(token::Caret);
+                return Ok(self.binop(token::Caret));
             }
             '%' => {
-                return self.binop(token::Percent);
+                return Ok(self.binop(token::Percent));
             }
             c => {
                 let last_bpos = self.last_pos;
@@ -1423,8 +1460,8 @@ impl<'a> StringReader<'a> {
                                                       "unknown start of token",
                                                       c);
                 unicode_chars::check_for_substitution(&self, c, &mut err);
-                err.emit();
-                panic!(FatalError);
+                self.fatal_errs.push(err);
+                Err(())
             }
         }
     }
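
The mechanism running through this file is buffer-then-decide: instead of `err.emit(); panic!(FatalError)` at the point of failure, the `DiagnosticBuilder` is pushed onto `fatal_errs` (which is why `DiagnosticBuilder` gains `#[derive(Clone)]` above), and the caller either emits the batch and aborts (`unwrap_or_abort`) or clears it and recovers. A simplified sketch, with `String` standing in for `DiagnosticBuilder`:

```rust
// Simplified sketch; String stands in for DiagnosticBuilder.
struct Lexer {
    fatal_errs: Vec<String>, // diagnostics buffered, not yet emitted
}

impl Lexer {
    fn try_next_token(&mut self) -> Result<char, ()> {
        // On a fatal condition, buffer the diagnostic and report failure
        // instead of printing and panicking on the spot.
        self.fatal_errs.push("unknown start of token".to_string());
        Err(())
    }

    fn emit_fatal_errors(&mut self) {
        for err in self.fatal_errs.drain(..) {
            eprintln!("error: {}", err);
        }
    }
}

fn main() {
    let mut lexer = Lexer { fatal_errs: Vec::new() };
    if lexer.try_next_token().is_err() {
        // Abort path (what the parser's unwrap_or_abort does):
        //     lexer.emit_fatal_errors(); panic!("fatal error");
        // Recovery path (what rustdoc's highlighter does):
        lexer.fatal_errs.clear();
    }
}
```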

View file

@@ -0,0 +1,21 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Tests that failing to syntax highlight a rust code-block doesn't cause
+// rustdoc to fail, while still rendering the code-block (without highlighting).
+
+// @has issue_12834/fn.foo.html
+// @has - //pre 'a + b ∈ Self ∀ a, b ∈ Self'
+
+/// ```
+/// a + b ∈ Self ∀ a, b ∈ Self
+/// ```
+pub fn foo() {}