Migrate rustc_parse to derive diagnostics

This commit is contained in:
clubby789 2023-01-29 13:37:05 +00:00
parent e7813fee92
commit 521c5f36d6
9 changed files with 656 additions and 381 deletions

View file

@ -1,11 +1,10 @@
use crate::errors;
use crate::lexer::unicode_chars::UNICODE_ARRAY;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{
error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
};
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, PResult, StashKey};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
@ -151,7 +150,7 @@ impl<'a> StringReader<'a> {
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
if !sym.can_be_raw() {
self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
token::Ident(sym, true)
@ -262,27 +261,24 @@ impl<'a> StringReader<'a> {
self.nbsp_is_whitespace = true;
}
let repeats = it.take_while(|c1| *c1 == c).count();
let mut err =
self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
// FIXME: the lexer could be used to turn the ASCII version of unicode
// homoglyphs, instead of keeping a table in `check_for_substitution`into the
// token. Ideally, this should be inside `rustc_lexer`. However, we should
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
// fancier error recovery to it, as there will be less overall work to do this
// way.
let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
if c == '\x00' {
err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
}
if repeats > 0 {
if repeats == 1 {
err.note(format!("character appears once more"));
} else {
err.note(format!("character appears {repeats} more times"));
}
swallow_next_invalid = repeats;
}
err.emit();
let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
self.sess.emit_err(errors::UnknownTokenStart {
span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
escaped: escaped_char(c),
sugg,
null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
repeat: if repeats > 0 {
swallow_next_invalid = repeats;
Some(errors::UnknownTokenRepeat { repeats })
} else {None}
});
if let Some(token) = token {
token
} else {
@ -297,26 +293,6 @@ impl<'a> StringReader<'a> {
}
}
/// Report a fatal lexical error with a given span.
fn fatal_span(&self, sp: Span, m: &str) -> ! {
self.sess.span_diagnostic.span_fatal(sp, m)
}
/// Report a lexical error with a given span.
fn err_span(&self, sp: Span, m: &str) {
self.sess.span_diagnostic.struct_span_err(sp, m).emit();
}
/// Report a fatal error spanning [`from_pos`, `to_pos`).
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
self.fatal_span(self.mk_sp(from_pos, to_pos), m)
}
/// Report a lexical error spanning [`from_pos`, `to_pos`).
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
self.err_span(self.mk_sp(from_pos, to_pos), m)
}
fn struct_fatal_span_char(
&self,
from_pos: BytePos,
@ -329,18 +305,6 @@ impl<'a> StringReader<'a> {
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
fn struct_err_span_char(
&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char,
) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
self.sess
.span_diagnostic
.struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
/// complain about it.
fn lint_unicode_text_flow(&self, start: BytePos) {
@ -368,14 +332,12 @@ impl<'a> StringReader<'a> {
) -> TokenKind {
if content.contains('\r') {
for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
self.err_span_(
let span = self.mk_sp(
content_start + BytePos(idx as u32),
content_start + BytePos(idx as u32 + 1),
match comment_kind {
CommentKind::Line => "bare CR not allowed in doc-comment",
CommentKind::Block => "bare CR not allowed in block doc-comment",
},
);
let block = matches!(comment_kind, CommentKind::Block);
self.sess.emit_err(errors::CrDocComment { span, block });
}
}
@ -454,26 +416,20 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Int { base, empty_int } => {
if empty_int {
self.sess
.span_diagnostic
.struct_span_err_with_code(
self.mk_sp(start, end),
"no valid digits found for number",
error_code!(E0768),
)
.emit();
let span = self.mk_sp(start, end);
self.sess.emit_err(errors::NoDigitsLiteral { span });
(token::Integer, sym::integer(0))
} else {
if matches!(base, Base::Binary | Base::Octal) {
let base = base as u32;
let s = self.str_from_to(start + BytePos(2), end);
for (idx, c) in s.char_indices() {
let span = self.mk_sp(
start + BytePos::from_usize(2 + idx),
start + BytePos::from_usize(2 + idx + c.len_utf8()),
);
if c != '_' && c.to_digit(base).is_none() {
self.err_span_(
start + BytePos::from_usize(2 + idx),
start + BytePos::from_usize(2 + idx + c.len_utf8()),
&format!("invalid digit for a base {} literal", base),
);
self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
}
}
}
@ -482,19 +438,18 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
if empty_exponent {
self.err_span_(start, self.pos, "expected at least one digit in exponent");
let span = self.mk_sp(start, self.pos);
self.sess.emit_err(errors::EmptyExponentFloat { span });
}
match base {
Base::Hexadecimal => {
self.err_span_(start, end, "hexadecimal float literal is not supported")
}
Base::Octal => {
self.err_span_(start, end, "octal float literal is not supported")
}
Base::Binary => {
self.err_span_(start, end, "binary float literal is not supported")
}
_ => {}
let base = match base {
Base::Hexadecimal => Some("hexadecimal"),
Base::Octal => Some("octal"),
Base::Binary => Some("binary"),
_ => None,
};
if let Some(base) = base {
let span = self.mk_sp(start, end);
self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
}
(token::Float, self.symbol_from_to(start, end))
}
@ -644,54 +599,34 @@ impl<'a> StringReader<'a> {
// identifier tokens.
fn report_unknown_prefix(&self, start: BytePos) {
let prefix_span = self.mk_sp(start, self.pos);
let prefix_str = self.str_from_to(start, self.pos);
let msg = format!("prefix `{}` is unknown", prefix_str);
let prefix = self.str_from_to(start, self.pos);
let expn_data = prefix_span.ctxt().outer_expn_data();
if expn_data.edition >= Edition::Edition2021 {
// In Rust 2021, this is a hard error.
let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
err.span_label(prefix_span, "unknown prefix");
if prefix_str == "rb" {
err.span_suggestion_verbose(
prefix_span,
"use `br` for a raw byte string",
"br",
Applicability::MaybeIncorrect,
);
let sugg = if prefix == "rb" {
Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
} else if expn_data.is_root() {
err.span_suggestion_verbose(
prefix_span.shrink_to_hi(),
"consider inserting whitespace here",
" ",
Applicability::MaybeIncorrect,
);
}
err.note("prefixed identifiers and literals are reserved since Rust 2021");
err.emit();
Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
} else {
None
};
self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
} else {
// Before Rust 2021, only emit a lint for migration.
self.sess.buffer_lint_with_diagnostic(
&RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
prefix_span,
ast::CRATE_NODE_ID,
&msg,
&format!("prefix `{prefix}` is unknown"),
BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
);
}
}
fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
self.fatal_span_(
start,
self.pos,
&format!(
"too many `#` symbols: raw strings may be delimited \
by up to 255 `#` symbols, but found {}",
found
),
)
fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
fn cook_quoted(