Migrate rustc_parse to derive diagnostics

2023-01-29 13:37:05 +00:00 · 2023-01-29 13:37:05 +00:00 · 521c5f36d6
commit 521c5f36d6
parent e7813fee92
9 changed files with 656 additions and 381 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@ -1,11 +1,10 @@
+use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
-use rustc_errors::{
-    error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult, StashKey,
-};
+use rustc_errors::{error_code, Applicability, DiagnosticBuilder, PResult, StashKey};
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
@ -151,7 +150,7 @@ impl<'a> StringReader<'a> {
                    let span = self.mk_sp(start, self.pos);
                    self.sess.symbol_gallery.insert(sym, span);
                    if !sym.can_be_raw() {
-                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+                        self.sess.emit_err(errors::CannotBeRawIdent { span, ident: sym });
                    }
                    self.sess.raw_identifier_spans.borrow_mut().push(span);
                    token::Ident(sym, true)
@ -262,27 +261,24 @@ impl<'a> StringReader<'a> {
                        self.nbsp_is_whitespace = true;
                    }
                    let repeats = it.take_while(|c1| *c1 == c).count();
-                    let mut err =
-                        self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
                    // FIXME: the lexer could be used to turn the ASCII version of unicode
                     // homoglyphs, instead of keeping a table in `check_for_substitution` into the
                    // token. Ideally, this should be inside `rustc_lexer`. However, we should
                    // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                    // fancier error recovery to it, as there will be less overall work to do this
                    // way.
-                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
-                    if c == '\x00' {
-                        err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
-                    }
-                    if repeats > 0 {
-                        if repeats == 1 {
-                            err.note(format!("character appears once more"));
-                        } else {
-                            err.note(format!("character appears {repeats} more times"));
-                        }
-                        swallow_next_invalid = repeats;
-                    }
-                    err.emit();
+                    let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+                    self.sess.emit_err(errors::UnknownTokenStart {
+                        span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
+                        escaped: escaped_char(c),
+                        sugg,
+                        null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+                        repeat: if repeats > 0 {
+                            swallow_next_invalid = repeats;
+                            Some(errors::UnknownTokenRepeat { repeats })
+                        } else {None}
+                    });
+
                    if let Some(token) = token {
                        token
                    } else {
@ -297,26 +293,6 @@ impl<'a> StringReader<'a> {
        }
    }

-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> ! {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
-        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        self.err_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
    fn struct_fatal_span_char(
        &self,
        from_pos: BytePos,
@ -329,18 +305,6 @@ impl<'a> StringReader<'a> {
            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
    }

-    fn struct_err_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
-        self.sess
-            .span_diagnostic
-            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
    /// complain about it.
    fn lint_unicode_text_flow(&self, start: BytePos) {
@ -368,14 +332,12 @@ impl<'a> StringReader<'a> {
    ) -> TokenKind {
        if content.contains('\r') {
            for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
-                self.err_span_(
+                let span = self.mk_sp(
                    content_start + BytePos(idx as u32),
                    content_start + BytePos(idx as u32 + 1),
-                    match comment_kind {
-                        CommentKind::Line => "bare CR not allowed in doc-comment",
-                        CommentKind::Block => "bare CR not allowed in block doc-comment",
-                    },
                );
+                let block = matches!(comment_kind, CommentKind::Block);
+                self.sess.emit_err(errors::CrDocComment { span, block });
            }
        }

@ -454,26 +416,20 @@ impl<'a> StringReader<'a> {
            }
            rustc_lexer::LiteralKind::Int { base, empty_int } => {
                if empty_int {
-                    self.sess
-                        .span_diagnostic
-                        .struct_span_err_with_code(
-                            self.mk_sp(start, end),
-                            "no valid digits found for number",
-                            error_code!(E0768),
-                        )
-                        .emit();
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::NoDigitsLiteral { span });
                    (token::Integer, sym::integer(0))
                } else {
                    if matches!(base, Base::Binary | Base::Octal) {
                        let base = base as u32;
                        let s = self.str_from_to(start + BytePos(2), end);
                        for (idx, c) in s.char_indices() {
+                            let span = self.mk_sp(
+                                start + BytePos::from_usize(2 + idx),
+                                start + BytePos::from_usize(2 + idx + c.len_utf8()),
+                            );
                            if c != '_' && c.to_digit(base).is_none() {
-                                self.err_span_(
-                                    start + BytePos::from_usize(2 + idx),
-                                    start + BytePos::from_usize(2 + idx + c.len_utf8()),
-                                    &format!("invalid digit for a base {} literal", base),
-                                );
+                                self.sess.emit_err(errors::InvalidDigitLiteral { span, base });
                            }
                        }
                    }
@ -482,19 +438,18 @@ impl<'a> StringReader<'a> {
            }
            rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
                if empty_exponent {
-                    self.err_span_(start, self.pos, "expected at least one digit in exponent");
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.emit_err(errors::EmptyExponentFloat { span });
                }
-                match base {
-                    Base::Hexadecimal => {
-                        self.err_span_(start, end, "hexadecimal float literal is not supported")
-                    }
-                    Base::Octal => {
-                        self.err_span_(start, end, "octal float literal is not supported")
-                    }
-                    Base::Binary => {
-                        self.err_span_(start, end, "binary float literal is not supported")
-                    }
-                    _ => {}
+                let base = match base {
+                    Base::Hexadecimal => Some("hexadecimal"),
+                    Base::Octal => Some("octal"),
+                    Base::Binary => Some("binary"),
+                    _ => None,
+                };
+                if let Some(base) = base {
+                    let span = self.mk_sp(start, end);
+                    self.sess.emit_err(errors::FloatLiteralUnsupportedBase { span, base });
                }
                (token::Float, self.symbol_from_to(start, end))
            }
@ -644,54 +599,34 @@ impl<'a> StringReader<'a> {
    // identifier tokens.
    fn report_unknown_prefix(&self, start: BytePos) {
        let prefix_span = self.mk_sp(start, self.pos);
-        let prefix_str = self.str_from_to(start, self.pos);
-        let msg = format!("prefix `{}` is unknown", prefix_str);
+        let prefix = self.str_from_to(start, self.pos);

        let expn_data = prefix_span.ctxt().outer_expn_data();

        if expn_data.edition >= Edition::Edition2021 {
            // In Rust 2021, this is a hard error.
-            let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
-            err.span_label(prefix_span, "unknown prefix");
-            if prefix_str == "rb" {
-                err.span_suggestion_verbose(
-                    prefix_span,
-                    "use `br` for a raw byte string",
-                    "br",
-                    Applicability::MaybeIncorrect,
-                );
+            let sugg = if prefix == "rb" {
+                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
            } else if expn_data.is_root() {
-                err.span_suggestion_verbose(
-                    prefix_span.shrink_to_hi(),
-                    "consider inserting whitespace here",
-                    " ",
-                    Applicability::MaybeIncorrect,
-                );
-            }
-            err.note("prefixed identifiers and literals are reserved since Rust 2021");
-            err.emit();
+                Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+            } else {
+                None
+            };
+            self.sess.emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
        } else {
            // Before Rust 2021, only emit a lint for migration.
            self.sess.buffer_lint_with_diagnostic(
                &RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
                prefix_span,
                ast::CRATE_NODE_ID,
-                &msg,
+                &format!("prefix `{prefix}` is unknown"),
                BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
            );
        }
    }

-    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
-        self.fatal_span_(
-            start,
-            self.pos,
-            &format!(
-                "too many `#` symbols: raw strings may be delimited \
-                by up to 255 `#` symbols, but found {}",
-                found
-            ),
-        )
+    fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
+        self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
    }

    fn cook_quoted(