Make non-ASCII errors more consistent.

There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated. - `NonAsciiCharInByte`: used for byte literals and byte string literals. - `NonAsciiCharInByteString`: used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference.
2022-11-03 15:17:37 +11:00 · 2022-11-03 15:17:37 +11:00 · 7dbf2c0ed8
commit 7dbf2c0ed8
parent 34b32b0dac
15 changed files with 62 additions and 74 deletions
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error(
                .emit();
        }
        EscapeError::NonAsciiCharInByte => {
-            assert!(mode.is_byte());
            let (c, span) = last_char();
-            let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
+            let desc = match mode {
+                Mode::Byte => "byte literal",
+                Mode::ByteStr => "byte string literal",
+                Mode::RawByteStr => "raw byte string literal",
+                _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
+            };
+            let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
                format!(" but is {:?}", c)
            } else {
                String::new()
            };
-            err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
-            if (c as u32) <= 0xFF {
+            err.span_label(span, &format!("must be ASCII{}", postfix));
+            // Note: the \\xHH suggestions are not given for raw byte string
+            // literals, because they are raw and so cannot use any escapes.
+            if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
                err.span_suggestion(
                    span,
                    &format!(
@@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error(
                    format!("\\x{:X}", c as u32),
                    Applicability::MaybeIncorrect,
                );
-            } else if matches!(mode, Mode::Byte) {
+            } else if mode == Mode::Byte {
                err.span_label(span, "this multibyte character does not fit into a single byte");
-            } else if matches!(mode, Mode::ByteStr) {
+            } else if mode != Mode::RawByteStr {
                let mut utf8 = String::new();
                utf8.push(c);
                err.span_suggestion(
@@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error(
            }
            err.emit();
        }
-        EscapeError::NonAsciiCharInByteString => {
-            assert!(mode.is_byte());
-            let (c, span) = last_char();
-            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
-                format!(" but is {:?}", c)
-            } else {
-                String::new()
-            };
-            handler
-                .struct_span_err(span, "raw byte string must be ASCII")
-                .span_label(span, &format!("must be ASCII{}", postfix))
-                .emit();
-        }
        EscapeError::OutOfRangeHexEscape => {
            handler
                .struct_span_err(span, "out of range hex escape")