1
Fork 0

Make non-ASCII errors more consistent.

There are three kinds of "byte" literals: byte literals, byte string
literals, and raw byte string literals. None are allowed to have
non-ASCII chars in them.

Two `EscapeError` variants exist for when that constraint is violated.
- `NonAsciiCharInByte`: used for byte literals and byte string literals.
- `NonAsciiCharInByteString`: used for raw byte string literals.

As a result, the messages for raw byte string literals use different
wording, without good reason. Also, byte string literals are incorrectly
described as "byte constants" in some error messages.

This commit eliminates `NonAsciiCharInByteString` so the three cases are
handled similarly, and described correctly. The `mode` is enough to
distinguish them.

Note: Some existing error messages mention "byte constants" and some
mention "byte literals". I went with the latter here, because it's a
more correct name, as used by the Reference.
This commit is contained in:
Nicholas Nethercote 2022-11-03 15:17:37 +11:00
parent 34b32b0dac
commit 7dbf2c0ed8
15 changed files with 62 additions and 74 deletions

View file

@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error(
.emit();
}
EscapeError::NonAsciiCharInByte => {
assert!(mode.is_byte());
let (c, span) = last_char();
let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
let desc = match mode {
Mode::Byte => "byte literal",
Mode::ByteStr => "byte string literal",
Mode::RawByteStr => "raw byte string literal",
_ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
};
let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
format!(" but is {:?}", c)
} else {
String::new()
};
err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
if (c as u32) <= 0xFF {
err.span_label(span, &format!("must be ASCII{}", postfix));
// Note: the \\xHH suggestions are not given for raw byte string
// literals, because they are araw and so cannot use any escapes.
if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
err.span_suggestion(
span,
&format!(
@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error(
format!("\\x{:X}", c as u32),
Applicability::MaybeIncorrect,
);
} else if matches!(mode, Mode::Byte) {
} else if mode == Mode::Byte {
err.span_label(span, "this multibyte character does not fit into a single byte");
} else if matches!(mode, Mode::ByteStr) {
} else if mode != Mode::RawByteStr {
let mut utf8 = String::new();
utf8.push(c);
err.span_suggestion(
@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error(
}
err.emit();
}
EscapeError::NonAsciiCharInByteString => {
assert!(mode.is_byte());
let (c, span) = last_char();
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
format!(" but is {:?}", c)
} else {
String::new()
};
handler
.struct_span_err(span, "raw byte string must be ASCII")
.span_label(span, &format!("must be ASCII{}", postfix))
.emit();
}
EscapeError::OutOfRangeHexEscape => {
handler
.struct_span_err(span, "out of range hex escape")