refactor unescape
This commit is contained in:
parent
d5e7206ca6
commit
4c01d494b8
2 changed files with 75 additions and 38 deletions
|
@ -86,7 +86,8 @@ where
|
||||||
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
|
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
|
||||||
callback(0..(src.len() - chars.as_str().len()), res);
|
callback(0..(src.len() - chars.as_str().len()), res);
|
||||||
}
|
}
|
||||||
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
|
Mode::Str | Mode::ByteStr => unescape_str_common(src, mode, callback),
|
||||||
|
|
||||||
Mode::RawStr | Mode::RawByteStr => {
|
Mode::RawStr | Mode::RawByteStr => {
|
||||||
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
|
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
|
||||||
}
|
}
|
||||||
|
@ -94,6 +95,7 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A unit within CStr. Must not be a nul character.
|
||||||
pub enum CStrUnit {
|
pub enum CStrUnit {
|
||||||
Byte(u8),
|
Byte(u8),
|
||||||
Char(char),
|
Char(char),
|
||||||
|
@ -164,24 +166,52 @@ impl Mode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_byte(self) -> bool {
|
/// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
|
||||||
|
pub fn ascii_escapes_should_be_ascii(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
|
Mode::Char | Mode::Str | Mode::RawStr => true,
|
||||||
Mode::Char | Mode::Str | Mode::RawStr => false,
|
Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether characters within the literal must be within the ASCII range.
|
||||||
|
pub fn characters_should_be_ascii(self) -> bool {
|
||||||
|
match self {
|
||||||
|
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
|
||||||
|
Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Byte literals do not allow unicode escapes.
|
||||||
|
pub fn is_unicode_escape_disallowed(self) -> bool {
|
||||||
|
match self {
|
||||||
|
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
|
||||||
|
Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prefix_noraw(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b",
|
||||||
|
Mode::CStr | Mode::RawCStr => "c",
|
||||||
|
Mode::Char | Mode::Str | Mode::RawStr => "",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
fn scan_escape<T: From<u8> + From<char>>(
|
||||||
|
chars: &mut Chars<'_>,
|
||||||
|
mode: Mode,
|
||||||
|
) -> Result<T, EscapeError> {
|
||||||
// Previous character was '\\', unescape what follows.
|
// Previous character was '\\', unescape what follows.
|
||||||
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
|
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
|
||||||
'"' => '"',
|
'"' => b'"',
|
||||||
'n' => '\n',
|
'n' => b'\n',
|
||||||
'r' => '\r',
|
'r' => b'\r',
|
||||||
't' => '\t',
|
't' => b'\t',
|
||||||
'\\' => '\\',
|
'\\' => b'\\',
|
||||||
'\'' => '\'',
|
'\'' => b'\'',
|
||||||
'0' => '\0',
|
'0' => b'\0',
|
||||||
|
|
||||||
'x' => {
|
'x' => {
|
||||||
// Parse hexadecimal character code.
|
// Parse hexadecimal character code.
|
||||||
|
@ -194,22 +224,23 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
|
||||||
|
|
||||||
let value = hi * 16 + lo;
|
let value = hi * 16 + lo;
|
||||||
|
|
||||||
// For a non-byte literal verify that it is within ASCII range.
|
if mode.ascii_escapes_should_be_ascii() && !is_ascii(value) {
|
||||||
if !is_byte && !is_ascii(value) {
|
|
||||||
return Err(EscapeError::OutOfRangeHexEscape);
|
return Err(EscapeError::OutOfRangeHexEscape);
|
||||||
}
|
}
|
||||||
let value = value as u8;
|
|
||||||
|
|
||||||
value as char
|
value as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
'u' => scan_unicode(chars, is_byte)?,
|
'u' => return scan_unicode(chars, mode.is_unicode_escape_disallowed()).map(Into::into),
|
||||||
_ => return Err(EscapeError::InvalidEscape),
|
_ => return Err(EscapeError::InvalidEscape),
|
||||||
};
|
};
|
||||||
Ok(res)
|
Ok(res.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
fn scan_unicode(
|
||||||
|
chars: &mut Chars<'_>,
|
||||||
|
is_unicode_escape_disallowed: bool,
|
||||||
|
) -> Result<char, EscapeError> {
|
||||||
// We've parsed '\u', now we have to parse '{..}'.
|
// We've parsed '\u', now we have to parse '{..}'.
|
||||||
|
|
||||||
if chars.next() != Some('{') {
|
if chars.next() != Some('{') {
|
||||||
|
@ -237,7 +268,7 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
|
||||||
|
|
||||||
// Incorrect syntax has higher priority for error reporting
|
// Incorrect syntax has higher priority for error reporting
|
||||||
// than a disallowed value for a literal.
|
// than a disallowed value for a literal.
|
||||||
if is_byte {
|
if is_unicode_escape_disallowed {
|
||||||
return Err(EscapeError::UnicodeEscapeInByte);
|
return Err(EscapeError::UnicodeEscapeInByte);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,8 +294,8 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
|
fn ascii_check(c: char, characters_should_be_ascii: bool) -> Result<char, EscapeError> {
|
||||||
if is_byte && !c.is_ascii() {
|
if characters_should_be_ascii && !c.is_ascii() {
|
||||||
// Byte literal can't be a non-ascii character.
|
// Byte literal can't be a non-ascii character.
|
||||||
Err(EscapeError::NonAsciiCharInByte)
|
Err(EscapeError::NonAsciiCharInByte)
|
||||||
} else {
|
} else {
|
||||||
|
@ -275,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
|
||||||
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
||||||
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
|
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
|
||||||
let res = match c {
|
let res = match c {
|
||||||
'\\' => scan_escape(chars, is_byte),
|
'\\' => scan_escape(chars, if is_byte { Mode::Byte } else { Mode::Char }),
|
||||||
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
|
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
|
||||||
'\r' => Err(EscapeError::BareCarriageReturn),
|
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||||
_ => ascii_check(c, is_byte),
|
_ => ascii_check(c, is_byte),
|
||||||
|
@ -288,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, E
|
||||||
|
|
||||||
/// Takes the contents of a string literal (without quotes) and produces a
|
/// Takes the contents of a string literal (without quotes) and produces a
|
||||||
/// sequence of escaped characters or errors.
|
/// sequence of escaped characters or errors.
|
||||||
fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
|
fn unescape_str_common<F, T: From<u8> + From<char>>(src: &str, mode: Mode, callback: &mut F)
|
||||||
where
|
where
|
||||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
F: FnMut(Range<usize>, Result<T, EscapeError>),
|
||||||
{
|
{
|
||||||
let mut chars = src.chars();
|
let mut chars = src.chars();
|
||||||
|
|
||||||
|
@ -312,17 +343,17 @@ where
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
_ => scan_escape(&mut chars, is_byte),
|
_ => scan_escape::<T>(&mut chars, mode),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'\n' => Ok('\n'),
|
'\n' => Ok(b'\n'.into()),
|
||||||
'\t' => Ok('\t'),
|
'\t' => Ok(b'\t'.into()),
|
||||||
'"' => Err(EscapeError::EscapeOnlyChar),
|
'"' => Err(EscapeError::EscapeOnlyChar),
|
||||||
'\r' => Err(EscapeError::BareCarriageReturn),
|
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||||
_ => ascii_check(c, is_byte),
|
_ => ascii_check(c, mode.characters_should_be_ascii()).map(Into::into),
|
||||||
};
|
};
|
||||||
let end = src.len() - chars.as_str().len();
|
let end = src.len() - chars.as_str().len();
|
||||||
callback(start..end, res);
|
callback(start..end, res.map(Into::into));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let sugg = sugg.unwrap_or_else(|| {
|
let sugg = sugg.unwrap_or_else(|| {
|
||||||
let is_byte = mode.is_byte();
|
let prefix = mode.prefix_noraw();
|
||||||
let prefix = if is_byte { "b" } else { "" };
|
|
||||||
let mut escaped = String::with_capacity(lit.len());
|
let mut escaped = String::with_capacity(lit.len());
|
||||||
let mut chrs = lit.chars().peekable();
|
let mut chrs = lit.chars().peekable();
|
||||||
while let Some(first) = chrs.next() {
|
while let Some(first) = chrs.next() {
|
||||||
|
@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
let sugg = format!("{prefix}\"{escaped}\"");
|
let sugg = format!("{prefix}\"{escaped}\"");
|
||||||
MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
|
MoreThanOneCharSugg::Quotes {
|
||||||
|
span: span_with_quotes,
|
||||||
|
is_byte: mode == Mode::Byte,
|
||||||
|
sugg,
|
||||||
|
}
|
||||||
});
|
});
|
||||||
handler.emit_err(UnescapeError::MoreThanOneChar {
|
handler.emit_err(UnescapeError::MoreThanOneChar {
|
||||||
span: span_with_quotes,
|
span: span_with_quotes,
|
||||||
|
@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error(
|
||||||
char_span,
|
char_span,
|
||||||
escaped_sugg: c.escape_default().to_string(),
|
escaped_sugg: c.escape_default().to_string(),
|
||||||
escaped_msg: escaped_char(c),
|
escaped_msg: escaped_char(c),
|
||||||
byte: mode.is_byte(),
|
byte: mode == Mode::Byte,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
EscapeError::BareCarriageReturn => {
|
EscapeError::BareCarriageReturn => {
|
||||||
|
@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error(
|
||||||
EscapeError::InvalidEscape => {
|
EscapeError::InvalidEscape => {
|
||||||
let (c, span) = last_char();
|
let (c, span) = last_char();
|
||||||
|
|
||||||
let label =
|
let label = if mode == Mode::Byte || mode == Mode::ByteStr {
|
||||||
if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
|
"unknown byte escape"
|
||||||
|
} else {
|
||||||
|
"unknown character escape"
|
||||||
|
};
|
||||||
let ec = escaped_char(c);
|
let ec = escaped_char(c);
|
||||||
let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
|
let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
|
||||||
diag.span_label(span, label);
|
diag.span_label(span, label);
|
||||||
if c == '{' || c == '}' && !mode.is_byte() {
|
if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
|
||||||
diag.help(
|
diag.help(
|
||||||
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
|
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
|
||||||
);
|
);
|
||||||
|
@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error(
|
||||||
version control settings",
|
version control settings",
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
if !mode.is_byte() {
|
if mode == Mode::Str || mode == Mode::Char {
|
||||||
diag.span_suggestion(
|
diag.span_suggestion(
|
||||||
span_with_quotes,
|
span_with_quotes,
|
||||||
"if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
|
"if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue