introduce unescape module
Currently, we deal with escape sequences twice: once when we lex a string, and a second time when we unescape literals. This PR aims to remove this duplication by introducing a new `unescape` module as a single source of truth for character escaping rules.
This commit is contained in:
parent
9b67bd42b7
commit
bfa5f27847
24 changed files with 1046 additions and 768 deletions
|
@ -184,7 +184,7 @@ impl<'a> DiagnosticBuilder<'a> {
|
||||||
) -> &mut Self);
|
) -> &mut Self);
|
||||||
forward!(pub fn warn(&mut self, msg: &str) -> &mut Self);
|
forward!(pub fn warn(&mut self, msg: &str) -> &mut Self);
|
||||||
forward!(pub fn span_warn<S: Into<MultiSpan>>(&mut self, sp: S, msg: &str) -> &mut Self);
|
forward!(pub fn span_warn<S: Into<MultiSpan>>(&mut self, sp: S, msg: &str) -> &mut Self);
|
||||||
forward!(pub fn help(&mut self , msg: &str) -> &mut Self);
|
forward!(pub fn help(&mut self, msg: &str) -> &mut Self);
|
||||||
forward!(pub fn span_help<S: Into<MultiSpan>>(&mut self,
|
forward!(pub fn span_help<S: Into<MultiSpan>>(&mut self,
|
||||||
sp: S,
|
sp: S,
|
||||||
msg: &str,
|
msg: &str,
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
use crate::ast::{self, Ident};
|
use crate::ast::{self, Ident};
|
||||||
use crate::parse::{token, ParseSess};
|
use crate::parse::{token, ParseSess};
|
||||||
use crate::symbol::Symbol;
|
use crate::symbol::Symbol;
|
||||||
|
use crate::parse::unescape;
|
||||||
|
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
|
||||||
|
|
||||||
use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder};
|
use errors::{FatalError, Diagnostic, DiagnosticBuilder};
|
||||||
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
|
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
|
||||||
use core::unicode::property::Pattern_White_Space;
|
use core::unicode::property::Pattern_White_Space;
|
||||||
|
|
||||||
|
@ -334,25 +336,12 @@ impl<'a> StringReader<'a> {
|
||||||
self.err_span(self.mk_sp(from_pos, to_pos), m)
|
self.err_span(self.mk_sp(from_pos, to_pos), m)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pushes a character to a message string for error reporting
|
|
||||||
fn push_escaped_char_for_msg(m: &mut String, c: char) {
|
|
||||||
match c {
|
|
||||||
'\u{20}'..='\u{7e}' => {
|
|
||||||
// Don't escape \, ' or " for user-facing messages
|
|
||||||
m.push(c);
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
m.extend(c.escape_default());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
||||||
/// escaped character to the error message
|
/// escaped character to the error message
|
||||||
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
|
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
|
||||||
let mut m = m.to_string();
|
let mut m = m.to_string();
|
||||||
m.push_str(": ");
|
m.push_str(": ");
|
||||||
Self::push_escaped_char_for_msg(&mut m, c);
|
push_escaped_char(&mut m, c);
|
||||||
|
|
||||||
self.fatal_span_(from_pos, to_pos, &m[..])
|
self.fatal_span_(from_pos, to_pos, &m[..])
|
||||||
}
|
}
|
||||||
|
@ -368,7 +357,7 @@ impl<'a> StringReader<'a> {
|
||||||
{
|
{
|
||||||
let mut m = m.to_string();
|
let mut m = m.to_string();
|
||||||
m.push_str(": ");
|
m.push_str(": ");
|
||||||
Self::push_escaped_char_for_msg(&mut m, c);
|
push_escaped_char(&mut m, c);
|
||||||
|
|
||||||
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
|
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
|
||||||
}
|
}
|
||||||
|
@ -378,29 +367,10 @@ impl<'a> StringReader<'a> {
|
||||||
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
||||||
let mut m = m.to_string();
|
let mut m = m.to_string();
|
||||||
m.push_str(": ");
|
m.push_str(": ");
|
||||||
Self::push_escaped_char_for_msg(&mut m, c);
|
push_escaped_char(&mut m, c);
|
||||||
self.err_span_(from_pos, to_pos, &m[..]);
|
self.err_span_(from_pos, to_pos, &m[..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn struct_err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
|
|
||||||
-> DiagnosticBuilder<'a>
|
|
||||||
{
|
|
||||||
let mut m = m.to_string();
|
|
||||||
m.push_str(": ");
|
|
||||||
Self::push_escaped_char_for_msg(&mut m, c);
|
|
||||||
|
|
||||||
self.sess.span_diagnostic.struct_span_err(self.mk_sp(from_pos, to_pos), &m[..])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
|
|
||||||
/// offending string to the error message
|
|
||||||
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
|
|
||||||
m.push_str(": ");
|
|
||||||
m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
|
|
||||||
|
|
||||||
self.fatal_span_(from_pos, to_pos, &m[..])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance peek_tok and peek_span to refer to the next token, and
|
/// Advance peek_tok and peek_span to refer to the next token, and
|
||||||
/// possibly update the interner.
|
/// possibly update the interner.
|
||||||
fn advance_token(&mut self) -> Result<(), ()> {
|
fn advance_token(&mut self) -> Result<(), ()> {
|
||||||
|
@ -863,271 +833,6 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
|
|
||||||
/// error if too many or too few digits are encountered.
|
|
||||||
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
|
|
||||||
debug!("scanning {} digits until {:?}", n_digits, delim);
|
|
||||||
let start_bpos = self.pos;
|
|
||||||
let mut accum_int = 0;
|
|
||||||
|
|
||||||
let mut valid = true;
|
|
||||||
for _ in 0..n_digits {
|
|
||||||
if self.is_eof() {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
self.fatal_span_(start_bpos,
|
|
||||||
last_bpos,
|
|
||||||
"unterminated numeric character escape").raise();
|
|
||||||
}
|
|
||||||
if self.ch_is(delim) {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
self.err_span_(start_bpos,
|
|
||||||
last_bpos,
|
|
||||||
"numeric character escape is too short");
|
|
||||||
valid = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let c = self.ch.unwrap_or('\x00');
|
|
||||||
accum_int *= 16;
|
|
||||||
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
|
||||||
self.err_span_char(self.pos,
|
|
||||||
self.next_pos,
|
|
||||||
"invalid character in numeric character escape",
|
|
||||||
c);
|
|
||||||
|
|
||||||
valid = false;
|
|
||||||
0
|
|
||||||
});
|
|
||||||
self.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
if below_0x7f_only && accum_int >= 0x80 {
|
|
||||||
self.err_span_(start_bpos,
|
|
||||||
self.pos,
|
|
||||||
"this form of character escape may only be used with characters in \
|
|
||||||
the range [\\x00-\\x7f]");
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
match char::from_u32(accum_int) {
|
|
||||||
Some(_) => valid,
|
|
||||||
None => {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Scan for a single (possibly escaped) byte or char
|
|
||||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
|
||||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
|
||||||
///
|
|
||||||
/// Returns `true` if there was a valid char/byte.
|
|
||||||
fn scan_char_or_byte(&mut self,
|
|
||||||
start: BytePos,
|
|
||||||
first_source_char: char,
|
|
||||||
ascii_only: bool,
|
|
||||||
delim: char)
|
|
||||||
-> bool
|
|
||||||
{
|
|
||||||
match first_source_char {
|
|
||||||
'\\' => {
|
|
||||||
// '\X' for some X must be a character constant:
|
|
||||||
let escaped = self.ch;
|
|
||||||
let escaped_pos = self.pos;
|
|
||||||
self.bump();
|
|
||||||
match escaped {
|
|
||||||
None => {} // EOF here is an error that will be checked later.
|
|
||||||
Some(e) => {
|
|
||||||
return match e {
|
|
||||||
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
|
||||||
'x' => self.scan_byte_escape(delim, !ascii_only),
|
|
||||||
'u' => {
|
|
||||||
let valid = if self.ch_is('{') {
|
|
||||||
self.scan_unicode_escape(delim) && !ascii_only
|
|
||||||
} else {
|
|
||||||
let span = self.mk_sp(start, self.pos);
|
|
||||||
let mut suggestion = "\\u{".to_owned();
|
|
||||||
let msg = "incorrect unicode escape sequence";
|
|
||||||
let mut err = self.sess.span_diagnostic.struct_span_err(
|
|
||||||
span,
|
|
||||||
msg,
|
|
||||||
);
|
|
||||||
let mut i = 0;
|
|
||||||
while let (Some(ch), true) = (self.ch, i < 6) {
|
|
||||||
if ch.is_digit(16) {
|
|
||||||
suggestion.push(ch);
|
|
||||||
self.bump();
|
|
||||||
i += 1;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if i != 0 {
|
|
||||||
suggestion.push('}');
|
|
||||||
err.span_suggestion(
|
|
||||||
self.mk_sp(start, self.pos),
|
|
||||||
"format of unicode escape sequences uses braces",
|
|
||||||
suggestion,
|
|
||||||
Applicability::MaybeIncorrect,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
err.span_label(span, msg);
|
|
||||||
err.help(
|
|
||||||
"format of unicode escape sequences is `\\u{...}`",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
err.emit();
|
|
||||||
false
|
|
||||||
};
|
|
||||||
if ascii_only {
|
|
||||||
self.err_span_(start,
|
|
||||||
self.pos,
|
|
||||||
"unicode escape sequences cannot be used as a \
|
|
||||||
byte or in a byte string");
|
|
||||||
}
|
|
||||||
valid
|
|
||||||
|
|
||||||
}
|
|
||||||
'\n' if delim == '"' => {
|
|
||||||
self.consume_whitespace();
|
|
||||||
true
|
|
||||||
}
|
|
||||||
'\r' if delim == '"' && self.ch_is('\n') => {
|
|
||||||
self.consume_whitespace();
|
|
||||||
true
|
|
||||||
}
|
|
||||||
c => {
|
|
||||||
let pos = self.pos;
|
|
||||||
let msg = if ascii_only {
|
|
||||||
"unknown byte escape"
|
|
||||||
} else {
|
|
||||||
"unknown character escape"
|
|
||||||
};
|
|
||||||
let mut err = self.struct_err_span_char(escaped_pos, pos, msg, c);
|
|
||||||
err.span_label(self.mk_sp(escaped_pos, pos), msg);
|
|
||||||
if e == '\r' {
|
|
||||||
err.help(
|
|
||||||
"this is an isolated carriage return; consider checking \
|
|
||||||
your editor and version control settings",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (e == '{' || e == '}') && !ascii_only {
|
|
||||||
err.help(
|
|
||||||
"if used in a formatting string, curly braces are escaped \
|
|
||||||
with `{{` and `}}`",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
err.emit();
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
|
||||||
let pos = self.pos;
|
|
||||||
self.err_span_char(start,
|
|
||||||
pos,
|
|
||||||
if ascii_only {
|
|
||||||
"byte constant must be escaped"
|
|
||||||
} else {
|
|
||||||
"character constant must be escaped"
|
|
||||||
},
|
|
||||||
first_source_char);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
'\r' => {
|
|
||||||
if self.ch_is('\n') {
|
|
||||||
self.bump();
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
self.err_span_(start,
|
|
||||||
self.pos,
|
|
||||||
"bare CR not allowed in string, use \\r instead");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
if ascii_only && first_source_char > '\x7F' {
|
|
||||||
let pos = self.pos;
|
|
||||||
self.err_span_(start,
|
|
||||||
pos,
|
|
||||||
"byte constant must be ASCII. Use a \\xHH escape for a \
|
|
||||||
non-ASCII byte");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Scan over a `\u{...}` escape
|
|
||||||
///
|
|
||||||
/// At this point, we have already seen the `\` and the `u`, the `{` is the current character.
|
|
||||||
/// We will read a hex number (with `_` separators), with 1 to 6 actual digits,
|
|
||||||
/// and pass over the `}`.
|
|
||||||
fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
|
||||||
self.bump(); // past the {
|
|
||||||
let start_bpos = self.pos;
|
|
||||||
let mut valid = true;
|
|
||||||
|
|
||||||
if let Some('_') = self.ch {
|
|
||||||
// disallow leading `_`
|
|
||||||
self.err_span_(self.pos,
|
|
||||||
self.next_pos,
|
|
||||||
"invalid start of unicode escape");
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = self.scan_digits(16, 16);
|
|
||||||
|
|
||||||
if count > 6 {
|
|
||||||
self.err_span_(start_bpos,
|
|
||||||
self.pos,
|
|
||||||
"overlong unicode escape (must have at most 6 hex digits)");
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
loop {
|
|
||||||
match self.ch {
|
|
||||||
Some('}') => {
|
|
||||||
if valid && count == 0 {
|
|
||||||
self.err_span_(start_bpos,
|
|
||||||
self.pos,
|
|
||||||
"empty unicode escape (must have at least 1 hex digit)");
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
self.bump(); // past the ending `}`
|
|
||||||
break;
|
|
||||||
},
|
|
||||||
Some(c) => {
|
|
||||||
if c == delim {
|
|
||||||
self.err_span_(self.pos,
|
|
||||||
self.pos,
|
|
||||||
"unterminated unicode escape (needed a `}`)");
|
|
||||||
valid = false;
|
|
||||||
break;
|
|
||||||
} else if valid {
|
|
||||||
self.err_span_char(start_bpos,
|
|
||||||
self.pos,
|
|
||||||
"invalid character in unicode escape",
|
|
||||||
c);
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
self.fatal_span_(start_bpos,
|
|
||||||
self.pos,
|
|
||||||
"unterminated unicode escape (found EOF)").raise();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
valid
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Scan over a float exponent.
|
/// Scan over a float exponent.
|
||||||
fn scan_float_exponent(&mut self) {
|
fn scan_float_exponent(&mut self) {
|
||||||
if self.ch_is('e') || self.ch_is('E') {
|
if self.ch_is('e') || self.ch_is('E') {
|
||||||
|
@ -1393,26 +1098,21 @@ impl<'a> StringReader<'a> {
|
||||||
self.bump();
|
self.bump();
|
||||||
let start = self.pos;
|
let start = self.pos;
|
||||||
|
|
||||||
// the eof will be picked up by the final `'` check below
|
|
||||||
let c2 = self.ch.unwrap_or('\x00');
|
|
||||||
self.bump();
|
|
||||||
|
|
||||||
// If the character is an ident start not followed by another single
|
// If the character is an ident start not followed by another single
|
||||||
// quote, then this is a lifetime name:
|
// quote, then this is a lifetime name:
|
||||||
if (ident_start(Some(c2)) || c2.is_numeric()) && !self.ch_is('\'') {
|
let starts_with_number = self.ch.unwrap_or('\x00').is_numeric();
|
||||||
|
if (ident_start(self.ch) || starts_with_number) && !self.nextch_is('\'') {
|
||||||
|
self.bump();
|
||||||
while ident_continue(self.ch) {
|
while ident_continue(self.ch) {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
// lifetimes shouldn't end with a single quote
|
// lifetimes shouldn't end with a single quote
|
||||||
// if we find one, then this is an invalid character literal
|
// if we find one, then this is an invalid character literal
|
||||||
if self.ch_is('\'') {
|
if self.ch_is('\'') {
|
||||||
self.err_span_(
|
let id = self.name_from(start);
|
||||||
start_with_quote,
|
|
||||||
self.next_pos,
|
|
||||||
"character literal may only contain one codepoint");
|
|
||||||
self.bump();
|
self.bump();
|
||||||
return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
|
self.validate_char_escape(start_with_quote);
|
||||||
|
return Ok(token::Literal(token::Char(id), None))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Include the leading `'` in the real identifier, for macro
|
// Include the leading `'` in the real identifier, for macro
|
||||||
|
@ -1422,7 +1122,7 @@ impl<'a> StringReader<'a> {
|
||||||
self.mk_ident(lifetime_name)
|
self.mk_ident(lifetime_name)
|
||||||
});
|
});
|
||||||
|
|
||||||
if c2.is_numeric() {
|
if starts_with_number {
|
||||||
// this is a recovered lifetime written `'1`, error but accept it
|
// this is a recovered lifetime written `'1`, error but accept it
|
||||||
self.err_span_(
|
self.err_span_(
|
||||||
start_with_quote,
|
start_with_quote,
|
||||||
|
@ -1433,58 +1133,30 @@ impl<'a> StringReader<'a> {
|
||||||
|
|
||||||
return Ok(token::Lifetime(ident));
|
return Ok(token::Lifetime(ident));
|
||||||
}
|
}
|
||||||
|
let msg = "unterminated character literal";
|
||||||
let valid = self.scan_char_or_byte(start, c2, /* ascii_only */ false, '\'');
|
let id = self.scan_single_quoted_string(start_with_quote, msg);
|
||||||
|
self.validate_char_escape(start_with_quote);
|
||||||
if !self.ch_is('\'') {
|
|
||||||
let pos = self.pos;
|
|
||||||
|
|
||||||
loop {
|
|
||||||
self.bump();
|
|
||||||
if self.ch_is('\'') {
|
|
||||||
let start = self.src_index(start);
|
|
||||||
let end = self.src_index(self.pos);
|
|
||||||
self.bump();
|
|
||||||
let span = self.mk_sp(start_with_quote, self.pos);
|
|
||||||
self.sess.span_diagnostic
|
|
||||||
.struct_span_err(span,
|
|
||||||
"character literal may only contain one codepoint")
|
|
||||||
.span_suggestion(
|
|
||||||
span,
|
|
||||||
"if you meant to write a `str` literal, use double quotes",
|
|
||||||
format!("\"{}\"", &self.src[start..end]),
|
|
||||||
Applicability::MachineApplicable
|
|
||||||
).emit();
|
|
||||||
return Ok(token::Literal(token::Err(Symbol::intern("??")), None))
|
|
||||||
}
|
|
||||||
if self.ch_is('\n') || self.is_eof() || self.ch_is('/') {
|
|
||||||
// Only attempt to infer single line string literals. If we encounter
|
|
||||||
// a slash, bail out in order to avoid nonsensical suggestion when
|
|
||||||
// involving comments.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.fatal_span_verbose(start_with_quote, pos,
|
|
||||||
String::from("character literal may only contain one codepoint")).raise();
|
|
||||||
}
|
|
||||||
|
|
||||||
let id = if valid {
|
|
||||||
self.name_from(start)
|
|
||||||
} else {
|
|
||||||
Symbol::intern("0")
|
|
||||||
};
|
|
||||||
|
|
||||||
self.bump(); // advance ch past token
|
|
||||||
let suffix = self.scan_optional_raw_name();
|
let suffix = self.scan_optional_raw_name();
|
||||||
|
|
||||||
Ok(token::Literal(token::Char(id), suffix))
|
Ok(token::Literal(token::Char(id), suffix))
|
||||||
}
|
}
|
||||||
'b' => {
|
'b' => {
|
||||||
self.bump();
|
self.bump();
|
||||||
let lit = match self.ch {
|
let lit = match self.ch {
|
||||||
Some('\'') => self.scan_byte(),
|
Some('\'') => {
|
||||||
Some('"') => self.scan_byte_string(),
|
let start_with_quote = self.pos;
|
||||||
|
self.bump();
|
||||||
|
let msg = "unterminated byte constant";
|
||||||
|
let id = self.scan_single_quoted_string(start_with_quote, msg);
|
||||||
|
self.validate_byte_escape(start_with_quote);
|
||||||
|
token::Byte(id)
|
||||||
|
},
|
||||||
|
Some('"') => {
|
||||||
|
let start_with_quote = self.pos;
|
||||||
|
let msg = "unterminated double quote byte string";
|
||||||
|
let id = self.scan_double_quoted_string(msg);
|
||||||
|
self.validate_byte_str_escape(start_with_quote);
|
||||||
|
token::ByteStr(id)
|
||||||
|
},
|
||||||
Some('r') => self.scan_raw_byte_string(),
|
Some('r') => self.scan_raw_byte_string(),
|
||||||
_ => unreachable!(), // Should have been a token::Ident above.
|
_ => unreachable!(), // Should have been a token::Ident above.
|
||||||
};
|
};
|
||||||
|
@ -1493,32 +1165,11 @@ impl<'a> StringReader<'a> {
|
||||||
Ok(token::Literal(lit, suffix))
|
Ok(token::Literal(lit, suffix))
|
||||||
}
|
}
|
||||||
'"' => {
|
'"' => {
|
||||||
let start_bpos = self.pos;
|
let start_with_quote = self.pos;
|
||||||
let mut valid = true;
|
let msg = "unterminated double quote string";
|
||||||
self.bump();
|
let id = self.scan_double_quoted_string(msg);
|
||||||
|
self.validate_str_escape(start_with_quote);
|
||||||
while !self.ch_is('"') {
|
|
||||||
if self.is_eof() {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
self.fatal_span_(start_bpos,
|
|
||||||
last_bpos,
|
|
||||||
"unterminated double quote string").raise();
|
|
||||||
}
|
|
||||||
|
|
||||||
let ch_start = self.pos;
|
|
||||||
let ch = self.ch.unwrap();
|
|
||||||
self.bump();
|
|
||||||
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only */ false, '"');
|
|
||||||
}
|
|
||||||
// adjust for the ASCII " at the start of the literal
|
|
||||||
let id = if valid {
|
|
||||||
self.name_from(start_bpos + BytePos(1))
|
|
||||||
} else {
|
|
||||||
Symbol::intern("??")
|
|
||||||
};
|
|
||||||
self.bump();
|
|
||||||
let suffix = self.scan_optional_raw_name();
|
let suffix = self.scan_optional_raw_name();
|
||||||
|
|
||||||
Ok(token::Literal(token::Str_(id), suffix))
|
Ok(token::Literal(token::Str_(id), suffix))
|
||||||
}
|
}
|
||||||
'r' => {
|
'r' => {
|
||||||
|
@ -1659,12 +1310,6 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn consume_whitespace(&mut self) {
|
|
||||||
while is_pattern_whitespace(self.ch) && !self.is_eof() {
|
|
||||||
self.bump();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_to_eol(&mut self) -> String {
|
fn read_to_eol(&mut self) -> String {
|
||||||
let mut val = String::new();
|
let mut val = String::new();
|
||||||
while !self.ch_is('\n') && !self.is_eof() {
|
while !self.ch_is('\n') && !self.is_eof() {
|
||||||
|
@ -1698,73 +1343,63 @@ impl<'a> StringReader<'a> {
|
||||||
(self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
(self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_byte(&mut self) -> token::Lit {
|
fn scan_single_quoted_string(&mut self,
|
||||||
self.bump();
|
start_with_quote: BytePos,
|
||||||
|
unterminated_msg: &str) -> ast::Name {
|
||||||
|
// assumes that first `'` is consumed
|
||||||
let start = self.pos;
|
let start = self.pos;
|
||||||
|
// lex `'''` as a single char, for recovery
|
||||||
|
if self.ch_is('\'') && self.nextch_is('\'') {
|
||||||
|
self.bump();
|
||||||
|
} else {
|
||||||
|
let mut first = true;
|
||||||
|
loop {
|
||||||
|
if self.ch_is('\'') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if self.ch_is('\\') && (self.nextch_is('\'') || self.nextch_is('\\')) {
|
||||||
|
self.bump();
|
||||||
|
self.bump();
|
||||||
|
} else {
|
||||||
|
// Only attempt to infer single line string literals. If we encounter
|
||||||
|
// a slash, bail out in order to avoid nonsensical suggestion when
|
||||||
|
// involving comments.
|
||||||
|
if self.is_eof()
|
||||||
|
|| (self.ch_is('/') && !first)
|
||||||
|
|| (self.ch_is('\n') && !self.nextch_is('\'')) {
|
||||||
|
|
||||||
// the eof will be picked up by the final `'` check below
|
self.fatal_span_(start_with_quote, self.pos, unterminated_msg.into())
|
||||||
let c2 = self.ch.unwrap_or('\x00');
|
.raise()
|
||||||
self.bump();
|
}
|
||||||
|
self.bump();
|
||||||
let valid = self.scan_char_or_byte(start,
|
}
|
||||||
c2,
|
first = false;
|
||||||
// ascii_only =
|
}
|
||||||
true,
|
|
||||||
'\'');
|
|
||||||
if !self.ch_is('\'') {
|
|
||||||
// Byte offsetting here is okay because the
|
|
||||||
// character before position `start` are an
|
|
||||||
// ascii single quote and ascii 'b'.
|
|
||||||
let pos = self.pos;
|
|
||||||
self.fatal_span_verbose(start - BytePos(2),
|
|
||||||
pos,
|
|
||||||
"unterminated byte constant".to_string()).raise();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let id = if valid {
|
let id = self.name_from(start);
|
||||||
self.name_from(start)
|
self.bump();
|
||||||
} else {
|
id
|
||||||
Symbol::intern("?")
|
|
||||||
};
|
|
||||||
self.bump(); // advance ch past token
|
|
||||||
|
|
||||||
token::Byte(id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
|
||||||
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
|
debug_assert!(self.ch_is('\"'));
|
||||||
self.scan_hex_digits(2, delim, below_0x7f_only)
|
let start_with_quote = self.pos;
|
||||||
}
|
|
||||||
|
|
||||||
fn scan_byte_string(&mut self) -> token::Lit {
|
|
||||||
self.bump();
|
self.bump();
|
||||||
let start = self.pos;
|
let start = self.pos;
|
||||||
let mut valid = true;
|
|
||||||
|
|
||||||
while !self.ch_is('"') {
|
while !self.ch_is('"') {
|
||||||
if self.is_eof() {
|
if self.is_eof() {
|
||||||
let pos = self.pos;
|
let pos = self.pos;
|
||||||
self.fatal_span_(start, pos, "unterminated double quote byte string").raise();
|
self.fatal_span_(start_with_quote, pos, unterminated_msg).raise();
|
||||||
|
}
|
||||||
|
if self.ch_is('\\') && (self.nextch_is('\\') || self.nextch_is('"')) {
|
||||||
|
self.bump();
|
||||||
}
|
}
|
||||||
|
|
||||||
let ch_start = self.pos;
|
|
||||||
let ch = self.ch.unwrap();
|
|
||||||
self.bump();
|
self.bump();
|
||||||
valid &= self.scan_char_or_byte(ch_start,
|
|
||||||
ch,
|
|
||||||
// ascii_only =
|
|
||||||
true,
|
|
||||||
'"');
|
|
||||||
}
|
}
|
||||||
|
let id = self.name_from(start);
|
||||||
let id = if valid {
|
|
||||||
self.name_from(start)
|
|
||||||
} else {
|
|
||||||
Symbol::intern("??")
|
|
||||||
};
|
|
||||||
self.bump();
|
self.bump();
|
||||||
|
id
|
||||||
token::ByteStr(id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_raw_byte_string(&mut self) -> token::Lit {
|
fn scan_raw_byte_string(&mut self) -> token::Lit {
|
||||||
|
@ -1826,6 +1461,70 @@ impl<'a> StringReader<'a> {
|
||||||
|
|
||||||
token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count)
|
token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn validate_char_escape(&self, start_with_quote: BytePos) {
|
||||||
|
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||||
|
if let Err((off, err)) = unescape::unescape_char(lit) {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(start_with_quote, self.pos),
|
||||||
|
unescape::Mode::Char,
|
||||||
|
0..off,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_byte_escape(&self, start_with_quote: BytePos) {
|
||||||
|
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||||
|
if let Err((off, err)) = unescape::unescape_byte(lit) {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(start_with_quote, self.pos),
|
||||||
|
unescape::Mode::Byte,
|
||||||
|
0..off,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_str_escape(&self, start_with_quote: BytePos) {
|
||||||
|
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||||
|
unescape::unescape_str(lit, &mut |range, c| {
|
||||||
|
if let Err(err) = c {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(start_with_quote, self.pos),
|
||||||
|
unescape::Mode::Str,
|
||||||
|
range,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
|
||||||
|
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||||
|
unescape::unescape_byte_str(lit, &mut |range, c| {
|
||||||
|
if let Err(err) = c {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(start_with_quote, self.pos),
|
||||||
|
unescape::Mode::ByteStr,
|
||||||
|
range,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
|
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
|
||||||
|
|
|
@ -18,7 +18,6 @@ use log::debug;
|
||||||
|
|
||||||
use rustc_data_structures::fx::FxHashSet;
|
use rustc_data_structures::fx::FxHashSet;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::iter;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
|
@ -33,6 +32,11 @@ pub mod attr;
|
||||||
|
|
||||||
pub mod classify;
|
pub mod classify;
|
||||||
|
|
||||||
|
pub(crate) mod unescape;
|
||||||
|
use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte, EscapeError};
|
||||||
|
|
||||||
|
pub(crate) mod unescape_error_reporting;
|
||||||
|
|
||||||
/// Info about a parsing session.
|
/// Info about a parsing session.
|
||||||
pub struct ParseSess {
|
pub struct ParseSess {
|
||||||
pub span_diagnostic: Handler,
|
pub span_diagnostic: Handler,
|
||||||
|
@ -306,133 +310,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> {
|
||||||
Parser::new(sess, stream, None, true, false)
|
Parser::new(sess, stream, None, true, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a string representing a character literal into its final form.
|
|
||||||
/// Rather than just accepting/rejecting a given literal, unescapes it as
|
|
||||||
/// well. Can take any slice prefixed by a character escape. Returns the
|
|
||||||
/// character and the number of characters consumed.
|
|
||||||
fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) {
|
|
||||||
use std::char;
|
|
||||||
|
|
||||||
// Handle non-escaped chars first.
|
|
||||||
if lit.as_bytes()[0] != b'\\' {
|
|
||||||
// If the first byte isn't '\\' it might part of a multi-byte char, so
|
|
||||||
// get the char with chars().
|
|
||||||
let c = lit.chars().next().unwrap();
|
|
||||||
return (c, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle escaped chars.
|
|
||||||
match lit.as_bytes()[1] as char {
|
|
||||||
'"' => ('"', 2),
|
|
||||||
'n' => ('\n', 2),
|
|
||||||
'r' => ('\r', 2),
|
|
||||||
't' => ('\t', 2),
|
|
||||||
'\\' => ('\\', 2),
|
|
||||||
'\'' => ('\'', 2),
|
|
||||||
'0' => ('\0', 2),
|
|
||||||
'x' => {
|
|
||||||
let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
|
|
||||||
let c = char::from_u32(v).unwrap();
|
|
||||||
(c, 4)
|
|
||||||
}
|
|
||||||
'u' => {
|
|
||||||
assert_eq!(lit.as_bytes()[2], b'{');
|
|
||||||
let idx = lit.find('}').unwrap();
|
|
||||||
|
|
||||||
// All digits and '_' are ascii, so treat each byte as a char.
|
|
||||||
let mut v: u32 = 0;
|
|
||||||
for c in lit[3..idx].bytes() {
|
|
||||||
let c = char::from(c);
|
|
||||||
if c != '_' {
|
|
||||||
let x = c.to_digit(16).unwrap();
|
|
||||||
v = v.checked_mul(16).unwrap().checked_add(x).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let c = char::from_u32(v).unwrap_or_else(|| {
|
|
||||||
if let Some((span, diag)) = diag {
|
|
||||||
let mut diag = diag.struct_span_err(span, "invalid unicode character escape");
|
|
||||||
if v > 0x10FFFF {
|
|
||||||
diag.help("unicode escape must be at most 10FFFF").emit();
|
|
||||||
} else {
|
|
||||||
diag.help("unicode escape must not be a surrogate").emit();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'\u{FFFD}'
|
|
||||||
});
|
|
||||||
(c, (idx + 1) as isize)
|
|
||||||
}
|
|
||||||
_ => panic!("lexer should have rejected a bad character escape {}", lit)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a string representing a string literal into its final form. Does unescaping.
|
|
||||||
fn str_lit(lit: &str, diag: Option<(Span, &Handler)>) -> String {
|
|
||||||
debug!("str_lit: given {}", lit.escape_default());
|
|
||||||
let mut res = String::with_capacity(lit.len());
|
|
||||||
|
|
||||||
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
|
||||||
|
|
||||||
/// Eat everything up to a non-whitespace.
|
|
||||||
fn eat<'a>(it: &mut iter::Peekable<str::CharIndices<'a>>) {
|
|
||||||
loop {
|
|
||||||
match it.peek().map(|x| x.1) {
|
|
||||||
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
|
|
||||||
it.next();
|
|
||||||
},
|
|
||||||
_ => { break; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut chars = lit.char_indices().peekable();
|
|
||||||
while let Some((i, c)) = chars.next() {
|
|
||||||
match c {
|
|
||||||
'\\' => {
|
|
||||||
let ch = chars.peek().unwrap_or_else(|| {
|
|
||||||
panic!("{}", error(i))
|
|
||||||
}).1;
|
|
||||||
|
|
||||||
if ch == '\n' {
|
|
||||||
eat(&mut chars);
|
|
||||||
} else if ch == '\r' {
|
|
||||||
chars.next();
|
|
||||||
let ch = chars.peek().unwrap_or_else(|| {
|
|
||||||
panic!("{}", error(i))
|
|
||||||
}).1;
|
|
||||||
|
|
||||||
if ch != '\n' {
|
|
||||||
panic!("lexer accepted bare CR");
|
|
||||||
}
|
|
||||||
eat(&mut chars);
|
|
||||||
} else {
|
|
||||||
// otherwise, a normal escape
|
|
||||||
let (c, n) = char_lit(&lit[i..], diag);
|
|
||||||
for _ in 0..n - 1 { // we don't need to move past the first \
|
|
||||||
chars.next();
|
|
||||||
}
|
|
||||||
res.push(c);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'\r' => {
|
|
||||||
let ch = chars.peek().unwrap_or_else(|| {
|
|
||||||
panic!("{}", error(i))
|
|
||||||
}).1;
|
|
||||||
|
|
||||||
if ch != '\n' {
|
|
||||||
panic!("lexer accepted bare CR");
|
|
||||||
}
|
|
||||||
chars.next();
|
|
||||||
res.push('\n');
|
|
||||||
}
|
|
||||||
c => res.push(c),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
|
|
||||||
debug!("parse_str_lit: returning {}", res);
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a string representing a raw string literal into its final form. The
|
/// Parses a string representing a raw string literal into its final form. The
|
||||||
/// only operation this does is convert embedded CRLF into a single LF.
|
/// only operation this does is convert embedded CRLF into a single LF.
|
||||||
fn raw_str_lit(lit: &str) -> String {
|
fn raw_str_lit(lit: &str) -> String {
|
||||||
|
@ -475,9 +352,23 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||||
use ast::LitKind;
|
use ast::LitKind;
|
||||||
|
|
||||||
match lit {
|
match lit {
|
||||||
token::Byte(i) => (true, Some(LitKind::Byte(byte_lit(&i.as_str()).0))),
|
token::Byte(i) => {
|
||||||
token::Char(i) => (true, Some(LitKind::Char(char_lit(&i.as_str(), diag).0))),
|
let lit_kind = match unescape_byte(&i.as_str()) {
|
||||||
token::Err(i) => (true, Some(LitKind::Err(i))),
|
Ok(c) => LitKind::Byte(c),
|
||||||
|
Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
|
||||||
|
Err(_) => LitKind::Byte(0),
|
||||||
|
};
|
||||||
|
(true, Some(lit_kind))
|
||||||
|
},
|
||||||
|
token::Char(i) => {
|
||||||
|
let lit_kind = match unescape_char(&i.as_str()) {
|
||||||
|
Ok(c) => LitKind::Char(c),
|
||||||
|
Err((_, EscapeError::MoreThanOneChar)) => LitKind::Err(i),
|
||||||
|
Err(_) => LitKind::Char('\u{FFFD}'),
|
||||||
|
};
|
||||||
|
(true, Some(lit_kind))
|
||||||
|
},
|
||||||
|
token::Err(i) => (true, Some(LitKind::Err(i))),
|
||||||
|
|
||||||
// There are some valid suffixes for integer and float literals,
|
// There are some valid suffixes for integer and float literals,
|
||||||
// so all the handling is done internally.
|
// so all the handling is done internally.
|
||||||
|
@ -491,7 +382,14 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||||
// string in the Token.
|
// string in the Token.
|
||||||
let s = &sym.as_str();
|
let s = &sym.as_str();
|
||||||
if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
|
if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
|
||||||
sym = Symbol::intern(&str_lit(s, diag));
|
let mut buf = String::with_capacity(s.len());
|
||||||
|
unescape_str(s, &mut |_, unescaped_char| {
|
||||||
|
match unescaped_char {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(_) => buf.push('\u{FFFD}'),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
sym = Symbol::intern(&buf)
|
||||||
}
|
}
|
||||||
(true, Some(LitKind::Str(sym, ast::StrStyle::Cooked)))
|
(true, Some(LitKind::Str(sym, ast::StrStyle::Cooked)))
|
||||||
}
|
}
|
||||||
|
@ -504,7 +402,16 @@ crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Ha
|
||||||
(true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n))))
|
(true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n))))
|
||||||
}
|
}
|
||||||
token::ByteStr(i) => {
|
token::ByteStr(i) => {
|
||||||
(true, Some(LitKind::ByteStr(byte_str_lit(&i.as_str()))))
|
let s = &i.as_str();
|
||||||
|
let mut buf = Vec::with_capacity(s.len());
|
||||||
|
unescape_byte_str(s, &mut |_, unescaped_byte| {
|
||||||
|
match unescaped_byte {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(_) => buf.push(0),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
buf.shrink_to_fit();
|
||||||
|
(true, Some(LitKind::ByteStr(Lrc::new(buf))))
|
||||||
}
|
}
|
||||||
token::ByteStrRaw(i, _) => {
|
token::ByteStrRaw(i, _) => {
|
||||||
(true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))))
|
(true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))))
|
||||||
|
@ -559,95 +466,6 @@ fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||||
filtered_float_lit(Symbol::intern(s), suffix, diag)
|
filtered_float_lit(Symbol::intern(s), suffix, diag)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decodes the text of a byte literal and reports how many bytes of `lit` it used.
///
/// A one-byte `lit` is returned as-is with length 1. Otherwise `lit` must start with a
/// backslash: the simple escapes (`\"`, `\n`, `\r`, `\t`, `\\`, `\'`, `\0`) have length 2,
/// and anything else is read as two hex digits taken from `lit[2..4]`, with length 4.
/// Panics on input that does not fit one of these shapes.
fn byte_lit(lit: &str) -> (u8, usize) {
    let raw = lit.as_bytes();
    if lit.len() == 1 {
        return (raw[0], 1);
    }

    let invalid = |step: usize| {
        format!("lexer accepted invalid byte literal {} step {}", lit, step)
    };
    assert_eq!(raw[0], b'\\', "{}", invalid(0));

    let simple = match raw[1] {
        b'"' => Some(b'"'),
        b'n' => Some(b'\n'),
        b'r' => Some(b'\r'),
        b't' => Some(b'\t'),
        b'\\' => Some(b'\\'),
        b'\'' => Some(b'\''),
        b'0' => Some(b'\0'),
        _ => None,
    };
    if let Some(byte) = simple {
        return (byte, 2);
    }

    // Every other escape is treated as a two-digit hexadecimal escape.
    match u64::from_str_radix(&lit[2..4], 16) {
        Ok(value) if value > 0xFF => panic!("{}", invalid(2)),
        Ok(value) => (value as u8, 4),
        Err(_) => panic!("{}", invalid(3)),
    }
}
|
|
||||||
|
|
||||||
fn byte_str_lit(lit: &str) -> Lrc<Vec<u8>> {
|
|
||||||
let mut res = Vec::with_capacity(lit.len());
|
|
||||||
|
|
||||||
let error = |i| panic!("lexer should have rejected {} at {}", lit, i);
|
|
||||||
|
|
||||||
/// Eat everything up to a non-whitespace.
|
|
||||||
fn eat<I: Iterator<Item=(usize, u8)>>(it: &mut iter::Peekable<I>) {
|
|
||||||
loop {
|
|
||||||
match it.peek().map(|x| x.1) {
|
|
||||||
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
|
|
||||||
it.next();
|
|
||||||
},
|
|
||||||
_ => { break; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// byte string literals *must* be ASCII, but the escapes don't have to be
|
|
||||||
let mut chars = lit.bytes().enumerate().peekable();
|
|
||||||
loop {
|
|
||||||
match chars.next() {
|
|
||||||
Some((i, b'\\')) => {
|
|
||||||
match chars.peek().unwrap_or_else(|| error(i)).1 {
|
|
||||||
b'\n' => eat(&mut chars),
|
|
||||||
b'\r' => {
|
|
||||||
chars.next();
|
|
||||||
if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
|
|
||||||
panic!("lexer accepted bare CR");
|
|
||||||
}
|
|
||||||
eat(&mut chars);
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// otherwise, a normal escape
|
|
||||||
let (c, n) = byte_lit(&lit[i..]);
|
|
||||||
// we don't need to move past the first \
|
|
||||||
for _ in 0..n - 1 {
|
|
||||||
chars.next();
|
|
||||||
}
|
|
||||||
res.push(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Some((i, b'\r')) => {
|
|
||||||
if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' {
|
|
||||||
panic!("lexer accepted bare CR");
|
|
||||||
}
|
|
||||||
chars.next();
|
|
||||||
res.push(b'\n');
|
|
||||||
}
|
|
||||||
Some((_, c)) => res.push(c),
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Lrc::new(res)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||||
-> Option<ast::LitKind> {
|
-> Option<ast::LitKind> {
|
||||||
// s can only be ascii, byte indexing is fine
|
// s can only be ascii, byte indexing is fine
|
||||||
|
|
515
src/libsyntax/parse/unescape.rs
Normal file
515
src/libsyntax/parse/unescape.rs
Normal file
|
@ -0,0 +1,515 @@
|
||||||
|
//! Utilities for validating string and char literals and turning them into
|
||||||
|
//! values they represent.
|
||||||
|
|
||||||
|
use std::str::Chars;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// Reasons why the contents of a char, byte, string or byte string literal
/// cannot be unescaped.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum EscapeError {
    /// A char or byte literal with no contents at all.
    ZeroChars,
    /// A char or byte literal with text left over after its first character or escape.
    MoreThanOneChar,

    /// A backslash with nothing after it.
    LoneSlash,
    /// A backslash followed by a character that does not start any escape (e.g. `\v`).
    InvalidEscape,
    /// A raw carriage return that is not followed by a line feed.
    BareCarriageReturn,
    /// A raw character that has to be written as an escape instead: `scan_escape`
    /// reports this for a tab, a line feed, a CR LF pair, and for the quote
    /// character that delimits the literal being scanned.
    EscapeOnlyChar,

    /// `\x` followed by fewer than two characters.
    TooShortHexEscape,
    /// One of the two characters after `\x` is not a hexadecimal digit.
    InvalidCharInHexEscape,
    /// `\xNN` with a value above `0x7F` outside of byte and byte string literals.
    OutOfRangeHexEscape,

    /// `\u` that is not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A character inside `\u{...}` that is not a hexadecimal digit.
    InvalidCharInUnicodeEscape,
    /// `\u{}`: no digits between the braces.
    EmptyUnicodeEscape,
    /// The input ended before the closing `}` of a `\u{...}` escape.
    UnclosedUnicodeEscape,
    /// `\u{_...}`: an underscore right after the opening brace.
    LeadingUnderscoreUnicodeEscape,
    /// `\u{...}` with more than six hexadecimal digits.
    OverlongUnicodeEscape,
    /// `\u{...}` whose value is at most `0x10FFFF` but still not a valid `char`.
    LoneSurrogateUnicodeEscape,
    /// `\u{...}` whose value is above `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A well-formed `\u{...}` escape inside a byte or byte string literal.
    UnicodeEscapeInByte,
    /// A raw non-ASCII character inside a byte or byte string literal.
    NonAsciiCharInByte,
}
|
||||||
|
|
||||||
|
/// Takes a contents of a char literal (without quotes), and returns an
|
||||||
|
/// unescaped char or an error
|
||||||
|
pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
|
||||||
|
let mut chars = literal_text.chars();
|
||||||
|
unescape_char_or_byte(&mut chars, Mode::Char)
|
||||||
|
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes a contents of a string literal (without quotes) and produces a
|
||||||
|
/// sequence of escaped characters or errors.
|
||||||
|
pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||||
|
{
|
||||||
|
unescape_str_or_byte_str(literal_text, Mode::Str, callback)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
|
||||||
|
let mut chars = literal_text.chars();
|
||||||
|
unescape_char_or_byte(&mut chars, Mode::Byte)
|
||||||
|
.map(byte_from_char)
|
||||||
|
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes a contents of a string literal (without quotes) and produces a
|
||||||
|
/// sequence of escaped characters or errors.
|
||||||
|
pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<u8, EscapeError>),
|
||||||
|
{
|
||||||
|
unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
|
||||||
|
callback(range, char.map(byte_from_char))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of literal whose contents are being unescaped.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Mode {
    /// A char literal: single-quoted, not byte-valued.
    Char,
    /// A string literal: double-quoted, not byte-valued.
    Str,
    /// A byte literal: single-quoted, byte-valued.
    Byte,
    /// A byte string literal: double-quoted, byte-valued.
    ByteStr,
}

impl Mode {
    /// Whether this is one of the single-quoted kinds (`Char` or `Byte`).
    fn in_single_quotes(self) -> bool {
        !self.in_double_quotes()
    }

    /// Whether this is one of the double-quoted kinds (`Str` or `ByteStr`).
    pub(crate) fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    /// Whether this is one of the byte-valued kinds (`Byte` or `ByteStr`).
    pub(crate) fn is_bytes(self) -> bool {
        match self {
            Mode::Char | Mode::Str => false,
            Mode::Byte | Mode::ByteStr => true,
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||||
|
if first_char != '\\' {
|
||||||
|
return match first_char {
|
||||||
|
'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
|
||||||
|
'\r' => Err(if chars.clone().next() == Some('\n') {
|
||||||
|
EscapeError::EscapeOnlyChar
|
||||||
|
} else {
|
||||||
|
EscapeError::BareCarriageReturn
|
||||||
|
}),
|
||||||
|
'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
|
||||||
|
'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
|
||||||
|
_ => {
|
||||||
|
if mode.is_bytes() && !first_char.is_ascii() {
|
||||||
|
return Err(EscapeError::NonAsciiCharInByte);
|
||||||
|
}
|
||||||
|
Ok(first_char)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
|
||||||
|
|
||||||
|
let res = match second_char {
|
||||||
|
'"' => '"',
|
||||||
|
'n' => '\n',
|
||||||
|
'r' => '\r',
|
||||||
|
't' => '\t',
|
||||||
|
'\\' => '\\',
|
||||||
|
'\'' => '\'',
|
||||||
|
'0' => '\0',
|
||||||
|
|
||||||
|
'x' => {
|
||||||
|
let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
|
||||||
|
let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
|
||||||
|
|
||||||
|
let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
|
||||||
|
let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
|
||||||
|
|
||||||
|
let value = hi * 16 + lo;
|
||||||
|
|
||||||
|
if !mode.is_bytes() && !is_ascii(value) {
|
||||||
|
return Err(EscapeError::OutOfRangeHexEscape);
|
||||||
|
}
|
||||||
|
let value = value as u8;
|
||||||
|
|
||||||
|
value as char
|
||||||
|
}
|
||||||
|
|
||||||
|
'u' => {
|
||||||
|
if chars.next() != Some('{') {
|
||||||
|
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut n_digits = 1;
|
||||||
|
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||||
|
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||||
|
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||||
|
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match chars.next() {
|
||||||
|
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||||
|
Some('_') => continue,
|
||||||
|
Some('}') => {
|
||||||
|
if n_digits > 6 {
|
||||||
|
return Err(EscapeError::OverlongUnicodeEscape);
|
||||||
|
}
|
||||||
|
if mode.is_bytes() {
|
||||||
|
return Err(EscapeError::UnicodeEscapeInByte);
|
||||||
|
}
|
||||||
|
|
||||||
|
break std::char::from_u32(value).ok_or_else(|| {
|
||||||
|
if value > 0x10FFFF {
|
||||||
|
EscapeError::OutOfRangeUnicodeEscape
|
||||||
|
} else {
|
||||||
|
EscapeError::LoneSurrogateUnicodeEscape
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
Some(c) => {
|
||||||
|
let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||||
|
n_digits += 1;
|
||||||
|
if n_digits > 6 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let digit = digit as u32;
|
||||||
|
value = value * 16 + digit;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => return Err(EscapeError::InvalidEscape),
|
||||||
|
};
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
|
||||||
|
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
|
||||||
|
let res = scan_escape(first_char, chars, mode)?;
|
||||||
|
if chars.next().is_some() {
|
||||||
|
return Err(EscapeError::MoreThanOneChar);
|
||||||
|
}
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes a contents of a string literal (without quotes) and produces a
|
||||||
|
/// sequence of escaped characters or errors.
|
||||||
|
fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||||
|
{
|
||||||
|
assert!(mode.in_double_quotes());
|
||||||
|
let initial_len = src.len();
|
||||||
|
let mut chars = src.chars();
|
||||||
|
while let Some(first_char) = chars.next() {
|
||||||
|
let start = initial_len - chars.as_str().len() - first_char.len_utf8();
|
||||||
|
|
||||||
|
let unescaped_char = match first_char {
|
||||||
|
'\\' => {
|
||||||
|
let (second_char, third_char) = {
|
||||||
|
let mut chars = chars.clone();
|
||||||
|
(chars.next(), chars.next())
|
||||||
|
};
|
||||||
|
match (second_char, third_char) {
|
||||||
|
(Some('\n'), _) | (Some('\r'), Some('\n')) => {
|
||||||
|
skip_ascii_whitespace(&mut chars);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => scan_escape(first_char, &mut chars, mode),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'\r' => {
|
||||||
|
let second_char = chars.clone().next();
|
||||||
|
if second_char == Some('\n') {
|
||||||
|
chars.next();
|
||||||
|
Ok('\n')
|
||||||
|
} else {
|
||||||
|
scan_escape(first_char, &mut chars, mode)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'\n' => Ok('\n'),
|
||||||
|
'\t' => Ok('\t'),
|
||||||
|
_ => scan_escape(first_char, &mut chars, mode),
|
||||||
|
};
|
||||||
|
let end = initial_len - chars.as_str().len();
|
||||||
|
callback(start..end, unescaped_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
|
||||||
|
let str = chars.as_str();
|
||||||
|
let first_non_space = str
|
||||||
|
.bytes()
|
||||||
|
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||||
|
.unwrap_or(str.len());
|
||||||
|
*chars = str[first_non_space..].chars()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Narrows a `char` to the byte with the same numeric value.
///
/// Panics if the code point does not fit into a `u8`.
fn byte_from_char(c: char) -> u8 {
    let code_point = u32::from(c);
    assert!(code_point <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte");
    code_point as u8
}
|
||||||
|
|
||||||
|
/// Whether `x` lies in the ASCII range `0x00..=0x7F`.
fn is_ascii(x: u32) -> bool {
    x < 0x80
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Every malformed char literal is rejected with the expected error kind.
    #[test]
    fn test_unescape_char_bad() {
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
            // Name the literal so a failure points at the offending case.
            assert_eq!(actual_result, Err(expected_error), "literal: {:?}", literal_text);
        }

        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);

        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);

        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);
        check(r"\u{0}x", EscapeError::MoreThanOneChar);
        check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);

        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);

        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
        check(r"\xff", EscapeError::OutOfRangeHexEscape);
        check(r"\xFF", EscapeError::OutOfRangeHexEscape);
        check(r"\x80", EscapeError::OutOfRangeHexEscape);

        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
        check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);

        check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);

        check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
    }

    /// Well-formed char literals unescape to the expected character.
    #[test]
    fn test_unescape_char_good() {
        fn check(literal_text: &str, expected_char: char) {
            let actual_result = unescape_char(literal_text);
            assert_eq!(actual_result, Ok(expected_char), "literal: {:?}", literal_text);
        }

        check("a", 'a');
        check("ы", 'ы');
        check("🦀", '🦀');

        check(r#"\""#, '"');
        check(r"\n", '\n');
        check(r"\r", '\r');
        check(r"\t", '\t');
        check(r"\\", '\\');
        check(r"\'", '\'');
        check(r"\0", '\0');

        check(r"\x00", '\0');
        check(r"\x5a", 'Z');
        check(r"\x5A", 'Z');
        check(r"\x7f", 127 as char);

        check(r"\u{0}", '\0');
        check(r"\u{000000}", '\0');
        check(r"\u{41}", 'A');
        check(r"\u{0041}", 'A');
        check(r"\u{00_41}", 'A');
        check(r"\u{4__1__}", 'A');
        check(r"\u{1F63b}", '😻');
    }

    /// Well-formed string contents unescape to the expected string.
    #[test]
    fn test_unescape_str_good() {
        fn check(literal_text: &str, expected: &str) {
            // Collect characters until the first error, which is then kept instead.
            let mut buf = Ok(String::with_capacity(literal_text.len()));
            unescape_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected), "literal: {:?}", literal_text)
        }

        check("foo", "foo");
        check("", "");
        check(" \t\n\r\n", " \t\n\n");

        check("hello \\\n world", "hello world");
        check("hello \\\r\n world", "hello world");
        check("thread's", "thread's")
    }

    /// Every malformed byte literal is rejected with the expected error kind.
    #[test]
    fn test_unescape_byte_bad() {
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
            // Name the literal so a failure points at the offending case.
            assert_eq!(actual_result, Err(expected_error), "literal: {:?}", literal_text);
        }

        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);

        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);

        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);

        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);

        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);

        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);

        check("ы", EscapeError::NonAsciiCharInByte);
        check("🦀", EscapeError::NonAsciiCharInByte);

        check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0}x", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
    }

    /// Well-formed byte literals unescape to the expected byte.
    #[test]
    fn test_unescape_byte_good() {
        fn check(literal_text: &str, expected_byte: u8) {
            let actual_result = unescape_byte(literal_text);
            assert_eq!(actual_result, Ok(expected_byte), "literal: {:?}", literal_text);
        }

        check("a", b'a');

        check(r#"\""#, b'"');
        check(r"\n", b'\n');
        check(r"\r", b'\r');
        check(r"\t", b'\t');
        check(r"\\", b'\\');
        check(r"\'", b'\'');
        check(r"\0", b'\0');

        check(r"\x00", b'\0');
        check(r"\x5a", b'Z');
        check(r"\x5A", b'Z');
        check(r"\x7f", 127);
        check(r"\x80", 128);
        check(r"\xff", 255);
        check(r"\xFF", 255);
    }

    /// Well-formed byte string contents unescape to the expected bytes.
    #[test]
    fn test_unescape_byte_str_good() {
        fn check(literal_text: &str, expected: &[u8]) {
            // Collect bytes until the first error, which is then kept instead.
            let mut buf = Ok(Vec::with_capacity(literal_text.len()));
            unescape_byte_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected), "literal: {:?}", literal_text)
        }

        check("foo", b"foo");
        check("", b"");
        check(" \t\n\r\n", b" \t\n\n");

        check("hello \\\n world", b"hello world");
        check("hello \\\r\n world", b"hello world");
        check("thread's", b"thread's")
    }
}
|
200
src/libsyntax/parse/unescape_error_reporting.rs
Normal file
200
src/libsyntax/parse/unescape_error_reporting.rs
Normal file
|
@ -0,0 +1,200 @@
|
||||||
|
//! Utilities for rendering escape sequence errors as diagnostics.
|
||||||
|
|
||||||
|
use std::ops::Range;
|
||||||
|
use std::iter::once;
|
||||||
|
|
||||||
|
use syntax_pos::{Span, BytePos};
|
||||||
|
|
||||||
|
use crate::errors::{Handler, Applicability};
|
||||||
|
|
||||||
|
use super::unescape::{EscapeError, Mode};
|
||||||
|
|
||||||
|
pub(crate) fn emit_unescape_error(
|
||||||
|
handler: &Handler,
|
||||||
|
// interior part of the literal, without quotes
|
||||||
|
lit: &str,
|
||||||
|
// full span of the literal, including quotes
|
||||||
|
span_with_quotes: Span,
|
||||||
|
mode: Mode,
|
||||||
|
// range of the error inside `lit`
|
||||||
|
range: Range<usize>,
|
||||||
|
error: EscapeError,
|
||||||
|
) {
|
||||||
|
log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
|
||||||
|
lit, span_with_quotes, mode, range, error);
|
||||||
|
let span = {
|
||||||
|
let Range { start, end } = range;
|
||||||
|
let (start, end) = (start as u32, end as u32);
|
||||||
|
let lo = span_with_quotes.lo() + BytePos(start + 1);
|
||||||
|
let hi = lo + BytePos(end - start);
|
||||||
|
span_with_quotes
|
||||||
|
.with_lo(lo)
|
||||||
|
.with_hi(hi)
|
||||||
|
};
|
||||||
|
let last_char = || {
|
||||||
|
let c = lit[range.clone()].chars().rev().next().unwrap();
|
||||||
|
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
|
||||||
|
(c, span)
|
||||||
|
};
|
||||||
|
match error {
|
||||||
|
EscapeError::LoneSurrogateUnicodeEscape => {
|
||||||
|
handler.struct_span_err(span, "invalid unicode character escape")
|
||||||
|
.help("unicode escape must not be a surrogate")
|
||||||
|
.emit();
|
||||||
|
}
|
||||||
|
EscapeError::OutOfRangeUnicodeEscape => {
|
||||||
|
handler.struct_span_err(span, "invalid unicode character escape")
|
||||||
|
.help("unicode escape must be at most 10FFFF")
|
||||||
|
.emit();
|
||||||
|
}
|
||||||
|
EscapeError::MoreThanOneChar => {
|
||||||
|
handler
|
||||||
|
.struct_span_err(
|
||||||
|
span_with_quotes,
|
||||||
|
"character literal may only contain one codepoint",
|
||||||
|
)
|
||||||
|
.span_suggestion(
|
||||||
|
span_with_quotes,
|
||||||
|
"if you meant to write a `str` literal, use double quotes",
|
||||||
|
format!("\"{}\"", lit),
|
||||||
|
Applicability::MachineApplicable,
|
||||||
|
).emit()
|
||||||
|
}
|
||||||
|
EscapeError::EscapeOnlyChar => {
|
||||||
|
let (c, _span) = last_char();
|
||||||
|
|
||||||
|
let mut msg = if mode.is_bytes() {
|
||||||
|
"byte constant must be escaped: "
|
||||||
|
} else {
|
||||||
|
"character constant must be escaped: "
|
||||||
|
}.to_string();
|
||||||
|
push_escaped_char(&mut msg, c);
|
||||||
|
|
||||||
|
handler.span_err(span, msg.as_str())
|
||||||
|
}
|
||||||
|
EscapeError::BareCarriageReturn => {
|
||||||
|
let msg = if mode.in_double_quotes() {
|
||||||
|
"bare CR not allowed in string, use \\r instead"
|
||||||
|
} else {
|
||||||
|
"character constant must be escaped: \\r"
|
||||||
|
};
|
||||||
|
handler.span_err(span, msg);
|
||||||
|
}
|
||||||
|
EscapeError::InvalidEscape => {
|
||||||
|
let (c, span) = last_char();
|
||||||
|
|
||||||
|
let label = if mode.is_bytes() {
|
||||||
|
"unknown byte escape"
|
||||||
|
} else {
|
||||||
|
"unknown character escape"
|
||||||
|
};
|
||||||
|
let mut msg = label.to_string();
|
||||||
|
msg.push_str(": ");
|
||||||
|
push_escaped_char(&mut msg, c);
|
||||||
|
|
||||||
|
let mut diag = handler.struct_span_err(span, msg.as_str());
|
||||||
|
diag.span_label(span, label);
|
||||||
|
if c == '{' || c == '}' && !mode.is_bytes() {
|
||||||
|
diag.help("if used in a formatting string, \
|
||||||
|
curly braces are escaped with `{{` and `}}`");
|
||||||
|
} else if c == '\r' {
|
||||||
|
diag.help("this is an isolated carriage return; \
|
||||||
|
consider checking your editor and version control settings");
|
||||||
|
}
|
||||||
|
diag.emit();
|
||||||
|
}
|
||||||
|
EscapeError::TooShortHexEscape => {
|
||||||
|
handler.span_err(span, "numeric character escape is too short")
|
||||||
|
}
|
||||||
|
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
|
||||||
|
let (c, span) = last_char();
|
||||||
|
|
||||||
|
let mut msg = if error == EscapeError::InvalidCharInHexEscape {
|
||||||
|
"invalid character in numeric character escape: "
|
||||||
|
} else {
|
||||||
|
"invalid character in unicode escape: "
|
||||||
|
}.to_string();
|
||||||
|
push_escaped_char(&mut msg, c);
|
||||||
|
|
||||||
|
handler.span_err(span, msg.as_str())
|
||||||
|
}
|
||||||
|
EscapeError::NonAsciiCharInByte => {
|
||||||
|
assert!(mode.is_bytes());
|
||||||
|
let (_c, span) = last_char();
|
||||||
|
handler.span_err(span, "byte constant must be ASCII. \
|
||||||
|
Use a \\xHH escape for a non-ASCII byte")
|
||||||
|
}
|
||||||
|
EscapeError::OutOfRangeHexEscape => {
|
||||||
|
handler.span_err(span, "this form of character escape may only be used \
|
||||||
|
with characters in the range [\\x00-\\x7f]")
|
||||||
|
}
|
||||||
|
EscapeError::LeadingUnderscoreUnicodeEscape => {
|
||||||
|
let (_c, span) = last_char();
|
||||||
|
handler.span_err(span, "invalid start of unicode escape")
|
||||||
|
}
|
||||||
|
EscapeError::OverlongUnicodeEscape => {
|
||||||
|
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
|
||||||
|
}
|
||||||
|
EscapeError::UnclosedUnicodeEscape => {
|
||||||
|
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
|
||||||
|
}
|
||||||
|
EscapeError::NoBraceInUnicodeEscape => {
|
||||||
|
let msg = "incorrect unicode escape sequence";
|
||||||
|
let mut diag = handler.struct_span_err(span, msg);
|
||||||
|
|
||||||
|
let mut suggestion = "\\u{".to_owned();
|
||||||
|
let mut suggestion_len = 0;
|
||||||
|
let (c, char_span) = last_char();
|
||||||
|
let chars = once(c).chain(lit[range.end..].chars());
|
||||||
|
for c in chars.take(6).take_while(|c| c.is_digit(16)) {
|
||||||
|
suggestion.push(c);
|
||||||
|
suggestion_len += c.len_utf8();
|
||||||
|
}
|
||||||
|
|
||||||
|
if suggestion_len > 0 {
|
||||||
|
suggestion.push('}');
|
||||||
|
let lo = char_span.lo();
|
||||||
|
let hi = lo + BytePos(suggestion_len as u32);
|
||||||
|
diag.span_suggestion(
|
||||||
|
span.with_lo(lo).with_hi(hi),
|
||||||
|
"format of unicode escape sequences uses braces",
|
||||||
|
suggestion,
|
||||||
|
Applicability::MaybeIncorrect,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
diag.span_label(span, msg);
|
||||||
|
diag.help(
|
||||||
|
"format of unicode escape sequences is `\\u{...}`",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
diag.emit();
|
||||||
|
}
|
||||||
|
EscapeError::UnicodeEscapeInByte => {
|
||||||
|
handler.span_err(span, "unicode escape sequences cannot be used \
|
||||||
|
as a byte or in a byte string")
|
||||||
|
}
|
||||||
|
EscapeError::EmptyUnicodeEscape => {
|
||||||
|
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
|
||||||
|
}
|
||||||
|
EscapeError::ZeroChars => {
|
||||||
|
handler.span_err(span, "empty character literal")
|
||||||
|
}
|
||||||
|
EscapeError::LoneSlash => {
|
||||||
|
panic!("lexer accepted unterminated literal with trailing slash")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends `c` to the diagnostic message `msg`.
///
/// Printable ASCII (`U+0020..=U+007E`) is appended verbatim, so `\`, `'` and `"`
/// stay unescaped in user-facing messages; every other character is appended in
/// its `char::escape_default` form.
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
    // Space plus the ASCII graphic range is exactly `'\u{20}'..='\u{7e}'`.
    let printable_ascii = c == ' ' || c.is_ascii_graphic();
    if printable_ascii {
        msg.push(c);
        return;
    }
    msg.extend(c.escape_default());
}
|
|
@ -1,3 +1,4 @@
|
||||||
|
// compile-flags: -Z continue-parse-after-error
|
||||||
// ignore-tidy-tab
|
// ignore-tidy-tab
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
@ -76,7 +77,7 @@ raw { \n
|
||||||
|
|
||||||
println!("\x7B}\u8 {", 1);
|
println!("\x7B}\u8 {", 1);
|
||||||
//~^ ERROR incorrect unicode escape sequence
|
//~^ ERROR incorrect unicode escape sequence
|
||||||
//~| ERROR argument never used
|
//~| ERROR invalid format string: expected `'}'` but string was terminated
|
||||||
|
|
||||||
// note: raw strings don't escape `\xFF` and `\u{FF}` sequences
|
// note: raw strings don't escape `\xFF` and `\u{FF}` sequences
|
||||||
println!(r#"\x7B}\u{8} {"#, 1);
|
println!(r#"\x7B}\u{8} {"#, 1);
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
error: incorrect unicode escape sequence
|
error: incorrect unicode escape sequence
|
||||||
--> $DIR/format-string-error-2.rs:77:20
|
--> $DIR/format-string-error-2.rs:78:20
|
||||||
|
|
|
|
||||||
LL | println!("\x7B}\u8 {", 1);
|
LL | println!("\x7B}\u8 {", 1);
|
||||||
| ^^-
|
| ^^-
|
||||||
| |
|
| |
|
||||||
| help: format of unicode escape sequences uses braces: `\u{8}`
|
| help: format of unicode escape sequences uses braces: `\u{8}`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'a'`
|
error: invalid format string: expected `'}'`, found `'a'`
|
||||||
--> $DIR/format-string-error-2.rs:5:5
|
--> $DIR/format-string-error-2.rs:6:5
|
||||||
|
|
|
|
||||||
LL | format!("{
|
LL | format!("{
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -17,7 +17,7 @@ LL | a");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'b'`
|
error: invalid format string: expected `'}'`, found `'b'`
|
||||||
--> $DIR/format-string-error-2.rs:9:5
|
--> $DIR/format-string-error-2.rs:10:5
|
||||||
|
|
|
|
||||||
LL | format!("{ \
|
LL | format!("{ \
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -28,7 +28,7 @@ LL | b");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'\'`
|
error: invalid format string: expected `'}'`, found `'\'`
|
||||||
--> $DIR/format-string-error-2.rs:11:18
|
--> $DIR/format-string-error-2.rs:12:18
|
||||||
|
|
|
|
||||||
LL | format!(r#"{ \
|
LL | format!(r#"{ \
|
||||||
| - ^ expected `}` in format string
|
| - ^ expected `}` in format string
|
||||||
|
@ -38,7 +38,7 @@ LL | format!(r#"{ \
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'\'`
|
error: invalid format string: expected `'}'`, found `'\'`
|
||||||
--> $DIR/format-string-error-2.rs:15:18
|
--> $DIR/format-string-error-2.rs:16:18
|
||||||
|
|
|
|
||||||
LL | format!(r#"{ \n
|
LL | format!(r#"{ \n
|
||||||
| - ^ expected `}` in format string
|
| - ^ expected `}` in format string
|
||||||
|
@ -48,7 +48,7 @@ LL | format!(r#"{ \n
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'e'`
|
error: invalid format string: expected `'}'`, found `'e'`
|
||||||
--> $DIR/format-string-error-2.rs:21:5
|
--> $DIR/format-string-error-2.rs:22:5
|
||||||
|
|
|
|
||||||
LL | format!("{ \n
|
LL | format!("{ \n
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -59,7 +59,7 @@ LL | e");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'a'`
|
error: invalid format string: expected `'}'`, found `'a'`
|
||||||
--> $DIR/format-string-error-2.rs:25:5
|
--> $DIR/format-string-error-2.rs:26:5
|
||||||
|
|
|
|
||||||
LL | {
|
LL | {
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -69,7 +69,7 @@ LL | a");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'a'`
|
error: invalid format string: expected `'}'`, found `'a'`
|
||||||
--> $DIR/format-string-error-2.rs:29:5
|
--> $DIR/format-string-error-2.rs:30:5
|
||||||
|
|
|
|
||||||
LL | {
|
LL | {
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -79,7 +79,7 @@ LL | a
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'b'`
|
error: invalid format string: expected `'}'`, found `'b'`
|
||||||
--> $DIR/format-string-error-2.rs:35:5
|
--> $DIR/format-string-error-2.rs:36:5
|
||||||
|
|
|
|
||||||
LL | { \
|
LL | { \
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -90,7 +90,7 @@ LL | b");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'b'`
|
error: invalid format string: expected `'}'`, found `'b'`
|
||||||
--> $DIR/format-string-error-2.rs:40:5
|
--> $DIR/format-string-error-2.rs:41:5
|
||||||
|
|
|
|
||||||
LL | { \
|
LL | { \
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -101,7 +101,7 @@ LL | b \
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'\'`
|
error: invalid format string: expected `'}'`, found `'\'`
|
||||||
--> $DIR/format-string-error-2.rs:45:8
|
--> $DIR/format-string-error-2.rs:46:8
|
||||||
|
|
|
|
||||||
LL | raw { \
|
LL | raw { \
|
||||||
| - ^ expected `}` in format string
|
| - ^ expected `}` in format string
|
||||||
|
@ -111,7 +111,7 @@ LL | raw { \
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'\'`
|
error: invalid format string: expected `'}'`, found `'\'`
|
||||||
--> $DIR/format-string-error-2.rs:50:8
|
--> $DIR/format-string-error-2.rs:51:8
|
||||||
|
|
|
|
||||||
LL | raw { \n
|
LL | raw { \n
|
||||||
| - ^ expected `}` in format string
|
| - ^ expected `}` in format string
|
||||||
|
@ -121,7 +121,7 @@ LL | raw { \n
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'e'`
|
error: invalid format string: expected `'}'`, found `'e'`
|
||||||
--> $DIR/format-string-error-2.rs:57:5
|
--> $DIR/format-string-error-2.rs:58:5
|
||||||
|
|
|
|
||||||
LL | { \n
|
LL | { \n
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -132,7 +132,7 @@ LL | e");
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: expected `'}'`, found `'a'`
|
error: invalid format string: expected `'}'`, found `'a'`
|
||||||
--> $DIR/format-string-error-2.rs:67:5
|
--> $DIR/format-string-error-2.rs:68:5
|
||||||
|
|
|
|
||||||
LL | {
|
LL | {
|
||||||
| - because of this opening brace
|
| - because of this opening brace
|
||||||
|
@ -142,13 +142,13 @@ LL | asdf}
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: 1 positional argument in format string, but no arguments were given
|
error: 1 positional argument in format string, but no arguments were given
|
||||||
--> $DIR/format-string-error-2.rs:70:17
|
--> $DIR/format-string-error-2.rs:71:17
|
||||||
|
|
|
|
||||||
LL | println!("\t{}");
|
LL | println!("\t{}");
|
||||||
| ^^
|
| ^^
|
||||||
|
|
||||||
error: invalid format string: expected `'}'` but string was terminated
|
error: invalid format string: expected `'}'` but string was terminated
|
||||||
--> $DIR/format-string-error-2.rs:74:27
|
--> $DIR/format-string-error-2.rs:75:27
|
||||||
|
|
|
|
||||||
LL | println!("\x7B}\u{8} {", 1);
|
LL | println!("\x7B}\u{8} {", 1);
|
||||||
| -^ expected `'}'` in format string
|
| -^ expected `'}'` in format string
|
||||||
|
@ -157,16 +157,18 @@ LL | println!("\x7B}\u{8} {", 1);
|
||||||
|
|
|
|
||||||
= note: if you intended to print `{`, you can escape it using `{{`
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: argument never used
|
error: invalid format string: expected `'}'` but string was terminated
|
||||||
--> $DIR/format-string-error-2.rs:77:28
|
--> $DIR/format-string-error-2.rs:78:27
|
||||||
|
|
|
|
||||||
LL | println!("\x7B}\u8 {", 1);
|
LL | println!("\x7B}\u8 {", 1);
|
||||||
| ------------ ^ argument never used
|
| -^ expected `'}'` in format string
|
||||||
| |
|
| |
|
||||||
| formatting specifier missing
|
| because of this opening brace
|
||||||
|
|
|
||||||
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
error: invalid format string: unmatched `}` found
|
error: invalid format string: unmatched `}` found
|
||||||
--> $DIR/format-string-error-2.rs:82:21
|
--> $DIR/format-string-error-2.rs:83:21
|
||||||
|
|
|
|
||||||
LL | println!(r#"\x7B}\u{8} {"#, 1);
|
LL | println!(r#"\x7B}\u{8} {"#, 1);
|
||||||
| ^ unmatched `}` in format string
|
| ^ unmatched `}` in format string
|
||||||
|
@ -174,7 +176,7 @@ LL | println!(r#"\x7B}\u{8} {"#, 1);
|
||||||
= note: if you intended to print `}`, you can escape it using `}}`
|
= note: if you intended to print `}`, you can escape it using `}}`
|
||||||
|
|
||||||
error: invalid format string: unmatched `}` found
|
error: invalid format string: unmatched `}` found
|
||||||
--> $DIR/format-string-error-2.rs:85:21
|
--> $DIR/format-string-error-2.rs:86:21
|
||||||
|
|
|
|
||||||
LL | println!(r#"\x7B}\u8 {"#, 1);
|
LL | println!(r#"\x7B}\u8 {"#, 1);
|
||||||
| ^ unmatched `}` in format string
|
| ^ unmatched `}` in format string
|
||||||
|
|
|
@ -1,20 +1,20 @@
|
||||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||||
--> $DIR/ascii-only-character-escape.rs:4:16
|
--> $DIR/ascii-only-character-escape.rs:4:14
|
||||||
|
|
|
|
||||||
LL | let x = "\x80";
|
LL | let x = "\x80";
|
||||||
| ^^
|
| ^^^^
|
||||||
|
|
||||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||||
--> $DIR/ascii-only-character-escape.rs:5:16
|
--> $DIR/ascii-only-character-escape.rs:5:14
|
||||||
|
|
|
|
||||||
LL | let y = "\xff";
|
LL | let y = "\xff";
|
||||||
| ^^
|
| ^^^^
|
||||||
|
|
||||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||||
--> $DIR/ascii-only-character-escape.rs:6:16
|
--> $DIR/ascii-only-character-escape.rs:6:14
|
||||||
|
|
|
|
||||||
LL | let z = "\xe2";
|
LL | let z = "\xe2";
|
||||||
| ^^
|
| ^^^^
|
||||||
|
|
||||||
error: aborting due to 3 previous errors
|
error: aborting due to 3 previous errors
|
||||||
|
|
||||||
|
|
|
@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
|
||||||
LL | b'é';
|
LL | b'é';
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: unterminated byte constant: b'a
|
error: unterminated byte constant
|
||||||
--> $DIR/byte-literals.rs:14:5
|
--> $DIR/byte-literals.rs:14:6
|
||||||
|
|
|
|
||||||
LL | b'a
|
LL | b'a
|
||||||
| ^^^
|
| ^^^^
|
||||||
|
|
||||||
error: aborting due to 7 previous errors
|
error: aborting due to 7 previous errors
|
||||||
|
|
||||||
|
|
|
@ -23,10 +23,10 @@ LL | b"é";
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: unterminated double quote byte string
|
error: unterminated double quote byte string
|
||||||
--> $DIR/byte-string-literals.rs:9:7
|
--> $DIR/byte-string-literals.rs:9:6
|
||||||
|
|
|
|
||||||
LL | b"a
|
LL | b"a
|
||||||
| _______^
|
| ______^
|
||||||
LL | | }
|
LL | | }
|
||||||
| |__^
|
| |__^
|
||||||
|
|
||||||
|
|
|
@ -9,32 +9,27 @@ fn main() {
|
||||||
|
|
||||||
let _ = b'\u';
|
let _ = b'\u';
|
||||||
//~^ ERROR incorrect unicode escape sequence
|
//~^ ERROR incorrect unicode escape sequence
|
||||||
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
|
||||||
|
|
||||||
let _ = b'\x5';
|
let _ = b'\x5';
|
||||||
//~^ ERROR numeric character escape is too short
|
//~^ ERROR numeric character escape is too short
|
||||||
|
|
||||||
let _ = b'\xxy';
|
let _ = b'\xxy';
|
||||||
//~^ ERROR invalid character in numeric character escape: x
|
//~^ ERROR invalid character in numeric character escape: x
|
||||||
//~^^ ERROR invalid character in numeric character escape: y
|
|
||||||
|
|
||||||
let _ = '\x5';
|
let _ = '\x5';
|
||||||
//~^ ERROR numeric character escape is too short
|
//~^ ERROR numeric character escape is too short
|
||||||
|
|
||||||
let _ = '\xxy';
|
let _ = '\xxy';
|
||||||
//~^ ERROR invalid character in numeric character escape: x
|
//~^ ERROR invalid character in numeric character escape: x
|
||||||
//~^^ ERROR invalid character in numeric character escape: y
|
|
||||||
|
|
||||||
let _ = b"\u{a4a4} \xf \u";
|
let _ = b"\u{a4a4} \xf \u";
|
||||||
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||||
//~^^ ERROR invalid character in numeric character escape:
|
//~^^ ERROR invalid character in numeric character escape:
|
||||||
//~^^^ ERROR incorrect unicode escape sequence
|
//~^^^ ERROR incorrect unicode escape sequence
|
||||||
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
|
||||||
|
|
||||||
let _ = "\xf \u";
|
let _ = "\xf \u";
|
||||||
//~^ ERROR invalid character in numeric character escape:
|
//~^ ERROR invalid character in numeric character escape:
|
||||||
//~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
|
//~^^ ERROR incorrect unicode escape sequence
|
||||||
//~^^^ ERROR incorrect unicode escape sequence
|
|
||||||
|
|
||||||
let _ = "\u8f";
|
let _ = "\u8f";
|
||||||
//~^ ERROR incorrect unicode escape sequence
|
//~^ ERROR incorrect unicode escape sequence
|
||||||
|
|
|
@ -18,88 +18,58 @@ LL | let _ = b'\u';
|
||||||
|
|
|
|
||||||
= help: format of unicode escape sequences is `\u{...}`
|
= help: format of unicode escape sequences is `\u{...}`
|
||||||
|
|
||||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:10:15
|
|
||||||
|
|
|
||||||
LL | let _ = b'\u';
|
|
||||||
| ^^
|
|
||||||
|
|
||||||
error: numeric character escape is too short
|
error: numeric character escape is too short
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:14:17
|
--> $DIR/issue-23620-invalid-escapes.rs:13:15
|
||||||
|
|
|
|
||||||
LL | let _ = b'\x5';
|
LL | let _ = b'\x5';
|
||||||
| ^
|
| ^^^
|
||||||
|
|
||||||
error: invalid character in numeric character escape: x
|
error: invalid character in numeric character escape: x
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:17:17
|
--> $DIR/issue-23620-invalid-escapes.rs:16:17
|
||||||
|
|
|
|
||||||
LL | let _ = b'\xxy';
|
LL | let _ = b'\xxy';
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: invalid character in numeric character escape: y
|
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:17:18
|
|
||||||
|
|
|
||||||
LL | let _ = b'\xxy';
|
|
||||||
| ^
|
|
||||||
|
|
||||||
error: numeric character escape is too short
|
error: numeric character escape is too short
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:21:16
|
--> $DIR/issue-23620-invalid-escapes.rs:19:14
|
||||||
|
|
|
|
||||||
LL | let _ = '\x5';
|
LL | let _ = '\x5';
|
||||||
| ^
|
| ^^^
|
||||||
|
|
||||||
error: invalid character in numeric character escape: x
|
error: invalid character in numeric character escape: x
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:24:16
|
--> $DIR/issue-23620-invalid-escapes.rs:22:16
|
||||||
|
|
|
|
||||||
LL | let _ = '\xxy';
|
LL | let _ = '\xxy';
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: invalid character in numeric character escape: y
|
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:24:17
|
|
||||||
|
|
|
||||||
LL | let _ = '\xxy';
|
|
||||||
| ^
|
|
||||||
|
|
||||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:28:15
|
--> $DIR/issue-23620-invalid-escapes.rs:25:15
|
||||||
|
|
|
|
||||||
LL | let _ = b"\u{a4a4} \xf \u";
|
LL | let _ = b"\u{a4a4} \xf \u";
|
||||||
| ^^^^^^^^
|
| ^^^^^^^^
|
||||||
|
|
||||||
error: invalid character in numeric character escape:
|
error: invalid character in numeric character escape:
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:28:27
|
--> $DIR/issue-23620-invalid-escapes.rs:25:27
|
||||||
|
|
|
|
||||||
LL | let _ = b"\u{a4a4} \xf \u";
|
LL | let _ = b"\u{a4a4} \xf \u";
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: incorrect unicode escape sequence
|
error: incorrect unicode escape sequence
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
--> $DIR/issue-23620-invalid-escapes.rs:25:28
|
||||||
|
|
|
|
||||||
LL | let _ = b"\u{a4a4} \xf \u";
|
LL | let _ = b"\u{a4a4} \xf \u";
|
||||||
| ^^ incorrect unicode escape sequence
|
| ^^ incorrect unicode escape sequence
|
||||||
|
|
|
|
||||||
= help: format of unicode escape sequences is `\u{...}`
|
= help: format of unicode escape sequences is `\u{...}`
|
||||||
|
|
||||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
|
||||||
|
|
|
||||||
LL | let _ = b"\u{a4a4} \xf \u";
|
|
||||||
| ^^
|
|
||||||
|
|
||||||
error: invalid character in numeric character escape:
|
error: invalid character in numeric character escape:
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:34:17
|
--> $DIR/issue-23620-invalid-escapes.rs:30:17
|
||||||
|
|
|
|
||||||
LL | let _ = "\xf \u";
|
LL | let _ = "\xf \u";
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:34:16
|
|
||||||
|
|
|
||||||
LL | let _ = "\xf \u";
|
|
||||||
| ^^
|
|
||||||
|
|
||||||
error: incorrect unicode escape sequence
|
error: incorrect unicode escape sequence
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:34:18
|
--> $DIR/issue-23620-invalid-escapes.rs:30:18
|
||||||
|
|
|
|
||||||
LL | let _ = "\xf \u";
|
LL | let _ = "\xf \u";
|
||||||
| ^^ incorrect unicode escape sequence
|
| ^^ incorrect unicode escape sequence
|
||||||
|
@ -107,12 +77,12 @@ LL | let _ = "\xf \u";
|
||||||
= help: format of unicode escape sequences is `\u{...}`
|
= help: format of unicode escape sequences is `\u{...}`
|
||||||
|
|
||||||
error: incorrect unicode escape sequence
|
error: incorrect unicode escape sequence
|
||||||
--> $DIR/issue-23620-invalid-escapes.rs:39:14
|
--> $DIR/issue-23620-invalid-escapes.rs:34:14
|
||||||
|
|
|
|
||||||
LL | let _ = "\u8f";
|
LL | let _ = "\u8f";
|
||||||
| ^^--
|
| ^^--
|
||||||
| |
|
| |
|
||||||
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
||||||
|
|
||||||
error: aborting due to 18 previous errors
|
error: aborting due to 13 previous errors
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
error: numeric character escape is too short
|
error: numeric character escape is too short
|
||||||
--> $DIR/lex-bad-char-literals-1.rs:3:8
|
--> $DIR/lex-bad-char-literals-1.rs:3:6
|
||||||
|
|
|
|
||||||
LL | '\x1'
|
LL | '\x1'
|
||||||
| ^
|
| ^^^
|
||||||
|
|
||||||
error: numeric character escape is too short
|
error: numeric character escape is too short
|
||||||
--> $DIR/lex-bad-char-literals-1.rs:7:8
|
--> $DIR/lex-bad-char-literals-1.rs:7:6
|
||||||
|
|
|
|
||||||
LL | "\x1"
|
LL | "\x1"
|
||||||
| ^
|
| ^^^
|
||||||
|
|
||||||
error: unknown character escape: \u{25cf}
|
error: unknown character escape: \u{25cf}
|
||||||
--> $DIR/lex-bad-char-literals-1.rs:11:7
|
--> $DIR/lex-bad-char-literals-1.rs:11:7
|
||||||
|
|
|
@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
|
||||||
|
|
|
|
||||||
LL | 'nope'
|
LL | 'nope'
|
||||||
| ^^^^^^
|
| ^^^^^^
|
||||||
|
help: if you meant to write a `str` literal, use double quotes
|
||||||
|
|
|
||||||
|
LL | "nope"
|
||||||
|
| ^^^^^^
|
||||||
|
|
||||||
error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
|
error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
|
||||||
|
|
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//
|
//
|
||||||
// This test needs to be the last one appearing in this file as it kills the parser
|
// This test needs to be the last one appearing in this file as it kills the parser
|
||||||
static c: char =
|
static c: char =
|
||||||
'● //~ ERROR: character literal may only contain one codepoint
|
'● //~ ERROR: unterminated character literal
|
||||||
;
|
;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
error: character literal may only contain one codepoint: '●
|
error: unterminated character literal
|
||||||
--> $DIR/lex-bad-char-literals-4.rs:4:5
|
--> $DIR/lex-bad-char-literals-4.rs:4:5
|
||||||
|
|
|
|
||||||
LL | '●
|
LL | '●
|
||||||
| ^^
|
| ^^^^
|
||||||
|
|
||||||
error: aborting due to previous error
|
error: aborting due to previous error
|
||||||
|
|
||||||
|
|
|
@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
|
||||||
|
|
|
|
||||||
LL | let x: &str = 'ab';
|
LL | let x: &str = 'ab';
|
||||||
| ^^^^
|
| ^^^^
|
||||||
|
help: if you meant to write a `str` literal, use double quotes
|
||||||
|
|
|
||||||
|
LL | let x: &str = "ab";
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
error: character literal may only contain one codepoint
|
error: character literal may only contain one codepoint
|
||||||
--> $DIR/lex-bad-char-literals-6.rs:4:19
|
--> $DIR/lex-bad-char-literals-6.rs:4:19
|
||||||
|
|
|
|
||||||
LL | let y: char = 'cd';
|
LL | let y: char = 'cd';
|
||||||
| ^^^^
|
| ^^^^
|
||||||
|
help: if you meant to write a `str` literal, use double quotes
|
||||||
|
|
|
||||||
|
LL | let y: char = "cd";
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
error: character literal may only contain one codepoint
|
error: character literal may only contain one codepoint
|
||||||
--> $DIR/lex-bad-char-literals-6.rs:6:13
|
--> $DIR/lex-bad-char-literals-6.rs:6:13
|
||||||
|
|
|
|
||||||
LL | let z = 'ef';
|
LL | let z = 'ef';
|
||||||
| ^^^^
|
| ^^^^
|
||||||
|
help: if you meant to write a `str` literal, use double quotes
|
||||||
|
|
|
||||||
|
LL | let z = "ef";
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
error[E0277]: can't compare `&str` with `char`
|
error[E0277]: can't compare `&str` with `char`
|
||||||
--> $DIR/lex-bad-char-literals-6.rs:9:10
|
--> $DIR/lex-bad-char-literals-6.rs:9:10
|
||||||
|
|
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
// compile-flags: -Z continue-parse-after-error
|
||||||
|
fn main() {
|
||||||
|
let _: char = '';
|
||||||
|
//~^ ERROR: empty character literal
|
||||||
|
let _: char = '\u{}';
|
||||||
|
//~^ ERROR: empty unicode escape (must have at least 1 hex digit)
|
||||||
|
|
||||||
|
// Next two are OK, but may befool error recovery
|
||||||
|
let _ = '/';
|
||||||
|
let _ = b'/';
|
||||||
|
|
||||||
|
let _ = ' hello // here's a comment
|
||||||
|
//~^ ERROR: unterminated character literal
|
||||||
|
}
|
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
error: empty character literal
|
||||||
|
--> $DIR/lex-bad-char-literals-7.rs:3:20
|
||||||
|
|
|
||||||
|
LL | let _: char = '';
|
||||||
|
| ^
|
||||||
|
|
||||||
|
error: empty unicode escape (must have at least 1 hex digit)
|
||||||
|
--> $DIR/lex-bad-char-literals-7.rs:5:20
|
||||||
|
|
|
||||||
|
LL | let _: char = '\u{}';
|
||||||
|
| ^^^^
|
||||||
|
|
||||||
|
error: unterminated character literal
|
||||||
|
--> $DIR/lex-bad-char-literals-7.rs:12:13
|
||||||
|
|
|
||||||
|
LL | let _ = ' hello // here's a comment
|
||||||
|
| ^^^^^^^^
|
||||||
|
|
||||||
|
error: aborting due to 3 previous errors
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
macro_rules! black_hole {
|
||||||
|
($($tt:tt)*) => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
black_hole! { '\u{FFFFFF}' }
|
||||||
|
//~^ ERROR: invalid unicode character escape
|
||||||
|
black_hole! { "this is surrogate: \u{DAAA}" }
|
||||||
|
//~^ ERROR: invalid unicode character escape
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
|
||||||
|
error: invalid unicode character escape
|
||||||
|
--> $DIR/literals-are-validated-before-expansion.rs:6:20
|
||||||
|
|
|
||||||
|
LL | black_hole! { '\u{FFFFFF}' }
|
||||||
|
| ^^^^^^^^^^
|
||||||
|
|
|
||||||
|
= help: unicode escape must be at most 10FFFF
|
||||||
|
|
||||||
|
error: invalid unicode character escape
|
||||||
|
--> $DIR/literals-are-validated-before-expansion.rs:8:39
|
||||||
|
|
|
||||||
|
LL | black_hole! { "this is surrogate: \u{DAAA}" }
|
||||||
|
| ^^^^^^^^
|
||||||
|
|
|
||||||
|
= help: unicode escape must not be a surrogate
|
||||||
|
|
||||||
|
error: aborting due to 2 previous errors
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
error: unterminated unicode escape (needed a `}`)
|
error: unterminated unicode escape (needed a `}`)
|
||||||
--> $DIR/new-unicode-escapes-1.rs:2:21
|
--> $DIR/new-unicode-escapes-1.rs:2:14
|
||||||
|
|
|
|
||||||
LL | let s = "\u{2603";
|
LL | let s = "\u{2603";
|
||||||
| ^
|
| ^^^^^^^
|
||||||
|
|
||||||
error: aborting due to previous error
|
error: aborting due to previous error
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
error: overlong unicode escape (must have at most 6 hex digits)
|
error: overlong unicode escape (must have at most 6 hex digits)
|
||||||
--> $DIR/new-unicode-escapes-2.rs:2:17
|
--> $DIR/new-unicode-escapes-2.rs:2:14
|
||||||
|
|
|
|
||||||
LL | let s = "\u{260311111111}";
|
LL | let s = "\u{260311111111}";
|
||||||
| ^^^^^^^^^^^^
|
| ^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
error: aborting due to previous error
|
error: aborting due to previous error
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
error: invalid unicode character escape
|
error: invalid unicode character escape
|
||||||
--> $DIR/new-unicode-escapes-3.rs:2:14
|
--> $DIR/new-unicode-escapes-3.rs:2:15
|
||||||
|
|
|
|
||||||
LL | let s1 = "\u{d805}";
|
LL | let s1 = "\u{d805}";
|
||||||
| ^^^^^^^^^^
|
| ^^^^^^^^
|
||||||
|
|
|
|
||||||
= help: unicode escape must not be a surrogate
|
= help: unicode escape must not be a surrogate
|
||||||
|
|
||||||
error: invalid unicode character escape
|
error: invalid unicode character escape
|
||||||
--> $DIR/new-unicode-escapes-3.rs:3:14
|
--> $DIR/new-unicode-escapes-3.rs:3:15
|
||||||
|
|
|
|
||||||
LL | let s2 = "\u{ffffff}";
|
LL | let s2 = "\u{ffffff}";
|
||||||
| ^^^^^^^^^^^^
|
| ^^^^^^^^^^
|
||||||
|
|
|
|
||||||
= help: unicode escape must be at most 10FFFF
|
= help: unicode escape must be at most 10FFFF
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue