syntax: don't process string/char/byte/binary lits
This shuffles things around a bit so that LIT_CHAR and co store an Ident which is the original, unaltered literal in the source. When creating the AST, unescape and postprocess them. This changes how syntax extensions can work, slightly, but otherwise poses no visible changes. To get a useful value out of one of these tokens, call `parse::{char_lit, byte_lit, binary_lit, str_lit}`. [breaking-change]
This commit is contained in:
parent
bf04a7ccb1
commit
9f5e21da4e
7 changed files with 327 additions and 81 deletions
|
@ -579,9 +579,9 @@ pub fn get_single_str_from_tts(cx: &ExtCtxt,
|
||||||
cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice());
|
cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice());
|
||||||
} else {
|
} else {
|
||||||
match tts[0] {
|
match tts[0] {
|
||||||
ast::TTTok(_, token::LIT_STR(ident))
|
ast::TTTok(_, token::LIT_STR(ident)) => return Some(parse::str_lit(ident.as_str())),
|
||||||
| ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
|
ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
|
||||||
return Some(token::get_ident(ident).get().to_string())
|
return Some(parse::raw_str_lit(ident.as_str()))
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
cx.span_err(sp,
|
cx.span_err(sp,
|
||||||
|
|
|
@ -401,13 +401,13 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
|
||||||
}
|
}
|
||||||
|
|
||||||
LIT_BYTE(i) => {
|
LIT_BYTE(i) => {
|
||||||
let e_byte = cx.expr_lit(sp, ast::LitByte(i));
|
let e_byte = mk_ident(cx, sp, i);
|
||||||
|
|
||||||
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
|
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
|
||||||
}
|
}
|
||||||
|
|
||||||
LIT_CHAR(i) => {
|
LIT_CHAR(i) => {
|
||||||
let e_char = cx.expr_lit(sp, ast::LitChar(i));
|
let e_char = mk_ident(cx, sp, i);
|
||||||
|
|
||||||
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
|
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
|
||||||
}
|
}
|
||||||
|
|
|
@ -685,7 +685,7 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
|
fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> bool {
|
||||||
let mut accum_int = 0u32;
|
let mut accum_int = 0u32;
|
||||||
let start_bpos = self.last_pos;
|
let start_bpos = self.last_pos;
|
||||||
for _ in range(0, n_hex_digits) {
|
for _ in range(0, n_hex_digits) {
|
||||||
|
@ -709,11 +709,11 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
match char::from_u32(accum_int) {
|
match char::from_u32(accum_int) {
|
||||||
Some(x) => x,
|
Some(_) => true,
|
||||||
None => {
|
None => {
|
||||||
let last_bpos = self.last_pos;
|
let last_bpos = self.last_pos;
|
||||||
self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
|
self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
|
||||||
'?'
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -721,8 +721,10 @@ impl<'a> StringReader<'a> {
|
||||||
/// Scan for a single (possibly escaped) byte or char
|
/// Scan for a single (possibly escaped) byte or char
|
||||||
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
||||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||||
|
///
|
||||||
|
/// Returns true if there was a valid char/byte, false otherwise.
|
||||||
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||||
ascii_only: bool, delim: char) -> Option<char> {
|
ascii_only: bool, delim: char) -> bool {
|
||||||
match first_source_char {
|
match first_source_char {
|
||||||
'\\' => {
|
'\\' => {
|
||||||
// '\X' for some X must be a character constant:
|
// '\X' for some X must be a character constant:
|
||||||
|
@ -732,24 +734,18 @@ impl<'a> StringReader<'a> {
|
||||||
match escaped {
|
match escaped {
|
||||||
None => {}, // EOF here is an error that will be checked later.
|
None => {}, // EOF here is an error that will be checked later.
|
||||||
Some(e) => {
|
Some(e) => {
|
||||||
return Some(match e {
|
return match e {
|
||||||
'n' => '\n',
|
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
||||||
'r' => '\r',
|
|
||||||
't' => '\t',
|
|
||||||
'\\' => '\\',
|
|
||||||
'\'' => '\'',
|
|
||||||
'"' => '"',
|
|
||||||
'0' => '\x00',
|
|
||||||
'x' => self.scan_numeric_escape(2u, delim),
|
'x' => self.scan_numeric_escape(2u, delim),
|
||||||
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
|
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
|
||||||
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
|
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
|
||||||
'\n' if delim == '"' => {
|
'\n' if delim == '"' => {
|
||||||
self.consume_whitespace();
|
self.consume_whitespace();
|
||||||
return None
|
true
|
||||||
},
|
},
|
||||||
'\r' if delim == '"' && self.curr_is('\n') => {
|
'\r' if delim == '"' && self.curr_is('\n') => {
|
||||||
self.consume_whitespace();
|
self.consume_whitespace();
|
||||||
return None
|
true
|
||||||
}
|
}
|
||||||
c => {
|
c => {
|
||||||
let last_pos = self.last_pos;
|
let last_pos = self.last_pos;
|
||||||
|
@ -758,9 +754,9 @@ impl<'a> StringReader<'a> {
|
||||||
if ascii_only { "unknown byte escape" }
|
if ascii_only { "unknown byte escape" }
|
||||||
else { "unknown character escape" },
|
else { "unknown character escape" },
|
||||||
c);
|
c);
|
||||||
c
|
false
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -771,14 +767,16 @@ impl<'a> StringReader<'a> {
|
||||||
if ascii_only { "byte constant must be escaped" }
|
if ascii_only { "byte constant must be escaped" }
|
||||||
else { "character constant must be escaped" },
|
else { "character constant must be escaped" },
|
||||||
first_source_char);
|
first_source_char);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
'\r' => {
|
'\r' => {
|
||||||
if self.curr_is('\n') {
|
if self.curr_is('\n') {
|
||||||
self.bump();
|
self.bump();
|
||||||
return Some('\n');
|
return true;
|
||||||
} else {
|
} else {
|
||||||
self.err_span_(start, self.last_pos,
|
self.err_span_(start, self.last_pos,
|
||||||
"bare CR not allowed in string, use \\r instead");
|
"bare CR not allowed in string, use \\r instead");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => if ascii_only && first_source_char > '\x7F' {
|
_ => if ascii_only && first_source_char > '\x7F' {
|
||||||
|
@ -787,9 +785,10 @@ impl<'a> StringReader<'a> {
|
||||||
start, last_pos,
|
start, last_pos,
|
||||||
"byte constant must be ASCII. \
|
"byte constant must be ASCII. \
|
||||||
Use a \\xHH escape for a non-ASCII byte", first_source_char);
|
Use a \\xHH escape for a non-ASCII byte", first_source_char);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(first_source_char)
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn binop(&mut self, op: token::BinOp) -> token::Token {
|
fn binop(&mut self, op: token::BinOp) -> token::Token {
|
||||||
|
@ -924,7 +923,7 @@ impl<'a> StringReader<'a> {
|
||||||
let start = self.last_pos;
|
let start = self.last_pos;
|
||||||
|
|
||||||
// the eof will be picked up by the final `'` check below
|
// the eof will be picked up by the final `'` check below
|
||||||
let mut c2 = self.curr.unwrap_or('\x00');
|
let c2 = self.curr.unwrap_or('\x00');
|
||||||
self.bump();
|
self.bump();
|
||||||
|
|
||||||
// If the character is an ident start not followed by another single
|
// If the character is an ident start not followed by another single
|
||||||
|
@ -967,7 +966,7 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise it is a character constant:
|
// Otherwise it is a character constant:
|
||||||
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
|
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
|
||||||
if !self.curr_is('\'') {
|
if !self.curr_is('\'') {
|
||||||
let last_bpos = self.last_pos;
|
let last_bpos = self.last_pos;
|
||||||
self.fatal_span_verbose(
|
self.fatal_span_verbose(
|
||||||
|
@ -977,8 +976,9 @@ impl<'a> StringReader<'a> {
|
||||||
start - BytePos(1), last_bpos,
|
start - BytePos(1), last_bpos,
|
||||||
"unterminated character constant".to_string());
|
"unterminated character constant".to_string());
|
||||||
}
|
}
|
||||||
|
let id = if valid { self.ident_from(start) } else { str_to_ident("0") };
|
||||||
self.bump(); // advance curr past token
|
self.bump(); // advance curr past token
|
||||||
return token::LIT_CHAR(c2);
|
return token::LIT_CHAR(id);
|
||||||
}
|
}
|
||||||
'b' => {
|
'b' => {
|
||||||
self.bump();
|
self.bump();
|
||||||
|
@ -991,8 +991,8 @@ impl<'a> StringReader<'a> {
|
||||||
|
|
||||||
}
|
}
|
||||||
'"' => {
|
'"' => {
|
||||||
let mut accum_str = String::new();
|
|
||||||
let start_bpos = self.last_pos;
|
let start_bpos = self.last_pos;
|
||||||
|
let mut valid = true;
|
||||||
self.bump();
|
self.bump();
|
||||||
while !self.curr_is('"') {
|
while !self.curr_is('"') {
|
||||||
if self.is_eof() {
|
if self.is_eof() {
|
||||||
|
@ -1003,11 +1003,13 @@ impl<'a> StringReader<'a> {
|
||||||
let ch_start = self.last_pos;
|
let ch_start = self.last_pos;
|
||||||
let ch = self.curr.unwrap();
|
let ch = self.curr.unwrap();
|
||||||
self.bump();
|
self.bump();
|
||||||
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
|
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
|
||||||
.map(|ch| accum_str.push_char(ch));
|
|
||||||
}
|
}
|
||||||
|
// adjust for the ASCII " at the start of the literal
|
||||||
|
let id = if valid { self.ident_from(start_bpos + BytePos(1)) }
|
||||||
|
else { str_to_ident("??") };
|
||||||
self.bump();
|
self.bump();
|
||||||
return token::LIT_STR(str_to_ident(accum_str.as_slice()));
|
return token::LIT_STR(id);
|
||||||
}
|
}
|
||||||
'r' => {
|
'r' => {
|
||||||
let start_bpos = self.last_pos;
|
let start_bpos = self.last_pos;
|
||||||
|
@ -1032,7 +1034,7 @@ impl<'a> StringReader<'a> {
|
||||||
self.bump();
|
self.bump();
|
||||||
let content_start_bpos = self.last_pos;
|
let content_start_bpos = self.last_pos;
|
||||||
let mut content_end_bpos;
|
let mut content_end_bpos;
|
||||||
let mut has_cr = false;
|
let mut valid = true;
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
if self.is_eof() {
|
if self.is_eof() {
|
||||||
let last_bpos = self.last_pos;
|
let last_bpos = self.last_pos;
|
||||||
|
@ -1055,23 +1057,26 @@ impl<'a> StringReader<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
},
|
||||||
'\r' => {
|
'\r' => {
|
||||||
has_cr = true;
|
if !self.nextch_is('\n') {
|
||||||
|
let last_bpos = self.last_pos;
|
||||||
|
self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
|
||||||
|
string, use \\r instead");
|
||||||
|
valid = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => ()
|
_ => ()
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
|
let id = if valid {
|
||||||
let string = if has_cr {
|
self.ident_from_to(content_start_bpos, content_end_bpos)
|
||||||
self.translate_crlf(content_start_bpos, string,
|
} else {
|
||||||
"bare CR not allowed in raw string")
|
str_to_ident("??")
|
||||||
} else { string.into_maybe_owned() };
|
};
|
||||||
str_to_ident(string.as_slice())
|
return token::LIT_STR_RAW(id, hash_count);
|
||||||
});
|
|
||||||
return token::LIT_STR_RAW(str_content, hash_count);
|
|
||||||
}
|
}
|
||||||
'-' => {
|
'-' => {
|
||||||
if self.nextch_is('>') {
|
if self.nextch_is('>') {
|
||||||
|
@ -1145,10 +1150,10 @@ impl<'a> StringReader<'a> {
|
||||||
let start = self.last_pos;
|
let start = self.last_pos;
|
||||||
|
|
||||||
// the eof will be picked up by the final `'` check below
|
// the eof will be picked up by the final `'` check below
|
||||||
let mut c2 = self.curr.unwrap_or('\x00');
|
let c2 = self.curr.unwrap_or('\x00');
|
||||||
self.bump();
|
self.bump();
|
||||||
|
|
||||||
c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
|
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
|
||||||
if !self.curr_is('\'') {
|
if !self.curr_is('\'') {
|
||||||
// Byte offsetting here is okay because the
|
// Byte offsetting here is okay because the
|
||||||
// character before position `start` are an
|
// character before position `start` are an
|
||||||
|
@ -1158,14 +1163,17 @@ impl<'a> StringReader<'a> {
|
||||||
start - BytePos(2), last_pos,
|
start - BytePos(2), last_pos,
|
||||||
"unterminated byte constant".to_string());
|
"unterminated byte constant".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
|
||||||
self.bump(); // advance curr past token
|
self.bump(); // advance curr past token
|
||||||
return token::LIT_BYTE(c2 as u8);
|
return token::LIT_BYTE(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_byte_string(&mut self) -> token::Token {
|
fn scan_byte_string(&mut self) -> token::Token {
|
||||||
self.bump();
|
self.bump();
|
||||||
let start = self.last_pos;
|
let start = self.last_pos;
|
||||||
let mut value = Vec::new();
|
let mut valid = true;
|
||||||
|
|
||||||
while !self.curr_is('"') {
|
while !self.curr_is('"') {
|
||||||
if self.is_eof() {
|
if self.is_eof() {
|
||||||
let last_pos = self.last_pos;
|
let last_pos = self.last_pos;
|
||||||
|
@ -1176,11 +1184,11 @@ impl<'a> StringReader<'a> {
|
||||||
let ch_start = self.last_pos;
|
let ch_start = self.last_pos;
|
||||||
let ch = self.curr.unwrap();
|
let ch = self.curr.unwrap();
|
||||||
self.bump();
|
self.bump();
|
||||||
self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
|
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
|
||||||
.map(|ch| value.push(ch as u8));
|
|
||||||
}
|
}
|
||||||
|
let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
|
||||||
self.bump();
|
self.bump();
|
||||||
return token::LIT_BINARY(Rc::new(value));
|
return token::LIT_BINARY(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_raw_byte_string(&mut self) -> token::Token {
|
fn scan_raw_byte_string(&mut self) -> token::Token {
|
||||||
|
@ -1231,10 +1239,8 @@ impl<'a> StringReader<'a> {
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
let bytes = self.with_str_from_to(content_start_bpos,
|
return token::LIT_BINARY_RAW(self.ident_from_to(content_start_bpos, content_end_bpos),
|
||||||
content_end_bpos,
|
hash_count);
|
||||||
|s| s.as_bytes().to_owned());
|
|
||||||
return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,7 +272,239 @@ pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a string representing a character literal into its final form.
|
||||||
|
/// Rather than just accepting/rejecting a given literal, unescapes it as
|
||||||
|
/// well. Can take any slice prefixed by a character escape. Returns the
|
||||||
|
/// character and the number of characters consumed.
|
||||||
|
pub fn char_lit(lit: &str) -> (char, int) {
|
||||||
|
use std::{num, char};
|
||||||
|
|
||||||
|
let mut chars = lit.chars();
|
||||||
|
let c = match (chars.next(), chars.next()) {
|
||||||
|
(Some(c), None) if c != '\\' => return (c, 1),
|
||||||
|
(Some('\\'), Some(c)) => match c {
|
||||||
|
'"' => Some('"'),
|
||||||
|
'n' => Some('\n'),
|
||||||
|
'r' => Some('\r'),
|
||||||
|
't' => Some('\t'),
|
||||||
|
'\\' => Some('\\'),
|
||||||
|
'\'' => Some('\''),
|
||||||
|
'0' => Some('\0'),
|
||||||
|
_ => { None }
|
||||||
|
},
|
||||||
|
_ => fail!("lexer accepted invalid char escape `{}`", lit)
|
||||||
|
};
|
||||||
|
|
||||||
|
match c {
|
||||||
|
Some(x) => return (x, 2),
|
||||||
|
None => { }
|
||||||
|
}
|
||||||
|
|
||||||
|
let msg = format!("lexer should have rejected a bad character escape {}", lit);
|
||||||
|
let msg2 = msg.as_slice();
|
||||||
|
|
||||||
|
let esc: |uint| -> Option<(char, int)> = |len|
|
||||||
|
num::from_str_radix(lit.slice(2, len), 16)
|
||||||
|
.and_then(char::from_u32)
|
||||||
|
.map(|x| (x, len as int));
|
||||||
|
|
||||||
|
// Unicode escapes
|
||||||
|
return match lit.as_bytes()[1] as char {
|
||||||
|
'x' | 'X' => esc(4),
|
||||||
|
'u' => esc(6),
|
||||||
|
'U' => esc(10),
|
||||||
|
_ => None,
|
||||||
|
}.expect(msg2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a string representing a string literal into its final form. Does
|
||||||
|
/// unescaping.
|
||||||
|
pub fn str_lit(lit: &str) -> String {
|
||||||
|
debug!("parse_str_lit: given {}", lit.escape_default());
|
||||||
|
let mut res = String::with_capacity(lit.len());
|
||||||
|
|
||||||
|
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
|
||||||
|
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
||||||
|
|
||||||
|
/// Eat everything up to a non-whitespace
|
||||||
|
fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
|
||||||
|
loop {
|
||||||
|
match it.peek().map(|x| x.val1()) {
|
||||||
|
Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
|
||||||
|
it.next();
|
||||||
|
},
|
||||||
|
_ => { break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut chars = lit.char_indices().peekable();
|
||||||
|
loop {
|
||||||
|
match chars.next() {
|
||||||
|
Some((i, c)) => {
|
||||||
|
let em = error(i);
|
||||||
|
match c {
|
||||||
|
'\\' => {
|
||||||
|
if chars.peek().expect(em.as_slice()).val1() == '\n' {
|
||||||
|
eat(&mut chars);
|
||||||
|
} else if chars.peek().expect(em.as_slice()).val1() == '\r' {
|
||||||
|
chars.next();
|
||||||
|
if chars.peek().expect(em.as_slice()).val1() != '\n' {
|
||||||
|
fail!("lexer accepted bare CR");
|
||||||
|
}
|
||||||
|
eat(&mut chars);
|
||||||
|
} else {
|
||||||
|
// otherwise, a normal escape
|
||||||
|
let (c, n) = char_lit(lit.slice_from(i));
|
||||||
|
for _ in range(0, n - 1) { // we don't need to move past the first \
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
res.push_char(c);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'\r' => {
|
||||||
|
if chars.peek().expect(em.as_slice()).val1() != '\n' {
|
||||||
|
fail!("lexer accepted bare CR");
|
||||||
|
}
|
||||||
|
chars.next();
|
||||||
|
res.push_char('\n');
|
||||||
|
}
|
||||||
|
c => res.push_char(c),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
|
||||||
|
debug!("parse_str_lit: returning {}", res);
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a string representing a raw string literal into its final form. The
|
||||||
|
/// only operation this does is convert embedded CRLF into a single LF.
|
||||||
|
pub fn raw_str_lit(lit: &str) -> String {
|
||||||
|
debug!("raw_str_lit: given {}", lit.escape_default());
|
||||||
|
let mut res = String::with_capacity(lit.len());
|
||||||
|
|
||||||
|
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
|
||||||
|
let mut chars = lit.chars().peekable();
|
||||||
|
loop {
|
||||||
|
match chars.next() {
|
||||||
|
Some(c) => {
|
||||||
|
if c == '\r' {
|
||||||
|
if *chars.peek().unwrap() != '\n' {
|
||||||
|
fail!("lexer accepted bare CR");
|
||||||
|
}
|
||||||
|
chars.next();
|
||||||
|
res.push_char('\n');
|
||||||
|
} else {
|
||||||
|
res.push_char(c);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.shrink_to_fit();
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn float_lit(s: &str) -> ast::Lit_ {
|
||||||
|
debug!("float_lit: {}", s);
|
||||||
|
// FIXME #2252: bounds checking float literals is deferred until trans
|
||||||
|
let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
|
||||||
|
let s = s2.as_slice();
|
||||||
|
|
||||||
|
let mut ty = None;
|
||||||
|
|
||||||
|
if s.ends_with("f32") {
|
||||||
|
ty = Some(ast::TyF32);
|
||||||
|
} else if s.ends_with("f64") {
|
||||||
|
ty = Some(ast::TyF64);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
match ty {
|
||||||
|
Some(t) => {
|
||||||
|
ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
|
||||||
|
},
|
||||||
|
None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a string representing a byte literal into its final form. Similar to `char_lit`
|
||||||
|
pub fn byte_lit(lit: &str) -> (u8, uint) {
|
||||||
|
let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i);
|
||||||
|
|
||||||
|
if lit.len() == 1 {
|
||||||
|
(lit.as_bytes()[0], 1)
|
||||||
|
} else {
|
||||||
|
assert!(lit.as_bytes()[0] == b'\\', err(0i));
|
||||||
|
let b = match lit.as_bytes()[1] {
|
||||||
|
b'"' => b'"',
|
||||||
|
b'n' => b'\n',
|
||||||
|
b'r' => b'\r',
|
||||||
|
b't' => b'\t',
|
||||||
|
b'\\' => b'\\',
|
||||||
|
b'\'' => b'\'',
|
||||||
|
b'0' => b'\0',
|
||||||
|
_ => {
|
||||||
|
match ::std::num::from_str_radix::<u64>(lit.slice(2, 4), 16) {
|
||||||
|
Some(c) =>
|
||||||
|
if c > 0xFF {
|
||||||
|
fail!(err(2))
|
||||||
|
} else {
|
||||||
|
return (c as u8, 4)
|
||||||
|
},
|
||||||
|
None => fail!(err(3))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return (b, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
|
||||||
|
let mut res = Vec::with_capacity(lit.len());
|
||||||
|
|
||||||
|
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
|
||||||
|
let error = |i| format!("lexer should have rejected {} at {}", lit, i);
|
||||||
|
|
||||||
|
// binary literals *must* be ASCII, but the escapes don't have to be
|
||||||
|
let mut chars = lit.as_bytes().iter().enumerate().peekable();
|
||||||
|
loop {
|
||||||
|
match chars.next() {
|
||||||
|
Some((i, &c)) => {
|
||||||
|
if c == b'\\' {
|
||||||
|
if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
|
||||||
|
loop {
|
||||||
|
// eat everything up to a non-whitespace
|
||||||
|
match chars.peek().map(|x| *x.val1()) {
|
||||||
|
Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
|
||||||
|
chars.next();
|
||||||
|
},
|
||||||
|
_ => { break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// otherwise, a normal escape
|
||||||
|
let (c, n) = byte_lit(lit.slice_from(i));
|
||||||
|
for _ in range(0, n - 1) { // we don't need to move past the first \
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
res.push(c);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
res.push(c);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => { break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Rc::new(res)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
|
|
@ -61,6 +61,7 @@ use ast_util::{as_prec, ident_to_path, lit_is_str, operator_prec};
|
||||||
use ast_util;
|
use ast_util;
|
||||||
use codemap::{Span, BytePos, Spanned, spanned, mk_sp};
|
use codemap::{Span, BytePos, Spanned, spanned, mk_sp};
|
||||||
use codemap;
|
use codemap;
|
||||||
|
use parse;
|
||||||
use parse::attr::ParserAttr;
|
use parse::attr::ParserAttr;
|
||||||
use parse::classify;
|
use parse::classify;
|
||||||
use parse::common::{SeqSep, seq_sep_none};
|
use parse::common::{SeqSep, seq_sep_none};
|
||||||
|
@ -1543,8 +1544,8 @@ impl<'a> Parser<'a> {
|
||||||
/// Matches token_lit = LIT_INT | ...
|
/// Matches token_lit = LIT_INT | ...
|
||||||
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
|
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
|
||||||
match *tok {
|
match *tok {
|
||||||
token::LIT_BYTE(i) => LitByte(i),
|
token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()),
|
||||||
token::LIT_CHAR(i) => LitChar(i),
|
token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()),
|
||||||
token::LIT_INT(i, it) => LitInt(i, it),
|
token::LIT_INT(i, it) => LitInt(i, it),
|
||||||
token::LIT_UINT(u, ut) => LitUint(u, ut),
|
token::LIT_UINT(u, ut) => LitUint(u, ut),
|
||||||
token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i),
|
token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i),
|
||||||
|
@ -1555,13 +1556,17 @@ impl<'a> Parser<'a> {
|
||||||
LitFloatUnsuffixed(self.id_to_interned_str(s))
|
LitFloatUnsuffixed(self.id_to_interned_str(s))
|
||||||
}
|
}
|
||||||
token::LIT_STR(s) => {
|
token::LIT_STR(s) => {
|
||||||
LitStr(self.id_to_interned_str(s), ast::CookedStr)
|
LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
|
||||||
|
ast::CookedStr)
|
||||||
}
|
}
|
||||||
token::LIT_STR_RAW(s, n) => {
|
token::LIT_STR_RAW(s, n) => {
|
||||||
LitStr(self.id_to_interned_str(s), ast::RawStr(n))
|
LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()),
|
||||||
|
ast::RawStr(n))
|
||||||
}
|
}
|
||||||
token::LIT_BINARY_RAW(ref v, _) |
|
token::LIT_BINARY(i) =>
|
||||||
token::LIT_BINARY(ref v) => LitBinary(v.clone()),
|
LitBinary(parse::binary_lit(self.id_to_interned_str(i).get())),
|
||||||
|
token::LIT_BINARY_RAW(i, _) =>
|
||||||
|
LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())),
|
||||||
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
|
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
|
||||||
_ => { self.unexpected_last(tok); }
|
_ => { self.unexpected_last(tok); }
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,8 +79,8 @@ pub enum Token {
|
||||||
QUESTION,
|
QUESTION,
|
||||||
|
|
||||||
/* Literals */
|
/* Literals */
|
||||||
LIT_BYTE(u8),
|
LIT_BYTE(Ident),
|
||||||
LIT_CHAR(char),
|
LIT_CHAR(Ident),
|
||||||
LIT_INT(i64, ast::IntTy),
|
LIT_INT(i64, ast::IntTy),
|
||||||
LIT_UINT(u64, ast::UintTy),
|
LIT_UINT(u64, ast::UintTy),
|
||||||
LIT_INT_UNSUFFIXED(i64),
|
LIT_INT_UNSUFFIXED(i64),
|
||||||
|
@ -88,8 +88,8 @@ pub enum Token {
|
||||||
LIT_FLOAT_UNSUFFIXED(Ident),
|
LIT_FLOAT_UNSUFFIXED(Ident),
|
||||||
LIT_STR(Ident),
|
LIT_STR(Ident),
|
||||||
LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */
|
LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */
|
||||||
LIT_BINARY(Rc<Vec<u8>>),
|
LIT_BINARY(Ident),
|
||||||
LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
|
LIT_BINARY_RAW(Ident, uint), /* raw binary str delimited by n hash symbols */
|
||||||
|
|
||||||
/* Name components */
|
/* Name components */
|
||||||
/// An identifier contains an "is_mod_name" boolean,
|
/// An identifier contains an "is_mod_name" boolean,
|
||||||
|
@ -201,20 +201,10 @@ pub fn to_string(t: &Token) -> String {
|
||||||
|
|
||||||
/* Literals */
|
/* Literals */
|
||||||
LIT_BYTE(b) => {
|
LIT_BYTE(b) => {
|
||||||
let mut res = String::from_str("b'");
|
format!("b'{}'", get_ident(b).get())
|
||||||
(b as char).escape_default(|c| {
|
|
||||||
res.push_char(c);
|
|
||||||
});
|
|
||||||
res.push_char('\'');
|
|
||||||
res
|
|
||||||
}
|
}
|
||||||
LIT_CHAR(c) => {
|
LIT_CHAR(c) => {
|
||||||
let mut res = String::from_str("'");
|
format!("'{}'", get_ident(c).get())
|
||||||
c.escape_default(|c| {
|
|
||||||
res.push_char(c);
|
|
||||||
});
|
|
||||||
res.push_char('\'');
|
|
||||||
res
|
|
||||||
}
|
}
|
||||||
LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)),
|
LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)),
|
||||||
LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)),
|
LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)),
|
||||||
|
@ -235,20 +225,18 @@ pub fn to_string(t: &Token) -> String {
|
||||||
body
|
body
|
||||||
}
|
}
|
||||||
LIT_STR(s) => {
|
LIT_STR(s) => {
|
||||||
format!("\"{}\"", get_ident(s).get().escape_default())
|
format!("\"{}\"", get_ident(s).get())
|
||||||
}
|
}
|
||||||
LIT_STR_RAW(s, n) => {
|
LIT_STR_RAW(s, n) => {
|
||||||
format!("r{delim}\"{string}\"{delim}",
|
format!("r{delim}\"{string}\"{delim}",
|
||||||
delim="#".repeat(n), string=get_ident(s))
|
delim="#".repeat(n), string=get_ident(s))
|
||||||
}
|
}
|
||||||
LIT_BINARY(ref v) => {
|
LIT_BINARY(v) => {
|
||||||
format!(
|
format!("b\"{}\"", get_ident(v).get())
|
||||||
"b\"{}\"",
|
|
||||||
v.iter().map(|&b| b as char).collect::<String>().escape_default())
|
|
||||||
}
|
}
|
||||||
LIT_BINARY_RAW(ref s, n) => {
|
LIT_BINARY_RAW(s, n) => {
|
||||||
format!("br{delim}\"{string}\"{delim}",
|
format!("br{delim}\"{string}\"{delim}",
|
||||||
delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
|
delim="#".repeat(n), string=get_ident(s).get())
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Name components */
|
/* Name components */
|
||||||
|
|
15
src/test/run-pass/string-escapes.rs
Normal file
15
src/test/run-pass/string-escapes.rs
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||||
|
// file at the top-level directory of this distribution and at
|
||||||
|
// http://rust-lang.org/COPYRIGHT.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||||
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||||
|
// option. This file may not be copied, modified, or distributed
|
||||||
|
// except according to those terms.
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let x = "\\\\\
|
||||||
|
";
|
||||||
|
assert!(x == r"\\"); // extraneous whitespace stripped
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue