rust/src/libsyntax/parse/lexer/mod.rs

1914 lines
69 KiB
Rust
Raw Normal View History

2014-05-21 16:57:31 -07:00
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use ast;
use syntax_pos::{self, BytePos, CharPos, Pos, Span};
use codemap::CodeMap;
2015-12-21 10:00:43 +13:00
use errors::{FatalError, Handler, DiagnosticBuilder};
2014-05-21 16:57:31 -07:00
use ext::tt::transcribe::tt_next_token;
use parse::token::{self, keywords, str_to_ident};
use str::char_at;
use rustc_unicode::property::Pattern_White_Space;
2014-05-21 16:57:31 -07:00
2015-04-15 22:15:50 -07:00
use std::borrow::Cow;
2014-05-21 16:57:31 -07:00
use std::char;
use std::mem::replace;
use std::rc::Rc;
pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};
2014-05-21 16:57:31 -07:00
pub mod comments;
mod unicode_chars;
2014-05-21 16:57:31 -07:00
pub trait Reader {
fn is_eof(&self) -> bool;
fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
let res = self.try_next_token();
self.unwrap_or_abort(res)
}
2014-05-21 16:57:31 -07:00
/// Report a fatal error with the current span.
2015-10-23 19:20:03 -07:00
fn fatal(&self, &str) -> FatalError;
2014-05-21 16:57:31 -07:00
/// Report a non-fatal error with the current span.
fn err(&self, &str);
fn emit_fatal_errors(&mut self);
fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
match res {
Ok(tok) => tok,
Err(_) => {
self.emit_fatal_errors();
panic!(FatalError);
}
}
}
2014-05-21 16:57:31 -07:00
fn peek(&self) -> TokenAndSpan;
/// Get a token the parser cares about.
fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
let mut t = self.try_next_token()?;
loop {
match t.tok {
token::Whitespace | token::Comment | token::Shebang(_) => {
t = self.try_next_token()?;
2016-01-03 11:14:09 +02:00
}
_ => break,
}
}
Ok(t)
}
fn real_token(&mut self) -> TokenAndSpan {
let res = self.try_real_token();
self.unwrap_or_abort(res)
}
2014-05-21 16:57:31 -07:00
}
2015-01-28 08:34:18 -05:00
#[derive(Clone, PartialEq, Eq, Debug)]
2014-05-21 16:57:31 -07:00
pub struct TokenAndSpan {
pub tok: token::Token,
pub sp: Span,
}
pub struct StringReader<'a> {
pub span_diagnostic: &'a Handler,
2014-06-09 13:12:30 -07:00
/// The absolute offset within the codemap of the next character to read
pub next_pos: BytePos,
/// The absolute offset within the codemap of the current character
pub pos: BytePos,
2014-06-09 13:12:30 -07:00
/// The column of the next character to read
2014-05-21 16:57:31 -07:00
pub col: CharPos,
2014-06-09 13:12:30 -07:00
/// The last character to be read
2014-05-21 16:57:31 -07:00
pub curr: Option<char>,
pub filemap: Rc<syntax_pos::FileMap>,
/// If Some, stop reading the source at this position (inclusive).
pub terminator: Option<BytePos>,
/// Whether to record new-lines in filemap. This is only necessary the first
/// time a filemap is lexed. If part of a filemap is being re-lexed, this
/// should be set to false.
pub save_new_lines: bool,
2016-01-03 11:14:09 +02:00
// cached:
2014-05-21 16:57:31 -07:00
pub peek_tok: token::Token,
pub peek_span: Span,
pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
// cache a direct reference to the source text, so that we don't have to
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
2016-01-03 11:14:09 +02:00
source_text: Rc<String>,
2014-05-21 16:57:31 -07:00
}
impl<'a> Reader for StringReader<'a> {
2016-01-03 11:14:09 +02:00
fn is_eof(&self) -> bool {
if self.curr.is_none() {
return true;
}
match self.terminator {
Some(t) => self.next_pos > t,
None => false,
}
2016-01-03 11:14:09 +02:00
}
2014-06-09 13:12:30 -07:00
/// Return the next token. EFFECT: advances the string_reader.
fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
assert!(self.fatal_errs.is_empty());
2014-05-21 16:57:31 -07:00
let ret_val = TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: replace(&mut self.peek_tok, token::Underscore),
2014-05-21 16:57:31 -07:00
sp: self.peek_span,
};
self.advance_token()?;
Ok(ret_val)
2014-05-21 16:57:31 -07:00
}
2015-10-23 19:20:03 -07:00
fn fatal(&self, m: &str) -> FatalError {
self.fatal_span(self.peek_span, m)
2014-05-21 16:57:31 -07:00
}
fn err(&self, m: &str) {
self.err_span(self.peek_span, m)
2014-05-21 16:57:31 -07:00
}
fn emit_fatal_errors(&mut self) {
for err in &mut self.fatal_errs {
err.emit();
}
self.fatal_errs.clear();
}
2014-05-21 16:57:31 -07:00
fn peek(&self) -> TokenAndSpan {
// FIXME(pcwalton): Bad copy!
TokenAndSpan {
tok: self.peek_tok.clone(),
sp: self.peek_span,
}
}
}
impl<'a> Reader for TtReader<'a> {
fn is_eof(&self) -> bool {
2014-10-27 19:22:52 +11:00
self.cur_tok == token::Eof
2014-05-21 16:57:31 -07:00
}
fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
assert!(self.fatal_errs.is_empty());
2014-05-21 16:57:31 -07:00
let r = tt_next_token(self);
debug!("TtReader: r={:?}", r);
Ok(r)
2014-05-21 16:57:31 -07:00
}
2015-10-23 19:20:03 -07:00
fn fatal(&self, m: &str) -> FatalError {
self.sp_diag.span_fatal(self.cur_span, m)
2014-05-21 16:57:31 -07:00
}
fn err(&self, m: &str) {
self.sp_diag.span_err(self.cur_span, m);
}
fn emit_fatal_errors(&mut self) {
for err in &mut self.fatal_errs {
err.emit();
}
self.fatal_errs.clear();
}
2014-05-21 16:57:31 -07:00
fn peek(&self) -> TokenAndSpan {
TokenAndSpan {
tok: self.cur_tok.clone(),
sp: self.cur_span,
}
}
}
impl<'a> StringReader<'a> {
/// For comments.rs, which hackily pokes into next_pos and curr
pub fn new_raw<'b>(span_diagnostic: &'b Handler,
filemap: Rc<syntax_pos::FileMap>)
2016-01-03 11:14:09 +02:00
-> StringReader<'b> {
let mut sr = StringReader::new_raw_internal(span_diagnostic, filemap);
sr.bump();
sr
}
fn new_raw_internal<'b>(span_diagnostic: &'b Handler,
filemap: Rc<syntax_pos::FileMap>)
-> StringReader<'b> {
if filemap.src.is_none() {
2016-01-03 11:14:09 +02:00
span_diagnostic.bug(&format!("Cannot lex filemap \
without source: {}",
filemap.name)[..]);
}
let source_text = (*filemap.src.as_ref().unwrap()).clone();
StringReader {
2014-05-21 16:57:31 -07:00
span_diagnostic: span_diagnostic,
next_pos: filemap.start_pos,
pos: filemap.start_pos,
2014-05-21 16:57:31 -07:00
col: CharPos(0),
curr: Some('\n'),
filemap: filemap,
terminator: None,
save_new_lines: true,
2016-01-03 11:14:09 +02:00
// dummy values; not read
2014-10-27 19:22:52 +11:00
peek_tok: token::Eof,
peek_span: syntax_pos::DUMMY_SP,
2016-01-03 11:14:09 +02:00
source_text: source_text,
fatal_errs: Vec::new(),
}
2014-05-21 16:57:31 -07:00
}
pub fn new<'b>(span_diagnostic: &'b Handler,
filemap: Rc<syntax_pos::FileMap>)
2016-01-03 11:14:09 +02:00
-> StringReader<'b> {
2014-05-21 16:57:31 -07:00
let mut sr = StringReader::new_raw(span_diagnostic, filemap);
if let Err(_) = sr.advance_token() {
sr.emit_fatal_errors();
panic!(FatalError);
}
2014-05-21 16:57:31 -07:00
sr
}
pub fn curr_is(&self, c: char) -> bool {
self.curr == Some(c)
}
/// Report a fatal lexical error with a given span.
2015-10-23 19:20:03 -07:00
pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
self.span_diagnostic.span_fatal(sp, m)
2014-05-21 16:57:31 -07:00
}
/// Report a lexical error with a given span.
pub fn err_span(&self, sp: Span, m: &str) {
self.span_diagnostic.span_err(sp, m)
}
/// Report a fatal error spanning [`from_pos`, `to_pos`).
2015-10-23 19:20:03 -07:00
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
self.fatal_span(syntax_pos::mk_sp(from_pos, to_pos), m)
}
/// Report a lexical error spanning [`from_pos`, `to_pos`).
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
self.err_span(syntax_pos::mk_sp(from_pos, to_pos), m)
2014-05-21 16:57:31 -07:00
}
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
/// escaped character to the error message
2015-10-23 19:20:03 -07:00
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
2014-05-21 16:57:31 -07:00
let mut m = m.to_string();
m.push_str(": ");
2016-01-03 11:14:09 +02:00
for c in c.escape_default() {
m.push(c)
}
2015-10-23 19:20:03 -07:00
self.fatal_span_(from_pos, to_pos, &m[..])
2014-05-21 16:57:31 -07:00
}
2015-12-21 10:00:43 +13:00
fn struct_fatal_span_char(&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char)
2016-01-03 11:14:09 +02:00
-> DiagnosticBuilder<'a> {
2015-12-21 10:00:43 +13:00
let mut m = m.to_string();
m.push_str(": ");
2016-01-03 11:14:09 +02:00
for c in c.escape_default() {
m.push(c)
}
self.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
2015-12-21 10:00:43 +13:00
}
2014-05-21 16:57:31 -07:00
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
/// escaped character to the error message
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
2014-05-21 16:57:31 -07:00
let mut m = m.to_string();
m.push_str(": ");
2016-01-03 11:14:09 +02:00
for c in c.escape_default() {
m.push(c)
}
self.err_span_(from_pos, to_pos, &m[..]);
2014-05-21 16:57:31 -07:00
}
2015-12-21 10:00:43 +13:00
fn struct_err_span_char(&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char)
2016-01-03 11:14:09 +02:00
-> DiagnosticBuilder<'a> {
2015-12-21 10:00:43 +13:00
let mut m = m.to_string();
m.push_str(": ");
2016-01-03 11:14:09 +02:00
for c in c.escape_default() {
m.push(c)
}
self.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
2015-12-21 10:00:43 +13:00
}
2014-05-21 16:57:31 -07:00
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
/// offending string to the error message
2015-10-23 19:20:03 -07:00
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
2014-05-21 16:57:31 -07:00
m.push_str(": ");
let from = self.byte_offset(from_pos).to_usize();
let to = self.byte_offset(to_pos).to_usize();
m.push_str(&self.source_text[from..to]);
2015-10-23 19:20:03 -07:00
self.fatal_span_(from_pos, to_pos, &m[..])
2014-05-21 16:57:31 -07:00
}
/// Advance peek_tok and peek_span to refer to the next token, and
/// possibly update the interner.
fn advance_token(&mut self) -> Result<(), ()> {
match self.scan_whitespace_or_comment() {
2014-05-21 16:57:31 -07:00
Some(comment) => {
self.peek_span = comment.sp;
self.peek_tok = comment.tok;
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
None => {
if self.is_eof() {
2014-10-27 19:22:52 +11:00
self.peek_tok = token::Eof;
self.peek_span = syntax_pos::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
2014-05-21 16:57:31 -07:00
} else {
let start_bytepos = self.pos;
self.peek_tok = self.next_token_inner()?;
self.peek_span = syntax_pos::mk_sp(start_bytepos, self.pos);
2014-05-21 16:57:31 -07:00
};
}
}
Ok(())
2014-05-21 16:57:31 -07:00
}
fn byte_offset(&self, pos: BytePos) -> BytePos {
(pos - self.filemap.start_pos)
}
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `self.pos`, meaning the slice does not include
2014-05-21 16:57:31 -07:00
/// the character `self.curr`.
2016-01-03 11:14:09 +02:00
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
2014-12-08 13:28:32 -05:00
{
self.with_str_from_to(start, self.pos, f)
2014-05-21 16:57:31 -07:00
}
2014-07-06 01:17:59 -07:00
/// Create a Name from a given offset to the current offset, each
/// adjusted 1 towards each other (assumes that on either side there is a
/// single-byte delimiter).
2014-07-06 01:17:59 -07:00
pub fn name_from(&self, start: BytePos) -> ast::Name {
debug!("taking an ident from {:?} to {:?}", start, self.pos);
2014-07-06 01:17:59 -07:00
self.with_str_from(start, token::intern)
}
2014-07-06 01:17:59 -07:00
/// As name_from, with an explicit endpoint.
pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
debug!("taking an ident from {:?} to {:?}", start, end);
2014-07-06 01:17:59 -07:00
self.with_str_from_to(start, end, token::intern)
}
2014-05-21 16:57:31 -07:00
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
2016-01-03 11:14:09 +02:00
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
2014-12-08 13:28:32 -05:00
{
2016-01-03 11:14:09 +02:00
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
2014-05-21 16:57:31 -07:00
}
/// Converts CRLF to LF in the given string, raising an error on bare CR.
2016-01-03 11:14:09 +02:00
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut i = 0;
while i < s.len() {
let ch = char_at(s, i);
let next = i + ch.len_utf8();
if ch == '\r' {
if next < s.len() && char_at(s, next) == '\n' {
2015-04-15 22:15:50 -07:00
return translate_crlf_(self, start, s, errmsg, i).into();
}
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
self.err_span_(pos, end_pos, errmsg);
}
i = next;
}
2015-04-15 22:15:50 -07:00
return s.into();
2016-01-03 11:14:09 +02:00
fn translate_crlf_(rdr: &StringReader,
start: BytePos,
s: &str,
errmsg: &str,
mut i: usize)
-> String {
let mut buf = String::with_capacity(s.len());
let mut j = 0;
while i < s.len() {
let ch = char_at(s, i);
let next = i + ch.len_utf8();
if ch == '\r' {
2016-01-03 11:14:09 +02:00
if j < i {
buf.push_str(&s[j..i]);
}
j = next;
if next >= s.len() || char_at(s, next) != '\n' {
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
rdr.err_span_(pos, end_pos, errmsg);
}
}
i = next;
}
2016-01-03 11:14:09 +02:00
if j < s.len() {
buf.push_str(&s[j..]);
}
buf
}
}
2014-05-21 16:57:31 -07:00
/// Advance the StringReader by one character. If a newline is
/// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(&mut self) {
self.pos = self.next_pos;
let current_byte_offset = self.byte_offset(self.next_pos).to_usize();
if current_byte_offset < self.source_text.len() {
2014-05-21 16:57:31 -07:00
let last_char = self.curr.unwrap();
let ch = char_at(&self.source_text, current_byte_offset);
let byte_offset_diff = ch.len_utf8();
self.next_pos = self.next_pos + Pos::from_usize(byte_offset_diff);
self.curr = Some(ch);
self.col = self.col + CharPos(1);
2014-05-21 16:57:31 -07:00
if last_char == '\n' {
if self.save_new_lines {
self.filemap.next_line(self.pos);
}
self.col = CharPos(0);
2014-05-21 16:57:31 -07:00
}
if byte_offset_diff > 1 {
self.filemap.record_multibyte_char(self.pos, byte_offset_diff);
2014-05-21 16:57:31 -07:00
}
} else {
self.curr = None;
}
}
pub fn nextch(&self) -> Option<char> {
let offset = self.byte_offset(self.next_pos).to_usize();
if offset < self.source_text.len() {
Some(char_at(&self.source_text, offset))
2014-05-21 16:57:31 -07:00
} else {
None
}
}
pub fn nextch_is(&self, c: char) -> bool {
self.nextch() == Some(c)
}
pub fn nextnextch(&self) -> Option<char> {
let offset = self.byte_offset(self.next_pos).to_usize();
let s = &self.source_text[..];
2016-01-03 11:14:09 +02:00
if offset >= s.len() {
return None;
}
let next = offset + char_at(s, offset).len_utf8();
2014-05-21 16:57:31 -07:00
if next < s.len() {
Some(char_at(s, next))
2014-05-21 16:57:31 -07:00
} else {
None
}
}
pub fn nextnextch_is(&self, c: char) -> bool {
self.nextnextch() == Some(c)
}
/// Eats <XID_start><XID_continue>*, if possible.
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
    if !ident_start(self.curr) {
        return None;
    }
    let start = self.pos;
    while ident_continue(self.curr) {
        self.bump();
    }
    // A lone underscore is not a usable raw name.
    self.with_str_from(start, |string| {
        match string {
            "_" => None,
            name => Some(token::intern(name)),
        }
    })
}
2014-05-21 16:57:31 -07:00
/// PRECONDITION: self.curr is not whitespace
/// Eats any kind of comment.
fn scan_comment(&mut self) -> Option<TokenAndSpan> {
if let Some(c) = self.curr {
if c.is_whitespace() {
self.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos),
"called consume_any_line_comment, but there \
was whitespace");
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
if self.curr_is('/') {
match self.nextch() {
Some('/') => {
self.bump();
self.bump();
2014-05-21 16:57:31 -07:00
// line comments starting with "///" or "//!" are doc-comments
let doc_comment = self.curr_is('/') || self.curr_is('!');
let start_bpos = self.pos - BytePos(2);
while !self.is_eof() {
match self.curr.unwrap() {
'\n' => break,
'\r' => {
if self.nextch_is('\n') {
// CRLF
2016-01-03 11:14:09 +02:00
break;
} else if doc_comment {
self.err_span_(self.pos,
self.next_pos,
"bare CR not allowed in doc-comment");
}
}
2016-01-03 11:14:09 +02:00
_ => (),
2014-05-21 16:57:31 -07:00
}
self.bump();
}
return if doc_comment {
self.with_str_from(start_bpos, |string| {
// comments with only more "/"s are not doc comments
let tok = if is_doc_comment(string) {
2014-10-27 19:22:52 +11:00
token::DocComment(token::intern(string))
2014-05-21 16:57:31 -07:00
} else {
2014-10-27 19:22:52 +11:00
token::Comment
};
2014-05-21 16:57:31 -07:00
Some(TokenAndSpan {
tok: tok,
sp: syntax_pos::mk_sp(start_bpos, self.pos),
})
})
2014-05-21 16:57:31 -07:00
} else {
Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Comment,
sp: syntax_pos::mk_sp(start_bpos, self.pos),
})
2016-01-03 11:14:09 +02:00
};
2014-05-21 16:57:31 -07:00
}
Some('*') => {
2016-01-03 11:14:09 +02:00
self.bump();
self.bump();
self.scan_block_comment()
}
2016-01-03 11:14:09 +02:00
_ => None,
2014-05-21 16:57:31 -07:00
}
} else if self.curr_is('#') {
if self.nextch_is('!') {
// Parse an inner attribute.
if self.nextnextch_is('[') {
return None;
}
// I guess this is the only way to figure out if
// we're at the beginning of the file...
let cmap = CodeMap::new();
cmap.files.borrow_mut().push(self.filemap.clone());
let loc = cmap.lookup_char_pos_adj(self.pos);
debug!("Skipping a shebang");
if loc.line == 1 && loc.col == CharPos(0) {
// FIXME: Add shebang "token", return it
let start = self.pos;
2016-01-03 11:14:09 +02:00
while !self.curr_is('\n') && !self.is_eof() {
self.bump();
}
return Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Shebang(self.name_from(start)),
sp: syntax_pos::mk_sp(start, self.pos),
});
2014-05-21 16:57:31 -07:00
}
}
None
} else {
None
}
}
/// If there is whitespace, shebang, or a comment, scan it. Otherwise,
/// return None.
fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
match self.curr.unwrap_or('\0') {
// # to handle shebang at start of file -- this is the entry point
// for skipping over all "junk"
'/' | '#' => {
let c = self.scan_comment();
debug!("scanning a comment {:?}", c);
c
},
c if is_pattern_whitespace(Some(c)) => {
let start_bpos = self.pos;
while is_pattern_whitespace(self.curr) {
2016-01-03 11:14:09 +02:00
self.bump();
}
let c = Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Whitespace,
sp: syntax_pos::mk_sp(start_bpos, self.pos),
});
debug!("scanning whitespace: {:?}", c);
c
2016-01-03 11:14:09 +02:00
}
_ => None,
}
2014-05-21 16:57:31 -07:00
}
2014-06-09 13:12:30 -07:00
/// Might return a sugared-doc-attr
fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
2014-05-21 16:57:31 -07:00
// block comments starting with "/**" or "/*!" are doc-comments
let is_doc_comment = self.curr_is('*') || self.curr_is('!');
let start_bpos = self.pos - BytePos(2);
2014-05-21 16:57:31 -07:00
2015-01-18 00:18:19 +00:00
let mut level: isize = 1;
let mut has_cr = false;
2014-05-21 16:57:31 -07:00
while level > 0 {
if self.is_eof() {
let msg = if is_doc_comment {
"unterminated block doc-comment"
} else {
"unterminated block comment"
};
let last_bpos = self.pos;
2015-10-23 19:20:03 -07:00
panic!(self.fatal_span_(start_bpos, last_bpos, msg));
2014-05-21 16:57:31 -07:00
}
let n = self.curr.unwrap();
match n {
'/' if self.nextch_is('*') => {
level += 1;
self.bump();
}
'*' if self.nextch_is('/') => {
level -= 1;
self.bump();
}
'\r' => {
has_cr = true;
}
2016-01-03 11:14:09 +02:00
_ => (),
}
self.bump();
2014-05-21 16:57:31 -07:00
}
self.with_str_from(start_bpos, |string| {
// but comments with only "*"s between two "/"s are not
let tok = if is_block_doc_comment(string) {
let string = if has_cr {
2016-01-03 11:14:09 +02:00
self.translate_crlf(start_bpos,
string,
"bare CR not allowed in block doc-comment")
2016-01-03 11:14:09 +02:00
} else {
string.into()
};
token::DocComment(token::intern(&string[..]))
} else {
2014-10-27 19:22:52 +11:00
token::Comment
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
Some(TokenAndSpan {
tok: tok,
sp: syntax_pos::mk_sp(start_bpos, self.pos),
})
})
2014-05-21 16:57:31 -07:00
}
/// Scan through any digits (base `scan_radix`) or underscores,
/// and return how many digits there were.
///
/// `real_radix` represents the true radix of the number we're
/// interested in, and errors will be emitted for any digits
/// between `real_radix` and `scan_radix`.
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
assert!(real_radix <= scan_radix);
let mut len = 0;
2014-05-21 16:57:31 -07:00
loop {
let c = self.curr;
2016-01-03 11:14:09 +02:00
if c == Some('_') {
debug!("skipping a _");
self.bump();
continue;
}
match c.and_then(|cc| cc.to_digit(scan_radix)) {
Some(_) => {
debug!("{:?} in scan_digits", c);
// check that the hypothetical digit is actually
// in range for the true radix
if c.unwrap().to_digit(real_radix).is_none() {
self.err_span_(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
&format!("invalid digit for a base {} literal", real_radix));
}
len += 1;
self.bump();
}
2016-01-03 11:14:09 +02:00
_ => return len,
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
/// Lex a LIT_INTEGER or a LIT_FLOAT
fn scan_number(&mut self, c: char) -> token::Lit {
let num_digits;
let mut base = 10;
let start_bpos = self.pos;
2014-05-21 16:57:31 -07:00
self.bump();
if c == '0' {
match self.curr.unwrap_or('\0') {
2016-01-03 11:14:09 +02:00
'b' => {
self.bump();
base = 2;
num_digits = self.scan_digits(2, 10);
}
'o' => {
self.bump();
base = 8;
num_digits = self.scan_digits(8, 10);
}
'x' => {
self.bump();
base = 16;
num_digits = self.scan_digits(16, 16);
}
'0'...'9' | '_' | '.' => {
num_digits = self.scan_digits(10, 10) + 1;
}
_ => {
// just a 0
return token::Integer(self.name_from(start_bpos));
}
2014-05-21 16:57:31 -07:00
}
} else if c.is_digit(10) {
num_digits = self.scan_digits(10, 10) + 1;
} else {
num_digits = 0;
2014-05-21 16:57:31 -07:00
}
if num_digits == 0 {
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"no valid digits found for number");
return token::Integer(token::intern("0"));
2014-05-21 16:57:31 -07:00
}
// might be a float, but don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
2016-01-03 11:14:09 +02:00
if self.curr_is('.') && !self.nextch_is('.') &&
!self.nextch()
.unwrap_or('\0')
.is_xid_start() {
// might have stuff after the ., and if it does, it needs to start
// with a number
2014-05-21 16:57:31 -07:00
self.bump();
if self.curr.unwrap_or('\0').is_digit(10) {
self.scan_digits(10, 10);
self.scan_float_exponent();
2014-05-21 16:57:31 -07:00
}
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
return token::Float(self.name_from(start_bpos));
2014-05-21 16:57:31 -07:00
} else {
// it might be a float if it has an exponent
if self.curr_is('e') || self.curr_is('E') {
self.scan_float_exponent();
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
return token::Float(self.name_from(start_bpos));
2014-05-21 16:57:31 -07:00
}
// but we certainly have an integer!
return token::Integer(self.name_from(start_bpos));
2014-05-21 16:57:31 -07:00
}
}
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
/// error if too many or too few digits are encountered.
2016-01-03 11:14:09 +02:00
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
debug!("scanning {} digits until {:?}", n_digits, delim);
let start_bpos = self.pos;
let mut accum_int = 0;
let mut valid = true;
for _ in 0..n_digits {
2014-05-21 16:57:31 -07:00
if self.is_eof() {
let last_bpos = self.pos;
2015-10-23 19:20:03 -07:00
panic!(self.fatal_span_(start_bpos,
last_bpos,
"unterminated numeric character escape"));
2014-05-21 16:57:31 -07:00
}
if self.curr_is(delim) {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
last_bpos,
"numeric character escape is too short");
valid = false;
2014-05-21 16:57:31 -07:00
break;
}
let c = self.curr.unwrap_or('\x00');
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
self.err_span_char(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
"invalid character in numeric character escape",
c);
valid = false;
2014-05-21 16:57:31 -07:00
0
Add trivial cast lints. This permits all coercions to be performed in casts, but adds lints to warn in those cases. Part of this patch moves cast checking to a later stage of type checking. We acquire obligations to check casts as part of type checking where we previously checked them. Once we have type checked a function or module, then we check any cast obligations which have been acquired. That means we have more type information available to check casts (this was crucial to making coercions work properly in place of some casts), but it means that casts cannot feed input into type inference. [breaking change] * Adds two new lints for trivial casts and trivial numeric casts, these are warn by default, but can cause errors if you build with warnings as errors. Previously, trivial numeric casts and casts to trait objects were allowed. * The unused casts lint has gone. * Interactions between casting and type inference have changed in subtle ways. Two ways this might manifest are: - You may need to 'direct' casts more with extra type information, for example, in some cases where `foo as _ as T` succeeded, you may now need to specify the type for `_` - Casts do not influence inference of integer types. E.g., the following used to type check: ``` let x = 42; let y = &x as *const u32; ``` Because the cast would inform inference that `x` must have type `u32`. This no longer applies and the compiler will fallback to `i32` for `x` and thus there will be a type error in the cast. The solution is to add more type information: ``` let x: u32 = 42; let y = &x as *const u32; ```
2015-03-20 17:15:27 +13:00
});
2014-05-21 16:57:31 -07:00
self.bump();
}
if below_0x7f_only && accum_int >= 0x80 {
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"this form of character escape may only be used with characters in \
the range [\\x00-\\x7f]");
valid = false;
}
2014-05-21 16:57:31 -07:00
match char::from_u32(accum_int) {
Some(_) => valid,
2014-05-21 16:57:31 -07:00
None => {
let last_bpos = self.pos;
self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
false
2014-05-21 16:57:31 -07:00
}
}
}
/// Scan for a single (possibly escaped) byte or char
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
/// `start` is the position of `first_source_char`, which is already consumed.
///
/// Returns true if there was a valid char/byte, false otherwise.
2016-01-03 11:14:09 +02:00
fn scan_char_or_byte(&mut self,
start: BytePos,
first_source_char: char,
ascii_only: bool,
delim: char)
-> bool {
match first_source_char {
'\\' => {
// '\X' for some X must be a character constant:
let escaped = self.curr;
let escaped_pos = self.pos;
self.bump();
match escaped {
2016-01-03 11:14:09 +02:00
None => {} // EOF here is an error that will be checked later.
Some(e) => {
return match e {
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
'x' => self.scan_byte_escape(delim, !ascii_only),
'u' => {
let valid = if self.curr_is('{') {
self.scan_unicode_escape(delim) && !ascii_only
} else {
let span = syntax_pos::mk_sp(start, self.pos);
2016-01-03 11:14:09 +02:00
self.span_diagnostic
.struct_span_err(span, "incorrect unicode escape sequence")
2015-12-21 10:00:43 +13:00
.span_help(span,
2016-01-03 11:14:09 +02:00
"format of unicode escape sequences is \
`\\u{}`")
2015-12-21 10:00:43 +13:00
.emit();
false
};
if ascii_only {
2016-01-03 11:14:09 +02:00
self.err_span_(start,
self.pos,
2016-01-03 11:14:09 +02:00
"unicode escape sequences cannot be used as a \
byte or in a byte string");
2015-06-22 15:30:56 -07:00
}
valid
}
'\n' if delim == '"' => {
self.consume_whitespace();
true
2016-01-03 11:14:09 +02:00
}
'\r' if delim == '"' && self.curr_is('\n') => {
self.consume_whitespace();
true
}
c => {
let pos = self.pos;
2016-01-03 11:14:09 +02:00
let mut err = self.struct_err_span_char(escaped_pos,
pos,
2016-01-03 11:14:09 +02:00
if ascii_only {
"unknown byte escape"
} else {
"unknown character \
escape"
},
c);
if e == '\r' {
err.span_help(syntax_pos::mk_sp(escaped_pos, pos),
2016-01-03 11:14:09 +02:00
"this is an isolated carriage return; consider \
checking your editor and version control \
settings");
}
if (e == '{' || e == '}') && !ascii_only {
err.span_help(syntax_pos::mk_sp(escaped_pos, pos),
2016-01-03 11:14:09 +02:00
"if used in a formatting string, curly braces \
are escaped with `{{` and `}}`");
}
2015-12-21 10:00:43 +13:00
err.emit();
false
}
}
}
}
}
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
let pos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_char(start,
pos,
2016-01-03 11:14:09 +02:00
if ascii_only {
"byte constant must be escaped"
} else {
"character constant must be escaped"
},
first_source_char);
return false;
}
'\r' => {
if self.curr_is('\n') {
self.bump();
return true;
} else {
2016-01-03 11:14:09 +02:00
self.err_span_(start,
self.pos,
"bare CR not allowed in string, use \\r instead");
return false;
}
}
2016-01-03 11:14:09 +02:00
_ => {
if ascii_only && first_source_char > '\x7F' {
let pos = self.pos;
self.err_span_(start,
pos,
"byte constant must be ASCII. Use a \\xHH escape for a \
non-ASCII byte");
2016-01-03 11:14:09 +02:00
return false;
}
}
}
true
}
/// Scan over a \u{...} escape
///
/// At this point, we have already seen the \ and the u, the { is the current character. We
/// will read at least one digit, and up to 6, and pass over the }.
fn scan_unicode_escape(&mut self, delim: char) -> bool {
self.bump(); // past the {
let start_bpos = self.pos;
let mut count = 0;
let mut accum_int = 0;
let mut valid = true;
while !self.curr_is('}') && count <= 6 {
let c = match self.curr {
Some(c) => c,
None => {
2016-01-03 11:14:09 +02:00
panic!(self.fatal_span_(start_bpos,
self.pos,
2015-10-23 19:20:03 -07:00
"unterminated unicode escape (found EOF)"));
}
};
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
if c == delim {
panic!(self.fatal_span_(self.pos,
self.next_pos,
2015-10-23 19:20:03 -07:00
"unterminated unicode escape (needed a `}`)"));
} else {
self.err_span_char(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
"invalid character in unicode escape",
c);
}
valid = false;
0
Add trivial cast lints. This permits all coercions to be performed in casts, but adds lints to warn in those cases. Part of this patch moves cast checking to a later stage of type checking. We acquire obligations to check casts as part of type checking where we previously checked them. Once we have type checked a function or module, then we check any cast obligations which have been acquired. That means we have more type information available to check casts (this was crucial to making coercions work properly in place of some casts), but it means that casts cannot feed input into type inference. [breaking change] * Adds two new lints for trivial casts and trivial numeric casts, these are warn by default, but can cause errors if you build with warnings as errors. Previously, trivial numeric casts and casts to trait objects were allowed. * The unused casts lint has gone. * Interactions between casting and type inference have changed in subtle ways. Two ways this might manifest are: - You may need to 'direct' casts more with extra type information, for example, in some cases where `foo as _ as T` succeeded, you may now need to specify the type for `_` - Casts do not influence inference of integer types. E.g., the following used to type check: ``` let x = 42; let y = &x as *const u32; ``` Because the cast would inform inference that `x` must have type `u32`. This no longer applies and the compiler will fallback to `i32` for `x` and thus there will be a type error in the cast. The solution is to add more type information: ``` let x: u32 = 42; let y = &x as *const u32; ```
2015-03-20 17:15:27 +13:00
});
self.bump();
count += 1;
}
if count > 6 {
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"overlong unicode escape (can have at most 6 hex digits)");
valid = false;
}
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"invalid unicode character escape");
2015-06-22 15:30:56 -07:00
valid = false;
}
self.bump(); // past the ending }
valid
}
/// Scan over a float exponent.
fn scan_float_exponent(&mut self) {
if self.curr_is('e') || self.curr_is('E') {
self.bump();
if self.curr_is('-') || self.curr_is('+') {
self.bump();
}
if self.scan_digits(10, 10) == 0 {
self.err_span_(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
"expected at least one digit in exponent")
}
}
}
/// Check that a base is valid for a floating literal, emitting a nice
/// error if it isn't.
2015-01-17 23:33:05 +00:00
fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
match base {
2016-01-03 11:14:09 +02:00
16 => {
self.err_span_(start_bpos,
last_bpos,
"hexadecimal float literal is not supported")
}
8 => {
self.err_span_(start_bpos,
last_bpos,
"octal float literal is not supported")
}
2 => {
self.err_span_(start_bpos,
last_bpos,
"binary float literal is not supported")
}
_ => (),
}
}
2014-10-27 19:22:52 +11:00
/// Consume a binary operator character, turning it into the compound
/// assignment form (e.g. `+=`) when a `=` immediately follows.
fn binop(&mut self, op: token::BinOpToken) -> token::Token {
    self.bump();
    if self.curr_is('=') {
        self.bump();
        token::BinOpEq(op)
    } else {
        token::BinOp(op)
    }
}
/// Return the next token from the string, advances the input past that
/// token, and updates the interner
fn next_token_inner(&mut self) -> Result<token::Token, ()> {
2014-05-21 16:57:31 -07:00
let c = self.curr;
2016-01-03 11:14:09 +02:00
if ident_start(c) &&
match (c.unwrap(), self.nextch(), self.nextnextch()) {
2014-05-21 16:57:31 -07:00
// Note: r as in r" or r#" is part of a raw string literal,
2014-06-06 16:04:04 +01:00
// b as in b' is part of a byte literal.
// They are not identifiers, and are handled further down.
2016-01-03 11:14:09 +02:00
('r', Some('"'), _) |
('r', Some('#'), _) |
('b', Some('"'), _) |
('b', Some('\''), _) |
('b', Some('r'), Some('"')) |
('b', Some('r'), Some('#')) => false,
_ => true,
2014-06-06 16:04:04 +01:00
} {
let start = self.pos;
2014-05-21 16:57:31 -07:00
while ident_continue(self.curr) {
self.bump();
}
return Ok(self.with_str_from(start, |string| {
2014-05-21 16:57:31 -07:00
if string == "_" {
2014-10-27 19:22:52 +11:00
token::Underscore
2014-05-21 16:57:31 -07:00
} else {
// FIXME: perform NFKC normalization here. (Issue #2253)
2016-04-16 04:12:02 +03:00
token::Ident(str_to_ident(string))
2014-05-21 16:57:31 -07:00
}
}));
2014-05-21 16:57:31 -07:00
}
if is_dec_digit(c) {
let num = self.scan_number(c.unwrap());
let suffix = self.scan_optional_raw_name();
debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
return Ok(token::Literal(num, suffix));
2014-05-21 16:57:31 -07:00
}
match c.expect("next_token_inner called at EOF") {
2016-01-03 11:14:09 +02:00
// One-byte tokens.
';' => {
2014-05-21 16:57:31 -07:00
self.bump();
return Ok(token::Semi);
2016-01-03 11:14:09 +02:00
}
',' => {
self.bump();
return Ok(token::Comma);
2016-01-03 11:14:09 +02:00
}
'.' => {
self.bump();
return if self.curr_is('.') {
self.bump();
if self.curr_is('.') {
self.bump();
Ok(token::DotDotDot)
2016-01-03 11:14:09 +02:00
} else {
Ok(token::DotDot)
2016-01-03 11:14:09 +02:00
}
} else {
Ok(token::Dot)
2016-01-03 11:14:09 +02:00
};
}
'(' => {
self.bump();
return Ok(token::OpenDelim(token::Paren));
2016-01-03 11:14:09 +02:00
}
')' => {
self.bump();
return Ok(token::CloseDelim(token::Paren));
2016-01-03 11:14:09 +02:00
}
'{' => {
self.bump();
return Ok(token::OpenDelim(token::Brace));
2016-01-03 11:14:09 +02:00
}
'}' => {
self.bump();
return Ok(token::CloseDelim(token::Brace));
2016-01-03 11:14:09 +02:00
}
'[' => {
self.bump();
return Ok(token::OpenDelim(token::Bracket));
2016-01-03 11:14:09 +02:00
}
']' => {
self.bump();
return Ok(token::CloseDelim(token::Bracket));
2016-01-03 11:14:09 +02:00
}
'@' => {
self.bump();
return Ok(token::At);
2016-01-03 11:14:09 +02:00
}
'#' => {
self.bump();
return Ok(token::Pound);
2016-01-03 11:14:09 +02:00
}
'~' => {
self.bump();
return Ok(token::Tilde);
2016-01-03 11:14:09 +02:00
}
'?' => {
self.bump();
return Ok(token::Question);
2016-01-03 11:14:09 +02:00
}
':' => {
self.bump();
if self.curr_is(':') {
self.bump();
return Ok(token::ModSep);
2016-01-03 11:14:09 +02:00
} else {
return Ok(token::Colon);
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'$' => {
self.bump();
return Ok(token::Dollar);
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// Multi-byte tokens.
'=' => {
2014-05-21 16:57:31 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
if self.curr_is('=') {
self.bump();
return Ok(token::EqEq);
2016-01-03 11:14:09 +02:00
} else if self.curr_is('>') {
self.bump();
return Ok(token::FatArrow);
2016-01-03 11:14:09 +02:00
} else {
return Ok(token::Eq);
2016-01-03 11:14:09 +02:00
}
}
'!' => {
2014-05-21 16:57:31 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
if self.curr_is('=') {
self.bump();
return Ok(token::Ne);
2016-01-03 11:14:09 +02:00
} else {
return Ok(token::Not);
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'<' => {
2014-05-21 16:57:31 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
match self.curr.unwrap_or('\x00') {
'=' => {
self.bump();
return Ok(token::Le);
2016-01-03 11:14:09 +02:00
}
'<' => {
return Ok(self.binop(token::Shl));
2016-01-03 11:14:09 +02:00
}
'-' => {
self.bump();
match self.curr.unwrap_or('\x00') {
_ => {
return Ok(token::LArrow);
2016-01-03 11:14:09 +02:00
}
}
}
_ => {
return Ok(token::Lt);
2016-01-03 11:14:09 +02:00
}
}
}
'>' => {
2014-05-21 16:57:31 -07:00
self.bump();
match self.curr.unwrap_or('\x00') {
2016-01-03 11:14:09 +02:00
'=' => {
self.bump();
return Ok(token::Ge);
2016-01-03 11:14:09 +02:00
}
'>' => {
return Ok(self.binop(token::Shr));
2016-01-03 11:14:09 +02:00
}
_ => {
return Ok(token::Gt);
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
}
2016-01-03 11:14:09 +02:00
'\'' => {
// Either a character constant 'a' OR a lifetime name 'abc
let start_with_quote = self.pos;
2016-01-03 11:14:09 +02:00
self.bump();
let start = self.pos;
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// the eof will be picked up by the final `'` check below
let c2 = self.curr.unwrap_or('\x00');
self.bump();
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// If the character is an ident start not followed by another single
// quote, then this is a lifetime name:
if ident_start(Some(c2)) && !self.curr_is('\'') {
while ident_continue(self.curr) {
self.bump();
}
// lifetimes shouldn't end with a single quote
// if we find one, then this is an invalid character literal
if self.curr_is('\'') {
panic!(self.fatal_span_verbose(
start_with_quote, self.next_pos,
String::from("character literal may only contain one codepoint")));
}
2016-01-03 11:14:09 +02:00
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let ident = self.with_str_from(start, |lifetime_name| {
str_to_ident(&format!("'{}", lifetime_name))
});
2016-01-03 11:14:09 +02:00
// Conjure up a "keyword checking ident" to make sure that
// the lifetime name is not a keyword.
let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
str_to_ident(lifetime_name)
});
2016-04-16 04:12:02 +03:00
let keyword_checking_token = &token::Ident(keyword_checking_ident);
let last_bpos = self.pos;
if keyword_checking_token.is_any_keyword() &&
!keyword_checking_token.is_keyword(keywords::Static) {
self.err_span_(start, last_bpos, "lifetimes cannot use keyword names");
2016-01-03 11:14:09 +02:00
}
return Ok(token::Lifetime(ident));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
false,
'\'');
2016-01-03 11:14:09 +02:00
if !self.curr_is('\'') {
panic!(self.fatal_span_verbose(
start_with_quote, self.pos,
String::from("character literal may only contain one codepoint")));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
token::intern("0")
};
self.bump(); // advance curr past token
let suffix = self.scan_optional_raw_name();
return Ok(token::Literal(token::Char(id), suffix));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'b' => {
2014-05-21 16:57:31 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
let lit = match self.curr {
Some('\'') => self.scan_byte(),
Some('"') => self.scan_byte_string(),
Some('r') => self.scan_raw_byte_string(),
_ => unreachable!(), // Should have been a token::Ident above.
};
let suffix = self.scan_optional_raw_name();
return Ok(token::Literal(lit, suffix));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'"' => {
let start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
let mut valid = true;
self.bump();
while !self.curr_is('"') {
if self.is_eof() {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
panic!(self.fatal_span_(start_bpos,
last_bpos,
"unterminated double quote string"));
}
2014-05-21 16:57:31 -07:00
let ch_start = self.pos;
2016-01-03 11:14:09 +02:00
let ch = self.curr.unwrap();
self.bump();
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
false,
'"');
}
// adjust for the ASCII " at the start of the literal
let id = if valid {
self.name_from(start_bpos + BytePos(1))
} else {
token::intern("??")
};
self.bump();
let suffix = self.scan_optional_raw_name();
return Ok(token::Literal(token::Str_(id), suffix));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'r' => {
let start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.bump();
let mut hash_count = 0;
while self.curr_is('#') {
self.bump();
hash_count += 1;
}
2014-05-21 16:57:31 -07:00
if self.is_eof() {
let last_bpos = self.pos;
2015-10-23 19:20:03 -07:00
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
2016-01-03 11:14:09 +02:00
} else if !self.curr_is('"') {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
let curr_char = self.curr.unwrap();
panic!(self.fatal_span_char(start_bpos,
last_bpos,
"found invalid character; only `#` is allowed \
in raw string delimitation",
curr_char));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
self.bump();
let content_start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
let mut content_end_bpos;
let mut valid = true;
'outer: loop {
if self.is_eof() {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
}
// if self.curr_is('"') {
// content_end_bpos = self.pos;
2016-01-03 11:14:09 +02:00
// for _ in 0..hash_count {
// self.bump();
// if !self.curr_is('#') {
// continue 'outer;
let c = self.curr.unwrap();
match c {
'"' => {
content_end_bpos = self.pos;
2016-01-03 11:14:09 +02:00
for _ in 0..hash_count {
self.bump();
if !self.curr_is('#') {
continue 'outer;
}
}
2016-01-03 11:14:09 +02:00
break;
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'\r' => {
if !self.nextch_is('\n') {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
last_bpos,
"bare CR not allowed in raw string, use \\r \
instead");
valid = false;
}
}
2016-01-03 11:14:09 +02:00
_ => (),
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
self.bump();
2014-05-21 16:57:31 -07:00
}
self.bump();
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from_to(content_start_bpos, content_end_bpos)
} else {
token::intern("??")
};
let suffix = self.scan_optional_raw_name();
return Ok(token::Literal(token::StrRaw(id, hash_count), suffix));
2016-01-03 11:14:09 +02:00
}
'-' => {
if self.nextch_is('>') {
self.bump();
self.bump();
return Ok(token::RArrow);
2016-01-03 11:14:09 +02:00
} else {
return Ok(self.binop(token::Minus));
2016-01-03 11:14:09 +02:00
}
}
'&' => {
if self.nextch_is('&') {
self.bump();
self.bump();
return Ok(token::AndAnd);
2016-01-03 11:14:09 +02:00
} else {
return Ok(self.binop(token::And));
2016-01-03 11:14:09 +02:00
}
}
'|' => {
match self.nextch() {
Some('|') => {
self.bump();
self.bump();
return Ok(token::OrOr);
2016-01-03 11:14:09 +02:00
}
_ => {
return Ok(self.binop(token::Or));
2016-01-03 11:14:09 +02:00
}
}
}
'+' => {
return Ok(self.binop(token::Plus));
2016-01-03 11:14:09 +02:00
}
'*' => {
return Ok(self.binop(token::Star));
2016-01-03 11:14:09 +02:00
}
'/' => {
return Ok(self.binop(token::Slash));
2016-01-03 11:14:09 +02:00
}
'^' => {
return Ok(self.binop(token::Caret));
2016-01-03 11:14:09 +02:00
}
'%' => {
return Ok(self.binop(token::Percent));
2016-01-03 11:14:09 +02:00
}
c => {
let last_bpos = self.pos;
let bpos = self.next_pos;
2016-01-03 11:14:09 +02:00
let mut err = self.struct_fatal_span_char(last_bpos,
bpos,
"unknown start of token",
c);
unicode_chars::check_for_substitution(&self, c, &mut err);
self.fatal_errs.push(err);
Err(())
2014-05-21 16:57:31 -07:00
}
}
}
/// Advance past any run of pattern whitespace.
fn consume_whitespace(&mut self) {
    loop {
        if !is_pattern_whitespace(self.curr) || self.is_eof() {
            break;
        }
        self.bump();
    }
}
fn read_to_eol(&mut self) -> String {
let mut val = String::new();
while !self.curr_is('\n') && !self.is_eof() {
val.push(self.curr.unwrap());
2014-05-21 16:57:31 -07:00
self.bump();
}
2016-01-03 11:14:09 +02:00
if self.curr_is('\n') {
self.bump();
}
return val;
2014-05-21 16:57:31 -07:00
}
fn read_one_line_comment(&mut self) -> String {
let val = self.read_to_eol();
2016-01-03 11:14:09 +02:00
assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
(val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
2014-05-21 16:57:31 -07:00
return val;
}
/// Skip horizontal whitespace: pattern whitespace that is not a newline.
fn consume_non_eol_whitespace(&mut self) {
    loop {
        let skippable = is_pattern_whitespace(self.curr) && !self.curr_is('\n') &&
                        !self.is_eof();
        if !skippable {
            return;
        }
        self.bump();
    }
}
fn peeking_at_comment(&self) -> bool {
2016-01-03 11:14:09 +02:00
(self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) ||
// consider shebangs comments, but not inner attributes
2016-01-03 11:14:09 +02:00
(self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
2014-05-21 16:57:31 -07:00
}
2014-07-02 09:39:48 -07:00
fn scan_byte(&mut self) -> token::Lit {
2014-07-02 09:39:48 -07:00
self.bump();
let start = self.pos;
2014-07-02 09:39:48 -07:00
// the eof will be picked up by the final `'` check below
let c2 = self.curr.unwrap_or('\x00');
2014-07-02 09:39:48 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
true,
'\'');
2014-07-02 09:39:48 -07:00
if !self.curr_is('\'') {
// Byte offsetting here is okay because the
// character before position `start` are an
// ascii single quote and ascii 'b'.
let pos = self.pos;
2016-01-03 11:14:09 +02:00
panic!(self.fatal_span_verbose(start - BytePos(2),
pos,
2016-01-03 11:14:09 +02:00
"unterminated byte constant".to_string()));
2014-07-02 09:39:48 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
token::intern("?")
};
2014-07-02 09:39:48 -07:00
self.bump(); // advance curr past token
return token::Byte(id);
2014-07-02 09:39:48 -07:00
}
/// Scan a two-digit hex escape (`\xNN`) terminated by `delim`, returning
/// whether it was valid. `below_0x7f_only` presumably restricts the value
/// to the ASCII range — confirm against `scan_hex_digits`.
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
    self.scan_hex_digits(2, delim, below_0x7f_only)
}
fn scan_byte_string(&mut self) -> token::Lit {
2014-07-02 09:39:48 -07:00
self.bump();
let start = self.pos;
let mut valid = true;
2014-07-02 09:39:48 -07:00
while !self.curr_is('"') {
if self.is_eof() {
let pos = self.pos;
panic!(self.fatal_span_(start, pos, "unterminated double quote byte string"));
2014-07-02 09:39:48 -07:00
}
let ch_start = self.pos;
2014-07-02 09:39:48 -07:00
let ch = self.curr.unwrap();
self.bump();
2016-01-03 11:14:09 +02:00
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
true,
'"');
2014-07-02 09:39:48 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
token::intern("??")
};
2014-07-02 09:39:48 -07:00
self.bump();
return token::ByteStr(id);
2014-07-02 09:39:48 -07:00
}
fn scan_raw_byte_string(&mut self) -> token::Lit {
let start_bpos = self.pos;
2014-07-02 09:39:48 -07:00
self.bump();
let mut hash_count = 0;
2014-07-02 09:39:48 -07:00
while self.curr_is('#') {
self.bump();
hash_count += 1;
}
if self.is_eof() {
let pos = self.pos;
panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"));
2014-07-02 09:39:48 -07:00
} else if !self.curr_is('"') {
let pos = self.pos;
2014-07-02 09:39:48 -07:00
let ch = self.curr.unwrap();
2016-01-03 11:14:09 +02:00
panic!(self.fatal_span_char(start_bpos,
pos,
2016-01-03 11:14:09 +02:00
"found invalid character; only `#` is allowed in raw \
string delimitation",
ch));
2014-07-02 09:39:48 -07:00
}
self.bump();
let content_start_bpos = self.pos;
2014-07-02 09:39:48 -07:00
let mut content_end_bpos;
'outer: loop {
match self.curr {
None => {
let pos = self.pos;
panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"))
2016-01-03 11:14:09 +02:00
}
2014-07-02 09:39:48 -07:00
Some('"') => {
content_end_bpos = self.pos;
for _ in 0..hash_count {
2014-07-02 09:39:48 -07:00
self.bump();
if !self.curr_is('#') {
continue 'outer;
}
}
break;
2016-01-03 11:14:09 +02:00
}
Some(c) => {
if c > '\x7F' {
let pos = self.pos;
self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
2016-01-03 11:14:09 +02:00
}
2014-07-02 09:39:48 -07:00
}
}
self.bump();
}
self.bump();
2016-01-03 11:14:09 +02:00
return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
hash_count);
2014-07-02 09:39:48 -07:00
}
2014-05-21 16:57:31 -07:00
}
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
pub fn is_pattern_whitespace(c: Option<char>) -> bool {
c.map_or(false, Pattern_White_Space)
2014-05-21 16:57:31 -07:00
}
/// Is `c` a character in the inclusive range `lo..hi`? `None` (EOF)
/// is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |c| lo <= c && c <= hi)
}
2016-01-03 11:14:09 +02:00
/// Is `c` an ASCII decimal digit (`0`-`9`)?
fn is_dec_digit(c: Option<char>) -> bool {
    c.map_or(false, |c| c.is_digit(10))
}
2014-05-21 16:57:31 -07:00
pub fn is_doc_comment(s: &str) -> bool {
2016-01-03 11:14:09 +02:00
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
s.starts_with("//!");
debug!("is {:?} a doc comment? {}", s, res);
res
2014-05-21 16:57:31 -07:00
}
pub fn is_block_doc_comment(s: &str) -> bool {
2016-01-03 11:20:06 +02:00
// Prevent `/**/` from being parsed as a doc comment
2016-01-03 11:14:09 +02:00
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
2016-01-03 11:20:06 +02:00
s.starts_with("/*!")) && s.len() >= 5;
debug!("is {:?} a doc comment? {}", s, res);
res
2014-05-21 16:57:31 -07:00
}
fn ident_start(c: Option<char>) -> bool {
2016-01-03 11:14:09 +02:00
let c = match c {
Some(c) => c,
None => return false,
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
2014-05-21 16:57:31 -07:00
}
fn ident_continue(c: Option<char>) -> bool {
2016-01-03 11:14:09 +02:00
let c = match c {
Some(c) => c,
None => return false,
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
(c > '\x7f' && c.is_xid_continue())
2014-05-21 16:57:31 -07:00
}
#[cfg(test)]
mod tests {
    use super::*;

    use syntax_pos::{BytePos, Span, NO_EXPANSION};
    use codemap::CodeMap;
    use errors;
    use parse::token;
    use parse::token::str_to_ident;
    use std::io;
    use std::rc::Rc;

    /// Build a diagnostic handler whose output is thrown away.
    fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
                                                          Some(cm));
        errors::Handler::with_emitter(true, false, Box::new(emitter))
    }

    /// Open a string reader over the given source text.
    fn setup<'a>(cm: &CodeMap,
                 span_handler: &'a errors::Handler,
                 teststr: String)
                 -> StringReader<'a> {
        let fm = cm.new_filemap("zebra.rs".to_string(), None, teststr);
        StringReader::new(span_handler, fm)
    }

    #[test]
    fn t1() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut reader = setup(&cm,
                               &sh,
                               String::from("/* my source file */ fn main() { println!(\"zebra\"); }\n"));
        let id = str_to_ident("fn");
        assert_eq!(reader.next_token().tok, token::Comment);
        assert_eq!(reader.next_token().tok, token::Whitespace);
        let actual = reader.next_token();
        let expected = TokenAndSpan {
            tok: token::Ident(id),
            sp: Span {
                lo: BytePos(21),
                hi: BytePos(23),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(actual, expected);
        assert_eq!(reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(reader.pos.clone(), BytePos(28));
        // read another token:
        let actual = reader.next_token();
        let expected = TokenAndSpan {
            tok: token::Ident(str_to_ident("main")),
            sp: Span {
                lo: BytePos(24),
                hi: BytePos(28),
                expn_id: NO_EXPANSION,
            },
        };
        assert_eq!(actual, expected);
        // the lparen is already read:
        assert_eq!(reader.pos.clone(), BytePos(29))
    }

    /// Check that the given reader produces the desired stream of tokens
    /// (stop checking after exhausting the expected vec).
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }

    /// Make an identifier token by interning the given string.
    fn mk_ident(id: &str) -> token::Token {
        token::Ident(str_to_ident(id))
    }

    #[test]
    fn doublecolonparsing() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, String::from("a b")),
                           vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_2() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, String::from("a::b")),
                           vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_3() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, String::from("a ::b")),
                           vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_4() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        check_tokenization(setup(&cm, &sh, String::from("a:: b")),
                           vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn character_a() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, String::from("'a'")).next_token().tok,
                   token::Literal(token::Char(token::intern("a")), None));
    }

    #[test]
    fn character_space() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, String::from("' '")).next_token().tok,
                   token::Literal(token::Char(token::intern(" ")), None));
    }

    #[test]
    fn character_escaped() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, String::from("'\\n'")).next_token().tok,
                   token::Literal(token::Char(token::intern("\\n")), None));
    }

    #[test]
    fn lifetime_name() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, String::from("'abc")).next_token().tok,
                   token::Lifetime(token::str_to_ident("'abc")));
    }

    #[test]
    fn raw_string() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        assert_eq!(setup(&cm, &sh, String::from("r###\"\"#a\\b\x00c\"\"###"))
                       .next_token()
                       .tok,
                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
    }

    #[test]
    fn literal_suffixes() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          Some(token::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(token::intern($tok_contents)),
                                          None));
            }}
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(setup(&cm, &sh, String::from("2us")).next_token().tok,
                   token::Literal(token::Integer(token::intern("2")),
                                  Some(token::intern("us"))));
        assert_eq!(setup(&cm, &sh, String::from("r###\"raw\"###suffix")).next_token().tok,
                   token::Literal(token::StrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
        assert_eq!(setup(&cm, &sh, String::from("br###\"raw\"###suffix")).next_token().tok,
                   token::Literal(token::ByteStrRaw(token::intern("raw"), 3),
                                  Some(token::intern("suffix"))));
    }

    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    #[test]
    fn nested_block_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, String::from("/* /* */ */'a'"));
        match lexer.next_token().tok {
            token::Comment => {}
            _ => panic!("expected a comment!"),
        }
        assert_eq!(lexer.next_token().tok,
                   token::Literal(token::Char(token::intern("a")), None));
    }

    #[test]
    fn crlf_comments() {
        let cm = Rc::new(CodeMap::new());
        let sh = mk_sh(cm.clone());
        let mut lexer = setup(&cm, &sh, String::from("// test\r\n/// test\r\n"));
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        assert_eq!(comment.sp, ::syntax_pos::mk_sp(BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        assert_eq!(lexer.next_token().tok,
                   token::DocComment(token::intern("/// test")));
    }
}