rust/src/libsyntax/parse/lexer/mod.rs

2047 lines
75 KiB
Rust
Raw Normal View History

2014-05-21 16:57:31 -07:00
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use ast::{self, Ident};
use syntax_pos::{self, BytePos, CharPos, Pos, Span, NO_EXPANSION};
use codemap::{CodeMap, FilePathMapping};
use errors::{FatalError, DiagnosticBuilder};
use parse::{token, ParseSess};
use str::char_at;
use symbol::{Symbol, keywords};
2018-04-05 17:20:08 +02:00
use core::unicode::property::Pattern_White_Space;
2014-05-21 16:57:31 -07:00
2015-04-15 22:15:50 -07:00
use std::borrow::Cow;
2014-05-21 16:57:31 -07:00
use std::char;
use std::mem::replace;
2018-02-27 17:11:14 +01:00
use rustc_data_structures::sync::Lrc;
2014-05-21 16:57:31 -07:00
pub mod comments;
mod tokentrees;
mod unicode_chars;
2014-05-21 16:57:31 -07:00
2015-01-28 08:34:18 -05:00
#[derive(Clone, PartialEq, Eq, Debug)]
2014-05-21 16:57:31 -07:00
/// A token produced by the lexer, paired with the span it was given.
pub struct TokenAndSpan {
/// The token itself.
pub tok: token::Token,
/// The span associated with `tok`.
pub sp: Span,
}
/// The default `TokenAndSpan` is a `Whitespace` token carrying `DUMMY_SP`.
impl Default for TokenAndSpan {
    fn default() -> Self {
        let tok = token::Whitespace;
        let sp = syntax_pos::DUMMY_SP;
        Self { tok, sp }
    }
}
2014-05-21 16:57:31 -07:00
pub struct StringReader<'a> {
pub sess: &'a ParseSess,
2014-06-09 13:12:30 -07:00
/// The absolute offset within the codemap of the next character to read
pub next_pos: BytePos,
/// The absolute offset within the codemap of the current character
pub pos: BytePos,
/// The current character (which has been read from self.pos)
pub ch: Option<char>,
2018-02-27 17:11:14 +01:00
pub filemap: Lrc<syntax_pos::FileMap>,
/// Stop reading src at this index.
pub end_src_index: usize,
/// Whether to record new-lines and multibyte chars in filemap.
/// This is only necessary the first time a filemap is lexed.
/// If part of a filemap is being re-lexed, this should be set to false.
pub save_new_lines_and_multibyte: bool,
2016-01-03 11:14:09 +02:00
// cached:
peek_tok: token::Token,
peek_span: Span,
peek_span_src_raw: Span,
pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
// cache a direct reference to the source text, so that we don't have to
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
src: Lrc<String>,
/// Stack of open delimiters and their spans. Used for error message.
token: token::Token,
span: Span,
/// The raw source span which *does not* take `override_span` into account
span_src_raw: Span,
open_braces: Vec<(token::DelimToken, Span)>,
pub override_span: Option<Span>,
}
impl<'a> StringReader<'a> {
fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
self.mk_sp_and_raw(lo, hi).0
}
fn mk_sp_and_raw(&self, lo: BytePos, hi: BytePos) -> (Span, Span) {
let raw = Span::new(lo, hi, NO_EXPANSION);
let real = unwrap_or!(self.override_span, raw);
(real, raw)
}
2017-11-27 23:07:44 -08:00
fn mk_ident(&self, string: &str) -> Ident {
let mut ident = Ident::from_str(string);
if let Some(span) = self.override_span {
ident.span = span;
2017-11-27 23:07:44 -08:00
}
ident
}
fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
let res = self.try_next_token();
self.unwrap_or_abort(res)
}
fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
match res {
Ok(tok) => tok,
Err(_) => {
self.emit_fatal_errors();
FatalError.raise();
}
}
}
fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
let mut t = self.try_next_token()?;
loop {
match t.tok {
token::Whitespace | token::Comment | token::Shebang(_) => {
t = self.try_next_token()?;
}
_ => break,
}
}
self.token = t.tok.clone();
self.span = t.sp;
Ok(t)
}
pub fn real_token(&mut self) -> TokenAndSpan {
let res = self.try_real_token();
self.unwrap_or_abort(res)
}
2016-01-03 11:14:09 +02:00
fn is_eof(&self) -> bool {
self.ch.is_none()
2016-01-03 11:14:09 +02:00
}
2014-06-09 13:12:30 -07:00
/// Return the next token. EFFECT: advances the string_reader.
2017-01-17 04:50:46 +00:00
pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
assert!(self.fatal_errs.is_empty());
2014-05-21 16:57:31 -07:00
let ret_val = TokenAndSpan {
2018-03-08 14:27:23 +03:00
tok: replace(&mut self.peek_tok, token::Whitespace),
2014-05-21 16:57:31 -07:00
sp: self.peek_span,
};
self.advance_token()?;
self.span_src_raw = self.peek_span_src_raw;
Ok(ret_val)
2014-05-21 16:57:31 -07:00
}
fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) {
let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string");
err.span_label(self.mk_sp(pos, pos), "unterminated raw string");
if hash_count > 0 {
err.note(&format!("this raw string should be terminated with `\"{}`",
"#".repeat(hash_count as usize)));
}
err.emit();
FatalError.raise();
}
2015-10-23 19:20:03 -07:00
fn fatal(&self, m: &str) -> FatalError {
self.fatal_span(self.peek_span, m)
2014-05-21 16:57:31 -07:00
}
2017-01-17 04:50:46 +00:00
pub fn emit_fatal_errors(&mut self) {
for err in &mut self.fatal_errs {
err.emit();
}
self.fatal_errs.clear();
}
2017-01-17 04:50:46 +00:00
pub fn peek(&self) -> TokenAndSpan {
2014-05-21 16:57:31 -07:00
// FIXME(pcwalton): Bad copy!
TokenAndSpan {
tok: self.peek_tok.clone(),
sp: self.peek_span,
}
}
}
impl<'a> StringReader<'a> {
/// For comments.rs, which hackily pokes into next_pos and ch
pub fn new_raw(sess: &'a ParseSess, filemap: Lrc<syntax_pos::FileMap>,
override_span: Option<Span>) -> Self {
let mut sr = StringReader::new_raw_internal(sess, filemap, override_span);
sr.bump();
sr
}
fn new_raw_internal(sess: &'a ParseSess, filemap: Lrc<syntax_pos::FileMap>,
override_span: Option<Span>) -> Self {
if filemap.src.is_none() {
sess.span_diagnostic.bug(&format!("Cannot lex filemap without source: {}",
filemap.name));
}
let src = (*filemap.src.as_ref().unwrap()).clone();
StringReader {
sess,
next_pos: filemap.start_pos,
pos: filemap.start_pos,
ch: Some('\n'),
filemap,
end_src_index: src.len(),
save_new_lines_and_multibyte: true,
2016-01-03 11:14:09 +02:00
// dummy values; not read
2014-10-27 19:22:52 +11:00
peek_tok: token::Eof,
peek_span: syntax_pos::DUMMY_SP,
peek_span_src_raw: syntax_pos::DUMMY_SP,
src,
fatal_errs: Vec::new(),
token: token::Eof,
span: syntax_pos::DUMMY_SP,
span_src_raw: syntax_pos::DUMMY_SP,
open_braces: Vec::new(),
override_span,
}
2014-05-21 16:57:31 -07:00
}
/// Creates a reader over `filemap` and lexes the first lookahead token.
///
/// If lexing that first token fails, the buffered fatal errors are emitted
/// and a `FatalError` is raised.
pub fn new(sess: &'a ParseSess, filemap: Lrc<syntax_pos::FileMap>, override_span: Option<Span>)
           -> Self {
    let mut sr = StringReader::new_raw(sess, filemap, override_span);
    if sr.advance_token().is_err() {
        sr.emit_fatal_errors();
        FatalError.raise();
    }
    sr
}
pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
2017-07-31 23:04:34 +03:00
let begin = sess.codemap().lookup_byte_offset(span.lo());
let end = sess.codemap().lookup_byte_offset(span.hi());
// Make the range zero-length if the span is invalid.
2017-07-31 23:04:34 +03:00
if span.lo() > span.hi() || begin.fm.start_pos != end.fm.start_pos {
span = span.shrink_to_lo();
}
let mut sr = StringReader::new_raw_internal(sess, begin.fm, None);
// Seek the lexer to the right byte range.
sr.save_new_lines_and_multibyte = false;
2017-07-31 23:04:34 +03:00
sr.next_pos = span.lo();
sr.end_src_index = sr.src_index(span.hi());
sr.bump();
if sr.advance_token().is_err() {
sr.emit_fatal_errors();
FatalError.raise();
}
sr
}
pub fn ch_is(&self, c: char) -> bool {
self.ch == Some(c)
2014-05-21 16:57:31 -07:00
}
/// Report a fatal lexical error with a given span.
2015-10-23 19:20:03 -07:00
pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
self.sess.span_diagnostic.span_fatal(sp, m)
2014-05-21 16:57:31 -07:00
}
/// Report a (non-fatal) lexical error with message `m` at span `sp`.
pub fn err_span(&self, sp: Span, m: &str) {
    let handler = &self.sess.span_diagnostic;
    handler.span_err(sp, m)
}
/// Report a fatal error spanning [`from_pos`, `to_pos`).
2015-10-23 19:20:03 -07:00
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
self.fatal_span(self.mk_sp(from_pos, to_pos), m)
}
/// Report a lexical error spanning [`from_pos`, `to_pos`).
///
/// The span is built with `mk_sp`.
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
    self.err_span(self.mk_sp(from_pos, to_pos), m)
}
/// Appends `c` to the error-message buffer `m`.
///
/// Characters from U+0020 through U+007E are appended verbatim; anything
/// else is appended in its `char::escape_default` form.
fn push_escaped_char_for_msg(m: &mut String, c: char) {
    let is_verbatim = c >= '\u{20}' && c <= '\u{7e}';
    if is_verbatim {
        // Don't escape \, ' or " for user-facing messages
        m.push(c);
    } else {
        m.extend(c.escape_default());
    }
}
2014-05-21 16:57:31 -07:00
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
/// escaped character to the error message
2015-10-23 19:20:03 -07:00
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
2014-05-21 16:57:31 -07:00
let mut m = m.to_string();
m.push_str(": ");
Self::push_escaped_char_for_msg(&mut m, c);
2015-10-23 19:20:03 -07:00
self.fatal_span_(from_pos, to_pos, &m[..])
2014-05-21 16:57:31 -07:00
}
/// Builds (without emitting) a fatal diagnostic with message `m` for the
/// span [`from_pos`, `to_pos`).
fn struct_span_fatal(&self,
                     from_pos: BytePos,
                     to_pos: BytePos,
                     m: &str)
                     -> DiagnosticBuilder<'a> {
    let sp = self.mk_sp(from_pos, to_pos);
    self.sess.span_diagnostic.struct_span_fatal(sp, m)
}
2015-12-21 10:00:43 +13:00
fn struct_fatal_span_char(&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char)
2016-01-03 11:14:09 +02:00
-> DiagnosticBuilder<'a> {
2015-12-21 10:00:43 +13:00
let mut m = m.to_string();
m.push_str(": ");
Self::push_escaped_char_for_msg(&mut m, c);
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
2015-12-21 10:00:43 +13:00
}
2014-05-21 16:57:31 -07:00
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
/// escaped character to the error message
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
2014-05-21 16:57:31 -07:00
let mut m = m.to_string();
m.push_str(": ");
Self::push_escaped_char_for_msg(&mut m, c);
self.err_span_(from_pos, to_pos, &m[..]);
2014-05-21 16:57:31 -07:00
}
2015-12-21 10:00:43 +13:00
fn struct_err_span_char(&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char)
2016-01-03 11:14:09 +02:00
-> DiagnosticBuilder<'a> {
2015-12-21 10:00:43 +13:00
let mut m = m.to_string();
m.push_str(": ");
Self::push_escaped_char_for_msg(&mut m, c);
self.sess.span_diagnostic.struct_span_err(self.mk_sp(from_pos, to_pos), &m[..])
2015-12-21 10:00:43 +13:00
}
2014-05-21 16:57:31 -07:00
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
/// offending string to the error message
2015-10-23 19:20:03 -07:00
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
2014-05-21 16:57:31 -07:00
m.push_str(": ");
m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
2015-10-23 19:20:03 -07:00
self.fatal_span_(from_pos, to_pos, &m[..])
2014-05-21 16:57:31 -07:00
}
/// Advance peek_tok and peek_span to refer to the next token, and
/// possibly update the interner.
fn advance_token(&mut self) -> Result<(), ()> {
match self.scan_whitespace_or_comment() {
2014-05-21 16:57:31 -07:00
Some(comment) => {
self.peek_span_src_raw = comment.sp;
2014-05-21 16:57:31 -07:00
self.peek_span = comment.sp;
self.peek_tok = comment.tok;
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
None => {
if self.is_eof() {
2014-10-27 19:22:52 +11:00
self.peek_tok = token::Eof;
let (real, raw) = self.mk_sp_and_raw(
self.filemap.end_pos,
self.filemap.end_pos,
);
self.peek_span = real;
self.peek_span_src_raw = raw;
2014-05-21 16:57:31 -07:00
} else {
let start_bytepos = self.pos;
self.peek_tok = self.next_token_inner()?;
let (real, raw) = self.mk_sp_and_raw(start_bytepos, self.pos);
self.peek_span = real;
self.peek_span_src_raw = raw;
2014-05-21 16:57:31 -07:00
};
}
}
Ok(())
2014-05-21 16:57:31 -07:00
}
/// Converts an absolute `BytePos` into an index into `self.src` by
/// subtracting the filemap's start position.
#[inline]
fn src_index(&self, pos: BytePos) -> usize {
    (pos - self.filemap.start_pos).to_usize()
}
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `self.pos`, meaning the slice does not include
/// the character `self.ch`.
2016-01-03 11:14:09 +02:00
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
2014-12-08 13:28:32 -05:00
{
self.with_str_from_to(start, self.pos, f)
2014-05-21 16:57:31 -07:00
}
2014-07-06 01:17:59 -07:00
/// Create a Name from a given offset to the current offset, each
/// adjusted 1 towards each other (assumes that on either side there is a
/// single-byte delimiter).
2014-07-06 01:17:59 -07:00
pub fn name_from(&self, start: BytePos) -> ast::Name {
debug!("taking an ident from {:?} to {:?}", start, self.pos);
self.with_str_from(start, Symbol::intern)
}
2014-07-06 01:17:59 -07:00
/// Same as `name_from`, except that the end of the interned text is the
/// explicit `end` offset (exclusive) rather than the current position.
pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
    debug!("taking an ident from {:?} to {:?}", start, end);
    let intern = Symbol::intern;
    self.with_str_from_to(start, end, intern)
}
2014-05-21 16:57:31 -07:00
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
2016-01-03 11:14:09 +02:00
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
2014-12-08 13:28:32 -05:00
{
f(&self.src[self.src_index(start)..self.src_index(end)])
2014-05-21 16:57:31 -07:00
}
/// Converts CRLF to LF in the given string, raising an error on bare CR.
2016-01-03 11:14:09 +02:00
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut i = 0;
while i < s.len() {
let ch = char_at(s, i);
let next = i + ch.len_utf8();
if ch == '\r' {
if next < s.len() && char_at(s, next) == '\n' {
2015-04-15 22:15:50 -07:00
return translate_crlf_(self, start, s, errmsg, i).into();
}
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
self.err_span_(pos, end_pos, errmsg);
}
i = next;
}
2015-04-15 22:15:50 -07:00
return s.into();
2016-01-03 11:14:09 +02:00
fn translate_crlf_(rdr: &StringReader,
start: BytePos,
s: &str,
errmsg: &str,
mut i: usize)
-> String {
let mut buf = String::with_capacity(s.len());
let mut j = 0;
while i < s.len() {
let ch = char_at(s, i);
let next = i + ch.len_utf8();
if ch == '\r' {
2016-01-03 11:14:09 +02:00
if j < i {
buf.push_str(&s[j..i]);
}
j = next;
if next >= s.len() || char_at(s, next) != '\n' {
let pos = start + BytePos(i as u32);
let end_pos = start + BytePos(next as u32);
rdr.err_span_(pos, end_pos, errmsg);
}
}
i = next;
}
2016-01-03 11:14:09 +02:00
if j < s.len() {
buf.push_str(&s[j..]);
}
buf
}
}
2014-05-21 16:57:31 -07:00
/// Advance the StringReader by one character. If a newline is
/// discovered, add it to the FileMap's list of line start offsets.
///
/// Line starts and multibyte characters are only recorded while
/// `save_new_lines_and_multibyte` is set. Once `end_src_index` is reached,
/// `ch` becomes `None` and `pos` catches up with `next_pos`.
///
/// # Panics
///
/// Panics if `ch` is `None` while there is still input before
/// `end_src_index`.
pub fn bump(&mut self) {
    let next_src_index = self.src_index(self.next_pos);
    if next_src_index < self.end_src_index {
        let next_ch = char_at(&self.src, next_src_index);
        let next_ch_len = next_ch.len_utf8();

        // The character being left behind was a newline, so the next one
        // starts a new line.
        if self.ch.unwrap() == '\n' {
            if self.save_new_lines_and_multibyte {
                self.filemap.next_line(self.next_pos);
            }
        }
        if next_ch_len > 1 {
            if self.save_new_lines_and_multibyte {
                self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
            }
        }
        self.filemap.record_width(self.next_pos, next_ch);

        self.ch = Some(next_ch);
        self.pos = self.next_pos;
        self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
    } else {
        self.ch = None;
        self.pos = self.next_pos;
    }
}
pub fn nextch(&self) -> Option<char> {
let next_src_index = self.src_index(self.next_pos);
if next_src_index < self.end_src_index {
Some(char_at(&self.src, next_src_index))
2014-05-21 16:57:31 -07:00
} else {
None
}
}
/// Returns true if the character after the current one exists and is `c`.
pub fn nextch_is(&self, c: char) -> bool {
    match self.nextch() {
        Some(next) => next == c,
        None => false,
    }
}
/// Returns the character two positions after the current one without
/// consuming anything, or `None` if either lookahead step would reach
/// `end_src_index`.
pub fn nextnextch(&self) -> Option<char> {
    let next_src_index = self.src_index(self.next_pos);
    if next_src_index < self.end_src_index {
        // Skip over the next character by its UTF-8 length.
        let next_next_src_index =
            next_src_index + char_at(&self.src, next_src_index).len_utf8();
        if next_next_src_index < self.end_src_index {
            return Some(char_at(&self.src, next_next_src_index));
        }
    }
    None
}
/// Returns true if the character two positions after the current one exists
/// and is `c`.
pub fn nextnextch_is(&self, c: char) -> bool {
    match self.nextnextch() {
        Some(after_next) => after_next == c,
        None => false,
    }
}
/// Eats <XID_start><XID_continue>*, if possible.
///
/// Returns `None` without consuming anything when the current character
/// cannot start an identifier. A lone `_` is consumed, triggers a
/// future-compatibility warning and also yields `None`; any other name is
/// interned and returned.
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
    if !ident_start(self.ch) {
        return None;
    }
    let start = self.pos;
    while ident_continue(self.ch) {
        self.bump();
    }

    self.with_str_from(start, |string| {
        if string == "_" {
            self.sess.span_diagnostic
                .struct_span_warn(self.mk_sp(start, self.pos),
                                  "underscore literal suffix is not allowed")
                .warn("this was previously accepted by the compiler but is \
                       being phased out; it will become a hard error in \
                       a future release!")
                .note("for more information, see issue #42326 \
                       <https://github.com/rust-lang/rust/issues/42326>")
                .emit();
            None
        } else {
            Some(Symbol::intern(string))
        }
    })
}
/// PRECONDITION: self.ch is not whitespace
2014-05-21 16:57:31 -07:00
/// Eats any kind of comment.
fn scan_comment(&mut self) -> Option<TokenAndSpan> {
if let Some(c) = self.ch {
if c.is_whitespace() {
let msg = "called consume_any_line_comment, but there was whitespace";
self.sess.span_diagnostic.span_err(self.mk_sp(self.pos, self.pos), msg);
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
if self.ch_is('/') {
2014-05-21 16:57:31 -07:00
match self.nextch() {
Some('/') => {
self.bump();
self.bump();
2014-05-21 16:57:31 -07:00
// line comments starting with "///" or "//!" are doc-comments
2017-05-08 22:29:24 +09:00
let doc_comment = (self.ch_is('/') && !self.nextch_is('/')) || self.ch_is('!');
let start_bpos = self.pos - BytePos(2);
while !self.is_eof() {
match self.ch.unwrap() {
'\n' => break,
'\r' => {
if self.nextch_is('\n') {
// CRLF
2016-01-03 11:14:09 +02:00
break;
} else if doc_comment {
self.err_span_(self.pos,
self.next_pos,
"bare CR not allowed in doc-comment");
}
}
2016-01-03 11:14:09 +02:00
_ => (),
2014-05-21 16:57:31 -07:00
}
self.bump();
}
if doc_comment {
self.with_str_from(start_bpos, |string| {
// comments with only more "/"s are not doc comments
let tok = if is_doc_comment(string) {
token::DocComment(Symbol::intern(string))
2014-05-21 16:57:31 -07:00
} else {
2014-10-27 19:22:52 +11:00
token::Comment
};
2014-05-21 16:57:31 -07:00
Some(TokenAndSpan {
tok,
sp: self.mk_sp(start_bpos, self.pos),
})
})
2014-05-21 16:57:31 -07:00
} else {
Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Comment,
sp: self.mk_sp(start_bpos, self.pos),
})
}
2014-05-21 16:57:31 -07:00
}
Some('*') => {
2016-01-03 11:14:09 +02:00
self.bump();
self.bump();
self.scan_block_comment()
}
2016-01-03 11:14:09 +02:00
_ => None,
2014-05-21 16:57:31 -07:00
}
} else if self.ch_is('#') {
2014-05-21 16:57:31 -07:00
if self.nextch_is('!') {
// Parse an inner attribute.
if self.nextnextch_is('[') {
return None;
}
// I guess this is the only way to figure out if
// we're at the beginning of the file...
let cmap = CodeMap::new(FilePathMapping::empty());
cmap.files.borrow_mut().file_maps.push(self.filemap.clone());
let loc = cmap.lookup_char_pos_adj(self.pos);
debug!("Skipping a shebang");
if loc.line == 1 && loc.col == CharPos(0) {
// FIXME: Add shebang "token", return it
let start = self.pos;
while !self.ch_is('\n') && !self.is_eof() {
2016-01-03 11:14:09 +02:00
self.bump();
}
return Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Shebang(self.name_from(start)),
sp: self.mk_sp(start, self.pos),
});
2014-05-21 16:57:31 -07:00
}
}
None
} else {
None
}
}
/// If there is whitespace, shebang, or a comment, scan it. Otherwise,
/// return None.
fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
match self.ch.unwrap_or('\0') {
// # to handle shebang at start of file -- this is the entry point
// for skipping over all "junk"
'/' | '#' => {
let c = self.scan_comment();
debug!("scanning a comment {:?}", c);
c
},
c if is_pattern_whitespace(Some(c)) => {
let start_bpos = self.pos;
while is_pattern_whitespace(self.ch) {
2016-01-03 11:14:09 +02:00
self.bump();
}
let c = Some(TokenAndSpan {
2014-10-27 19:22:52 +11:00
tok: token::Whitespace,
sp: self.mk_sp(start_bpos, self.pos),
});
debug!("scanning whitespace: {:?}", c);
c
2016-01-03 11:14:09 +02:00
}
_ => None,
}
2014-05-21 16:57:31 -07:00
}
2014-06-09 13:12:30 -07:00
/// Might return a sugared-doc-attr
fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
2014-05-21 16:57:31 -07:00
// block comments starting with "/**" or "/*!" are doc-comments
let is_doc_comment = self.ch_is('*') || self.ch_is('!');
let start_bpos = self.pos - BytePos(2);
2014-05-21 16:57:31 -07:00
2015-01-18 00:18:19 +00:00
let mut level: isize = 1;
let mut has_cr = false;
2014-05-21 16:57:31 -07:00
while level > 0 {
if self.is_eof() {
let msg = if is_doc_comment {
"unterminated block doc-comment"
} else {
"unterminated block comment"
};
let last_bpos = self.pos;
self.fatal_span_(start_bpos, last_bpos, msg).raise();
2014-05-21 16:57:31 -07:00
}
let n = self.ch.unwrap();
match n {
'/' if self.nextch_is('*') => {
level += 1;
self.bump();
}
'*' if self.nextch_is('/') => {
level -= 1;
self.bump();
}
'\r' => {
has_cr = true;
}
2016-01-03 11:14:09 +02:00
_ => (),
}
self.bump();
2014-05-21 16:57:31 -07:00
}
self.with_str_from(start_bpos, |string| {
// but comments with only "*"s between two "/"s are not
let tok = if is_block_doc_comment(string) {
let string = if has_cr {
2016-01-03 11:14:09 +02:00
self.translate_crlf(start_bpos,
string,
"bare CR not allowed in block doc-comment")
2016-01-03 11:14:09 +02:00
} else {
string.into()
};
token::DocComment(Symbol::intern(&string[..]))
} else {
2014-10-27 19:22:52 +11:00
token::Comment
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
Some(TokenAndSpan {
tok,
sp: self.mk_sp(start_bpos, self.pos),
})
})
2014-05-21 16:57:31 -07:00
}
/// Scan through any digits (base `scan_radix`) or underscores,
/// and return how many digits there were.
///
/// `real_radix` represents the true radix of the number we're
/// interested in, and errors will be emitted for any digits
/// between `real_radix` and `scan_radix`.
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
assert!(real_radix <= scan_radix);
let mut len = 0;
2014-05-21 16:57:31 -07:00
loop {
let c = self.ch;
2016-01-03 11:14:09 +02:00
if c == Some('_') {
debug!("skipping a _");
self.bump();
continue;
}
match c.and_then(|cc| cc.to_digit(scan_radix)) {
Some(_) => {
debug!("{:?} in scan_digits", c);
// check that the hypothetical digit is actually
// in range for the true radix
if c.unwrap().to_digit(real_radix).is_none() {
self.err_span_(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
&format!("invalid digit for a base {} literal", real_radix));
}
len += 1;
self.bump();
}
2016-01-03 11:14:09 +02:00
_ => return len,
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
/// Lex a LIT_INTEGER or a LIT_FLOAT
fn scan_number(&mut self, c: char) -> token::Lit {
let num_digits;
let mut base = 10;
let start_bpos = self.pos;
2014-05-21 16:57:31 -07:00
self.bump();
if c == '0' {
match self.ch.unwrap_or('\0') {
2016-01-03 11:14:09 +02:00
'b' => {
self.bump();
base = 2;
num_digits = self.scan_digits(2, 10);
}
'o' => {
self.bump();
base = 8;
num_digits = self.scan_digits(8, 10);
}
'x' => {
self.bump();
base = 16;
num_digits = self.scan_digits(16, 16);
}
'0'...'9' | '_' | '.' | 'e' | 'E' => {
num_digits = self.scan_digits(10, 10) + 1;
}
_ => {
// just a 0
return token::Integer(self.name_from(start_bpos));
}
2014-05-21 16:57:31 -07:00
}
} else if c.is_digit(10) {
num_digits = self.scan_digits(10, 10) + 1;
} else {
num_digits = 0;
2014-05-21 16:57:31 -07:00
}
if num_digits == 0 {
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"no valid digits found for number");
return token::Integer(Symbol::intern("0"));
2014-05-21 16:57:31 -07:00
}
// might be a float, but don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
if self.ch_is('.') && !self.nextch_is('.') &&
2017-05-12 22:00:06 +09:00
!ident_start(self.nextch()) {
// might have stuff after the ., and if it does, it needs to start
// with a number
2014-05-21 16:57:31 -07:00
self.bump();
if self.ch.unwrap_or('\0').is_digit(10) {
self.scan_digits(10, 10);
self.scan_float_exponent();
2014-05-21 16:57:31 -07:00
}
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
token::Float(self.name_from(start_bpos))
2014-05-21 16:57:31 -07:00
} else {
// it might be a float if it has an exponent
if self.ch_is('e') || self.ch_is('E') {
self.scan_float_exponent();
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
return token::Float(self.name_from(start_bpos));
2014-05-21 16:57:31 -07:00
}
// but we certainly have an integer!
token::Integer(self.name_from(start_bpos))
2014-05-21 16:57:31 -07:00
}
}
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
/// error if too many or too few digits are encountered.
2016-01-03 11:14:09 +02:00
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
debug!("scanning {} digits until {:?}", n_digits, delim);
let start_bpos = self.pos;
let mut accum_int = 0;
let mut valid = true;
for _ in 0..n_digits {
2014-05-21 16:57:31 -07:00
if self.is_eof() {
let last_bpos = self.pos;
self.fatal_span_(start_bpos,
last_bpos,
"unterminated numeric character escape").raise();
2014-05-21 16:57:31 -07:00
}
if self.ch_is(delim) {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
last_bpos,
"numeric character escape is too short");
valid = false;
2014-05-21 16:57:31 -07:00
break;
}
let c = self.ch.unwrap_or('\x00');
2014-05-21 16:57:31 -07:00
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
self.err_span_char(self.pos,
self.next_pos,
2016-01-03 11:14:09 +02:00
"invalid character in numeric character escape",
c);
valid = false;
2014-05-21 16:57:31 -07:00
0
Add trivial cast lints. This permits all coercions to be performed in casts, but adds lints to warn in those cases. Part of this patch moves cast checking to a later stage of type checking. We acquire obligations to check casts as part of type checking where we previously checked them. Once we have type checked a function or module, then we check any cast obligations which have been acquired. That means we have more type information available to check casts (this was crucial to making coercions work properly in place of some casts), but it means that casts cannot feed input into type inference. [breaking change] * Adds two new lints for trivial casts and trivial numeric casts, these are warn by default, but can cause errors if you build with warnings as errors. Previously, trivial numeric casts and casts to trait objects were allowed. * The unused casts lint has gone. * Interactions between casting and type inference have changed in subtle ways. Two ways this might manifest are: - You may need to 'direct' casts more with extra type information, for example, in some cases where `foo as _ as T` succeeded, you may now need to specify the type for `_` - Casts do not influence inference of integer types. E.g., the following used to type check: ``` let x = 42; let y = &x as *const u32; ``` Because the cast would inform inference that `x` must have type `u32`. This no longer applies and the compiler will fallback to `i32` for `x` and thus there will be a type error in the cast. The solution is to add more type information: ``` let x: u32 = 42; let y = &x as *const u32; ```
2015-03-20 17:15:27 +13:00
});
2014-05-21 16:57:31 -07:00
self.bump();
}
if below_0x7f_only && accum_int >= 0x80 {
self.err_span_(start_bpos,
self.pos,
2016-01-03 11:14:09 +02:00
"this form of character escape may only be used with characters in \
the range [\\x00-\\x7f]");
valid = false;
}
2014-05-21 16:57:31 -07:00
match char::from_u32(accum_int) {
Some(_) => valid,
2014-05-21 16:57:31 -07:00
None => {
let last_bpos = self.pos;
self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
false
2014-05-21 16:57:31 -07:00
}
}
}
/// Scan for a single (possibly escaped) byte or char
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
/// `start` is the position of `first_source_char`, which is already consumed.
///
/// Returns true if there was a valid char/byte, false otherwise.
2016-01-03 11:14:09 +02:00
fn scan_char_or_byte(&mut self,
start: BytePos,
first_source_char: char,
ascii_only: bool,
delim: char)
-> bool {
match first_source_char {
'\\' => {
// '\X' for some X must be a character constant:
let escaped = self.ch;
let escaped_pos = self.pos;
self.bump();
match escaped {
2016-01-03 11:14:09 +02:00
None => {} // EOF here is an error that will be checked later.
Some(e) => {
return match e {
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
'x' => self.scan_byte_escape(delim, !ascii_only),
'u' => {
let valid = if self.ch_is('{') {
self.scan_unicode_escape(delim) && !ascii_only
} else {
let span = self.mk_sp(start, self.pos);
self.sess.span_diagnostic
2016-01-03 11:14:09 +02:00
.struct_span_err(span, "incorrect unicode escape sequence")
2015-12-21 10:00:43 +13:00
.span_help(span,
2016-01-03 11:14:09 +02:00
"format of unicode escape sequences is \
`\\u{}`")
2015-12-21 10:00:43 +13:00
.emit();
false
};
if ascii_only {
2016-01-03 11:14:09 +02:00
self.err_span_(start,
self.pos,
2016-01-03 11:14:09 +02:00
"unicode escape sequences cannot be used as a \
byte or in a byte string");
2015-06-22 15:30:56 -07:00
}
valid
}
'\n' if delim == '"' => {
self.consume_whitespace();
true
2016-01-03 11:14:09 +02:00
}
'\r' if delim == '"' && self.ch_is('\n') => {
self.consume_whitespace();
true
}
c => {
let pos = self.pos;
2016-01-03 11:14:09 +02:00
let mut err = self.struct_err_span_char(escaped_pos,
pos,
2016-01-03 11:14:09 +02:00
if ascii_only {
"unknown byte escape"
} else {
"unknown character \
escape"
},
c);
if e == '\r' {
err.span_help(self.mk_sp(escaped_pos, pos),
2016-01-03 11:14:09 +02:00
"this is an isolated carriage return; consider \
checking your editor and version control \
settings");
}
if (e == '{' || e == '}') && !ascii_only {
err.span_help(self.mk_sp(escaped_pos, pos),
2016-01-03 11:14:09 +02:00
"if used in a formatting string, curly braces \
are escaped with `{{` and `}}`");
}
2015-12-21 10:00:43 +13:00
err.emit();
false
}
}
}
}
}
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
let pos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_char(start,
pos,
2016-01-03 11:14:09 +02:00
if ascii_only {
"byte constant must be escaped"
} else {
"character constant must be escaped"
},
first_source_char);
return false;
}
'\r' => {
if self.ch_is('\n') {
self.bump();
return true;
} else {
2016-01-03 11:14:09 +02:00
self.err_span_(start,
self.pos,
"bare CR not allowed in string, use \\r instead");
return false;
}
}
2016-01-03 11:14:09 +02:00
_ => {
if ascii_only && first_source_char > '\x7F' {
let pos = self.pos;
self.err_span_(start,
pos,
"byte constant must be ASCII. Use a \\xHH escape for a \
non-ASCII byte");
2016-01-03 11:14:09 +02:00
return false;
}
}
}
true
}
/// Scan over a `\u{...}` escape
///
/// At this point, we have already seen the `\` and the `u`, the `{` is the current character.
/// We will read a hex number (with `_` separators), with 1 to 6 actual digits,
/// and pass over the `}`.
fn scan_unicode_escape(&mut self, delim: char) -> bool {
self.bump(); // past the {
let start_bpos = self.pos;
let mut valid = true;
if let Some('_') = self.ch {
// disallow leading `_`
self.err_span_(self.pos,
self.next_pos,
"invalid start of unicode escape");
valid = false;
}
let count = self.scan_digits(16, 16);
if count > 6 {
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
self.pos,
"overlong unicode escape (must have at most 6 hex digits)");
valid = false;
}
loop {
match self.ch {
Some('}') => {
if valid && count == 0 {
self.err_span_(start_bpos,
self.pos,
"empty unicode escape (must have at least 1 hex digit)");
valid = false;
}
self.bump(); // past the ending `}`
break;
},
Some(c) => {
if c == delim {
self.err_span_(self.pos,
self.pos,
"unterminated unicode escape (needed a `}`)");
valid = false;
break;
} else if valid {
self.err_span_char(start_bpos,
self.pos,
"invalid character in unicode escape",
c);
valid = false;
}
},
None => {
self.fatal_span_(start_bpos,
self.pos,
"unterminated unicode escape (found EOF)").raise();
}
}
self.bump();
}
valid
}
/// Scan over a float exponent.
///
/// Does nothing unless the current character is `e` or `E`. Otherwise the
/// marker and an optional `+`/`-` sign are consumed, followed by the decimal
/// digits of the exponent. If no digit follows, a fatal error is emitted and
/// raised.
fn scan_float_exponent(&mut self) {
    let at_exponent = self.ch_is('e') || self.ch_is('E');
    if !at_exponent {
        return;
    }
    self.bump();

    // Optional sign.
    if self.ch_is('-') || self.ch_is('+') {
        self.bump();
    }

    let digits = self.scan_digits(10, 10);
    if digits != 0 {
        return;
    }

    let mut err = self.struct_span_fatal(
        self.pos, self.next_pos,
        "expected at least one digit in exponent"
    );
    if let Some(ch) = self.ch {
        // The offending character may be a look-alike, e.g. a Unicode minus
        // sign used in place of `-` (Issue #49746).
        unicode_chars::check_for_substitution(self, ch, &mut err);
    }
    err.emit();
    FatalError.raise();
}
/// Check that a base is valid for a floating literal, emitting a nice
/// error if it isn't.
2015-01-17 23:33:05 +00:00
fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
match base {
2016-01-03 11:14:09 +02:00
16 => {
self.err_span_(start_bpos,
last_bpos,
"hexadecimal float literal is not supported")
}
8 => {
self.err_span_(start_bpos,
last_bpos,
"octal float literal is not supported")
}
2 => {
self.err_span_(start_bpos,
last_bpos,
"binary float literal is not supported")
}
_ => (),
}
}
2014-10-27 19:22:52 +11:00
/// Lexes a binary operator that may be followed by `=`.
///
/// Bumps past the current character, then produces `token::BinOpEq(op)` if
/// an `=` follows (consuming it as well) and `token::BinOp(op)` otherwise.
fn binop(&mut self, op: token::BinOpToken) -> token::Token {
    self.bump();
    if self.ch_is('=') {
        self.bump();
        token::BinOpEq(op)
    } else {
        token::BinOp(op)
    }
}
/// Return the next token from the string, advances the input past that
/// token, and updates the interner
fn next_token_inner(&mut self) -> Result<token::Token, ()> {
let c = self.ch;
2014-05-21 16:57:31 -07:00
if ident_start(c) {
let (is_ident_start, is_raw_ident) =
match (c.unwrap(), self.nextch(), self.nextnextch()) {
// r# followed by an identifier starter is a raw identifier.
// This is an exception to the r# case below.
('r', Some('#'), x) if ident_start(x) => (true, true),
// r as in r" or r#" is part of a raw string literal.
// b as in b' is part of a byte literal.
// They are not identifiers, and are handled further down.
('r', Some('"'), _) |
('r', Some('#'), _) |
('b', Some('"'), _) |
('b', Some('\''), _) |
('b', Some('r'), Some('"')) |
('b', Some('r'), Some('#')) => (false, false),
_ => (true, false),
};
if is_ident_start {
let raw_start = self.pos;
if is_raw_ident {
// Consume the 'r#' characters.
self.bump();
self.bump();
}
let start = self.pos;
while ident_continue(self.ch) {
self.bump();
}
return Ok(self.with_str_from(start, |string| {
// FIXME: perform NFKC normalization here. (Issue #2253)
let ident = self.mk_ident(string);
if is_raw_ident && (ident.is_path_segment_keyword() ||
ident.name == keywords::Underscore.name()) {
self.fatal_span_(raw_start, self.pos,
&format!("`r#{}` is not currently supported.", ident.name)
).raise();
}
if is_raw_ident {
let span = self.mk_sp(raw_start, self.pos);
self.sess.raw_identifier_spans.borrow_mut().push(span);
}
token::Ident(ident, is_raw_ident)
}));
}
2014-05-21 16:57:31 -07:00
}
if is_dec_digit(c) {
let num = self.scan_number(c.unwrap());
let suffix = self.scan_optional_raw_name();
debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
return Ok(token::Literal(num, suffix));
2014-05-21 16:57:31 -07:00
}
match c.expect("next_token_inner called at EOF") {
2016-01-03 11:14:09 +02:00
// One-byte tokens.
';' => {
2014-05-21 16:57:31 -07:00
self.bump();
Ok(token::Semi)
2016-01-03 11:14:09 +02:00
}
',' => {
self.bump();
Ok(token::Comma)
2016-01-03 11:14:09 +02:00
}
'.' => {
self.bump();
if self.ch_is('.') {
2016-01-03 11:14:09 +02:00
self.bump();
if self.ch_is('.') {
2016-01-03 11:14:09 +02:00
self.bump();
Ok(token::DotDotDot)
} else if self.ch_is('=') {
self.bump();
Ok(token::DotDotEq)
2016-01-03 11:14:09 +02:00
} else {
Ok(token::DotDot)
2016-01-03 11:14:09 +02:00
}
} else {
Ok(token::Dot)
}
2016-01-03 11:14:09 +02:00
}
'(' => {
self.bump();
Ok(token::OpenDelim(token::Paren))
2016-01-03 11:14:09 +02:00
}
')' => {
self.bump();
Ok(token::CloseDelim(token::Paren))
2016-01-03 11:14:09 +02:00
}
'{' => {
self.bump();
Ok(token::OpenDelim(token::Brace))
2016-01-03 11:14:09 +02:00
}
'}' => {
self.bump();
Ok(token::CloseDelim(token::Brace))
2016-01-03 11:14:09 +02:00
}
'[' => {
self.bump();
Ok(token::OpenDelim(token::Bracket))
2016-01-03 11:14:09 +02:00
}
']' => {
self.bump();
Ok(token::CloseDelim(token::Bracket))
2016-01-03 11:14:09 +02:00
}
'@' => {
self.bump();
Ok(token::At)
2016-01-03 11:14:09 +02:00
}
'#' => {
self.bump();
Ok(token::Pound)
2016-01-03 11:14:09 +02:00
}
'~' => {
self.bump();
Ok(token::Tilde)
2016-01-03 11:14:09 +02:00
}
'?' => {
self.bump();
Ok(token::Question)
2016-01-03 11:14:09 +02:00
}
':' => {
self.bump();
if self.ch_is(':') {
2016-01-03 11:14:09 +02:00
self.bump();
Ok(token::ModSep)
2016-01-03 11:14:09 +02:00
} else {
Ok(token::Colon)
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'$' => {
self.bump();
Ok(token::Dollar)
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// Multi-byte tokens.
'=' => {
2014-05-21 16:57:31 -07:00
self.bump();
if self.ch_is('=') {
2016-01-03 11:14:09 +02:00
self.bump();
Ok(token::EqEq)
} else if self.ch_is('>') {
2016-01-03 11:14:09 +02:00
self.bump();
Ok(token::FatArrow)
2016-01-03 11:14:09 +02:00
} else {
Ok(token::Eq)
2016-01-03 11:14:09 +02:00
}
}
'!' => {
2014-05-21 16:57:31 -07:00
self.bump();
if self.ch_is('=') {
2016-01-03 11:14:09 +02:00
self.bump();
Ok(token::Ne)
2016-01-03 11:14:09 +02:00
} else {
Ok(token::Not)
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'<' => {
2014-05-21 16:57:31 -07:00
self.bump();
match self.ch.unwrap_or('\x00') {
2016-01-03 11:14:09 +02:00
'=' => {
self.bump();
Ok(token::Le)
2016-01-03 11:14:09 +02:00
}
'<' => {
Ok(self.binop(token::Shl))
2016-01-03 11:14:09 +02:00
}
'-' => {
self.bump();
match self.ch.unwrap_or('\x00') {
2016-01-03 11:14:09 +02:00
_ => {
Ok(token::LArrow)
2016-01-03 11:14:09 +02:00
}
}
}
_ => {
Ok(token::Lt)
2016-01-03 11:14:09 +02:00
}
}
}
'>' => {
2014-05-21 16:57:31 -07:00
self.bump();
match self.ch.unwrap_or('\x00') {
2016-01-03 11:14:09 +02:00
'=' => {
self.bump();
Ok(token::Ge)
2016-01-03 11:14:09 +02:00
}
'>' => {
Ok(self.binop(token::Shr))
2016-01-03 11:14:09 +02:00
}
_ => {
Ok(token::Gt)
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
}
}
2016-01-03 11:14:09 +02:00
'\'' => {
// Either a character constant 'a' OR a lifetime name 'abc
let start_with_quote = self.pos;
2016-01-03 11:14:09 +02:00
self.bump();
let start = self.pos;
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// the eof will be picked up by the final `'` check below
let c2 = self.ch.unwrap_or('\x00');
2016-01-03 11:14:09 +02:00
self.bump();
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
// If the character is an ident start not followed by another single
// quote, then this is a lifetime name:
if ident_start(Some(c2)) && !self.ch_is('\'') {
while ident_continue(self.ch) {
2016-01-03 11:14:09 +02:00
self.bump();
}
// lifetimes shouldn't end with a single quote
// if we find one, then this is an invalid character literal
if self.ch_is('\'') {
self.fatal_span_verbose(start_with_quote, self.next_pos,
String::from("character literal may only contain one codepoint"))
.raise();
}
2016-01-03 11:14:09 +02:00
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let ident = self.with_str_from(start, |lifetime_name| {
2017-11-27 23:07:44 -08:00
self.mk_ident(&format!("'{}", lifetime_name))
2016-01-03 11:14:09 +02:00
});
return Ok(token::Lifetime(ident));
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
false,
'\'');
if !self.ch_is('\'') {
let pos = self.pos;
loop {
self.bump();
if self.ch_is('\'') {
let start = self.src_index(start);
let end = self.src_index(self.pos);
self.bump();
let span = self.mk_sp(start_with_quote, self.pos);
self.sess.span_diagnostic
.struct_span_err(span,
"character literal may only contain one codepoint")
.span_suggestion(span,
"if you meant to write a `str` literal, \
use double quotes",
format!("\"{}\"", &self.src[start..end]))
.emit();
return Ok(token::Literal(token::Str_(Symbol::intern("??")), None))
}
if self.ch_is('\n') || self.is_eof() || self.ch_is('/') {
// Only attempt to infer single line string literals. If we encounter
// a slash, bail out in order to avoid nonsensical suggestion when
// involving comments.
break;
}
}
self.fatal_span_verbose(start_with_quote, pos,
String::from("character literal may only contain one codepoint")).raise();
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
Symbol::intern("0")
2016-01-03 11:14:09 +02:00
};
self.bump(); // advance ch past token
2016-01-03 11:14:09 +02:00
let suffix = self.scan_optional_raw_name();
Ok(token::Literal(token::Char(id), suffix))
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'b' => {
2014-05-21 16:57:31 -07:00
self.bump();
let lit = match self.ch {
2016-01-03 11:14:09 +02:00
Some('\'') => self.scan_byte(),
Some('"') => self.scan_byte_string(),
Some('r') => self.scan_raw_byte_string(),
_ => unreachable!(), // Should have been a token::Ident above.
};
let suffix = self.scan_optional_raw_name();
Ok(token::Literal(lit, suffix))
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'"' => {
let start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
let mut valid = true;
self.bump();
while !self.ch_is('"') {
2016-01-03 11:14:09 +02:00
if self.is_eof() {
let last_bpos = self.pos;
self.fatal_span_(start_bpos,
last_bpos,
"unterminated double quote string").raise();
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
let ch_start = self.pos;
let ch = self.ch.unwrap();
2016-01-03 11:14:09 +02:00
self.bump();
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
false,
'"');
}
// adjust for the ASCII " at the start of the literal
let id = if valid {
self.name_from(start_bpos + BytePos(1))
} else {
Symbol::intern("??")
2016-01-03 11:14:09 +02:00
};
self.bump();
let suffix = self.scan_optional_raw_name();
Ok(token::Literal(token::Str_(id), suffix))
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'r' => {
let start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.bump();
let mut hash_count: u16 = 0;
while self.ch_is('#') {
2016-01-03 11:14:09 +02:00
self.bump();
hash_count += 1;
}
2014-05-21 16:57:31 -07:00
if self.is_eof() {
self.fail_unterminated_raw_string(start_bpos, hash_count);
} else if !self.ch_is('"') {
let last_bpos = self.pos;
let curr_char = self.ch.unwrap();
self.fatal_span_char(start_bpos,
last_bpos,
"found invalid character; only `#` is allowed \
in raw string delimitation",
curr_char).raise();
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
self.bump();
let content_start_bpos = self.pos;
2016-01-03 11:14:09 +02:00
let mut content_end_bpos;
let mut valid = true;
'outer: loop {
if self.is_eof() {
self.fail_unterminated_raw_string(start_bpos, hash_count);
2016-01-03 11:14:09 +02:00
}
// if self.ch_is('"') {
// content_end_bpos = self.pos;
2016-01-03 11:14:09 +02:00
// for _ in 0..hash_count {
// self.bump();
// if !self.ch_is('#') {
2016-01-03 11:14:09 +02:00
// continue 'outer;
let c = self.ch.unwrap();
2016-01-03 11:14:09 +02:00
match c {
'"' => {
content_end_bpos = self.pos;
2016-01-03 11:14:09 +02:00
for _ in 0..hash_count {
self.bump();
if !self.ch_is('#') {
2016-01-03 11:14:09 +02:00
continue 'outer;
}
}
2016-01-03 11:14:09 +02:00
break;
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
'\r' => {
if !self.nextch_is('\n') {
let last_bpos = self.pos;
2016-01-03 11:14:09 +02:00
self.err_span_(start_bpos,
last_bpos,
"bare CR not allowed in raw string, use \\r \
instead");
valid = false;
}
}
2016-01-03 11:14:09 +02:00
_ => (),
2014-05-21 16:57:31 -07:00
}
2016-01-03 11:14:09 +02:00
self.bump();
2014-05-21 16:57:31 -07:00
}
self.bump();
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from_to(content_start_bpos, content_end_bpos)
} else {
Symbol::intern("??")
2016-01-03 11:14:09 +02:00
};
let suffix = self.scan_optional_raw_name();
Ok(token::Literal(token::StrRaw(id, hash_count), suffix))
2016-01-03 11:14:09 +02:00
}
'-' => {
if self.nextch_is('>') {
self.bump();
self.bump();
Ok(token::RArrow)
2016-01-03 11:14:09 +02:00
} else {
Ok(self.binop(token::Minus))
2016-01-03 11:14:09 +02:00
}
}
'&' => {
if self.nextch_is('&') {
self.bump();
self.bump();
Ok(token::AndAnd)
2016-01-03 11:14:09 +02:00
} else {
Ok(self.binop(token::And))
2016-01-03 11:14:09 +02:00
}
}
'|' => {
match self.nextch() {
Some('|') => {
self.bump();
self.bump();
Ok(token::OrOr)
2016-01-03 11:14:09 +02:00
}
_ => {
Ok(self.binop(token::Or))
2016-01-03 11:14:09 +02:00
}
}
}
'+' => {
Ok(self.binop(token::Plus))
2016-01-03 11:14:09 +02:00
}
'*' => {
Ok(self.binop(token::Star))
2016-01-03 11:14:09 +02:00
}
'/' => {
Ok(self.binop(token::Slash))
2016-01-03 11:14:09 +02:00
}
'^' => {
Ok(self.binop(token::Caret))
2016-01-03 11:14:09 +02:00
}
'%' => {
Ok(self.binop(token::Percent))
2016-01-03 11:14:09 +02:00
}
c => {
let last_bpos = self.pos;
let bpos = self.next_pos;
2016-01-03 11:14:09 +02:00
let mut err = self.struct_fatal_span_char(last_bpos,
bpos,
"unknown start of token",
c);
unicode_chars::check_for_substitution(self, c, &mut err);
self.fatal_errs.push(err);
Err(())
2014-05-21 16:57:31 -07:00
}
}
}
/// Bumps past every consecutive `Pattern_White_Space` character, stopping
/// at the first other character or at EOF.
fn consume_whitespace(&mut self) {
    while is_pattern_whitespace(self.ch) && !self.is_eof() {
        self.bump();
    }
}
fn read_to_eol(&mut self) -> String {
let mut val = String::new();
while !self.ch_is('\n') && !self.is_eof() {
val.push(self.ch.unwrap());
2014-05-21 16:57:31 -07:00
self.bump();
}
if self.ch_is('\n') {
2016-01-03 11:14:09 +02:00
self.bump();
}
val
2014-05-21 16:57:31 -07:00
}
fn read_one_line_comment(&mut self) -> String {
let val = self.read_to_eol();
2016-01-03 11:14:09 +02:00
assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
(val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
val
2014-05-21 16:57:31 -07:00
}
/// Bumps past consecutive `Pattern_White_Space` characters, stopping at a
/// `\n`, at the first non-whitespace character, or at EOF.
fn consume_non_eol_whitespace(&mut self) {
    while is_pattern_whitespace(self.ch) && !self.ch_is('\n') && !self.is_eof() {
        self.bump();
    }
}
fn peeking_at_comment(&self) -> bool {
(self.ch_is('/') && self.nextch_is('/')) || (self.ch_is('/') && self.nextch_is('*')) ||
// consider shebangs comments, but not inner attributes
(self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
2014-05-21 16:57:31 -07:00
}
2014-07-02 09:39:48 -07:00
fn scan_byte(&mut self) -> token::Lit {
2014-07-02 09:39:48 -07:00
self.bump();
let start = self.pos;
2014-07-02 09:39:48 -07:00
// the eof will be picked up by the final `'` check below
let c2 = self.ch.unwrap_or('\x00');
2014-07-02 09:39:48 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
true,
'\'');
if !self.ch_is('\'') {
2014-07-02 09:39:48 -07:00
// Byte offsetting here is okay because the
// character before position `start` are an
// ascii single quote and ascii 'b'.
let pos = self.pos;
self.fatal_span_verbose(start - BytePos(2),
pos,
"unterminated byte constant".to_string()).raise();
2014-07-02 09:39:48 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
Symbol::intern("?")
2016-01-03 11:14:09 +02:00
};
self.bump(); // advance ch past token
token::Byte(id)
2014-07-02 09:39:48 -07:00
}
/// Scans a two-hex-digit escape by delegating to `scan_hex_digits`.
///
/// `delim` and `below_0x7f_only` are forwarded unchanged; the result of
/// `scan_hex_digits` is returned as is.
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
    let digit_count = 2;
    self.scan_hex_digits(digit_count, delim, below_0x7f_only)
}
fn scan_byte_string(&mut self) -> token::Lit {
2014-07-02 09:39:48 -07:00
self.bump();
let start = self.pos;
let mut valid = true;
while !self.ch_is('"') {
2014-07-02 09:39:48 -07:00
if self.is_eof() {
let pos = self.pos;
self.fatal_span_(start, pos, "unterminated double quote byte string").raise();
2014-07-02 09:39:48 -07:00
}
let ch_start = self.pos;
let ch = self.ch.unwrap();
2014-07-02 09:39:48 -07:00
self.bump();
2016-01-03 11:14:09 +02:00
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
true,
'"');
2014-07-02 09:39:48 -07:00
}
2016-01-03 11:14:09 +02:00
let id = if valid {
self.name_from(start)
} else {
Symbol::intern("??")
2016-01-03 11:14:09 +02:00
};
2014-07-02 09:39:48 -07:00
self.bump();
token::ByteStr(id)
2014-07-02 09:39:48 -07:00
}
fn scan_raw_byte_string(&mut self) -> token::Lit {
let start_bpos = self.pos;
2014-07-02 09:39:48 -07:00
self.bump();
let mut hash_count = 0;
while self.ch_is('#') {
2014-07-02 09:39:48 -07:00
self.bump();
hash_count += 1;
}
if self.is_eof() {
self.fail_unterminated_raw_string(start_bpos, hash_count);
} else if !self.ch_is('"') {
let pos = self.pos;
let ch = self.ch.unwrap();
self.fatal_span_char(start_bpos,
pos,
2016-01-03 11:14:09 +02:00
"found invalid character; only `#` is allowed in raw \
string delimitation",
ch).raise();
2014-07-02 09:39:48 -07:00
}
self.bump();
let content_start_bpos = self.pos;
2014-07-02 09:39:48 -07:00
let mut content_end_bpos;
'outer: loop {
match self.ch {
2014-07-02 09:39:48 -07:00
None => {
self.fail_unterminated_raw_string(start_bpos, hash_count);
2016-01-03 11:14:09 +02:00
}
2014-07-02 09:39:48 -07:00
Some('"') => {
content_end_bpos = self.pos;
for _ in 0..hash_count {
2014-07-02 09:39:48 -07:00
self.bump();
if !self.ch_is('#') {
2014-07-02 09:39:48 -07:00
continue 'outer;
}
}
break;
2016-01-03 11:14:09 +02:00
}
Some(c) => {
if c > '\x7F' {
let pos = self.pos;
self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
2016-01-03 11:14:09 +02:00
}
2014-07-02 09:39:48 -07:00
}
}
self.bump();
}
self.bump();
token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
hash_count)
2014-07-02 09:39:48 -07:00
}
2014-05-21 16:57:31 -07:00
}
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
pub fn is_pattern_whitespace(c: Option<char>) -> bool {
c.map_or(false, Pattern_White_Space)
2014-05-21 16:57:31 -07:00
}
/// Returns `true` if `c` is `Some` character within the inclusive range
/// `lo..=hi`; `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |ch| lo <= ch && ch <= hi)
}
fn is_dec_digit(c: Option<char>) -> bool {
in_range(c, '0', '9')
2016-01-03 11:14:09 +02:00
}
2014-05-21 16:57:31 -07:00
pub fn is_doc_comment(s: &str) -> bool {
2016-01-03 11:14:09 +02:00
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
s.starts_with("//!");
debug!("is {:?} a doc comment? {}", s, res);
res
2014-05-21 16:57:31 -07:00
}
pub fn is_block_doc_comment(s: &str) -> bool {
2016-01-03 11:20:06 +02:00
// Prevent `/**/` from being parsed as a doc comment
2016-01-03 11:14:09 +02:00
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
2016-01-03 11:20:06 +02:00
s.starts_with("/*!")) && s.len() >= 5;
debug!("is {:?} a doc comment? {}", s, res);
res
2014-05-21 16:57:31 -07:00
}
fn ident_start(c: Option<char>) -> bool {
2016-01-03 11:14:09 +02:00
let c = match c {
Some(c) => c,
None => return false,
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
2014-05-21 16:57:31 -07:00
}
fn ident_continue(c: Option<char>) -> bool {
2016-01-03 11:14:09 +02:00
let c = match c {
Some(c) => c,
None => return false,
};
2014-05-21 16:57:31 -07:00
2016-01-03 11:14:09 +02:00
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
(c > '\x7f' && c.is_xid_continue())
2014-05-21 16:57:31 -07:00
}
/// Returns `true` if `s` is non-empty, its first character satisfies
/// `ident_start`, and every following character satisfies `ident_continue`.
///
/// NOTE(review): the earlier comment here said this also accepts a lifetime
/// identifier; a leading `'` is rejected by `ident_start`, so presumably
/// callers strip it first -- confirm against the call sites.
pub fn is_valid_ident(s: &str) -> bool {
    let mut rest = s.chars();
    let first = rest.next();
    if !ident_start(first) {
        return false;
    }
    rest.all(|ch| ident_continue(Some(ch)))
}
#[cfg(test)]
mod tests {
    use super::*;

    use ast::{Ident, CrateConfig};
    use symbol::Symbol;
    use syntax_pos::{BytePos, Span, NO_EXPANSION};
    use codemap::CodeMap;
    use errors;
    use feature_gate::UnstableFeatures;
    use parse::token;
    use std::collections::HashSet;
    use std::io;
    use std::path::PathBuf;
    use diagnostics::plugin::ErrorMap;
    use rustc_data_structures::sync::Lock;
    use with_globals;

    // build a parse session whose diagnostics are written to a sink
    fn mk_sess(cm: Lrc<CodeMap>) -> ParseSess {
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
                                                          Some(cm.clone()),
                                                          false,
                                                          false);
        ParseSess {
            span_diagnostic: errors::Handler::with_emitter(true, false, Box::new(emitter)),
            unstable_features: UnstableFeatures::from_environment(),
            config: CrateConfig::new(),
            included_mod_stack: Lock::new(Vec::new()),
            code_map: cm,
            missing_fragment_specifiers: Lock::new(HashSet::new()),
            raw_identifier_spans: Lock::new(Vec::new()),
            registered_diagnostics: Lock::new(ErrorMap::new()),
            non_modrs_mods: Lock::new(vec![]),
        }
    }

    // open a string reader for the given string
    fn setup<'a>(cm: &CodeMap,
                 sess: &'a ParseSess,
                 teststr: String)
                 -> StringReader<'a> {
        let fm = cm.new_filemap(PathBuf::from("zebra.rs").into(), teststr);
        StringReader::new(sess, fm, None)
    }

    #[test]
    fn t1() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            let mut string_reader = setup(&cm,
                                        &sh,
                                        "/* my source file */ fn main() { println!(\"zebra\"); }\n"
                                            .to_string());
            let id = Ident::from_str("fn");
            assert_eq!(string_reader.next_token().tok, token::Comment);
            assert_eq!(string_reader.next_token().tok, token::Whitespace);
            let tok1 = string_reader.next_token();
            let tok2 = TokenAndSpan {
                tok: token::Ident(id, false),
                sp: Span::new(BytePos(21), BytePos(23), NO_EXPANSION),
            };
            assert_eq!(tok1, tok2);
            assert_eq!(string_reader.next_token().tok, token::Whitespace);
            // the 'main' id is already read:
            assert_eq!(string_reader.pos.clone(), BytePos(28));
            // read another token:
            let tok3 = string_reader.next_token();
            let tok4 = TokenAndSpan {
                tok: mk_ident("main"),
                sp: Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
            };
            assert_eq!(tok3, tok4);
            // the lparen is already read:
            assert_eq!(string_reader.pos.clone(), BytePos(29))
        })
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str) -> token::Token {
        token::Token::from_ast_ident(Ident::from_str(id))
    }

    #[test]
    fn doublecolonparsing() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            check_tokenization(setup(&cm, &sh, "a b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
        })
    }

    #[test]
    fn dcparsing_2() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                            vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
        })
    }

    #[test]
    fn dcparsing_3() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
        })
    }

    #[test]
    fn dcparsing_4() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                            vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
        })
    }

    #[test]
    fn character_a() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern("a")), None));
        })
    }

    #[test]
    fn character_space() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern(" ")), None));
        })
    }

    #[test]
    fn character_escaped() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern("\\n")), None));
        })
    }

    #[test]
    fn lifetime_name() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
                    token::Lifetime(Ident::from_str("'abc")));
        })
    }

    #[test]
    fn raw_string() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                        .next_token()
                        .tok,
                    token::Literal(token::StrRaw(Symbol::intern("\"#a\\b\x00c\""), 3), None));
        })
    }

    #[test]
    fn literal_suffixes() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            macro_rules! test {
                ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                    assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                            token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                            Some(Symbol::intern("suffix"))));
                    // with a whitespace separator:
                    assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                            token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                            None));
                }}
            }

            test!("'a'", Char, "a");
            test!("b'a'", Byte, "a");
            test!("\"a\"", Str_, "a");
            test!("b\"a\"", ByteStr, "a");
            test!("1234", Integer, "1234");
            test!("0b101", Integer, "0b101");
            test!("0xABC", Integer, "0xABC");
            test!("1.0", Float, "1.0");
            test!("1.0e10", Float, "1.0e10");

            assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok,
                    token::Literal(token::Integer(Symbol::intern("2")),
                                    Some(Symbol::intern("us"))));
            assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok,
                    token::Literal(token::StrRaw(Symbol::intern("raw"), 3),
                                    Some(Symbol::intern("suffix"))));
            assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok,
                    token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3),
                                    Some(Symbol::intern("suffix"))));
        })
    }

    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    #[test]
    fn nested_block_comments() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
            match lexer.next_token().tok {
                token::Comment => {}
                _ => panic!("expected a comment!"),
            }
            assert_eq!(lexer.next_token().tok,
                    token::Literal(token::Char(Symbol::intern("a")), None));
        })
    }

    #[test]
    fn crlf_comments() {
        with_globals(|| {
            let cm = Lrc::new(CodeMap::new(FilePathMapping::empty()));
            let sh = mk_sess(cm.clone());
            let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
            let comment = lexer.next_token();
            assert_eq!(comment.tok, token::Comment);
            assert_eq!((comment.sp.lo(), comment.sp.hi()), (BytePos(0), BytePos(7)));
            assert_eq!(lexer.next_token().tok, token::Whitespace);
            assert_eq!(lexer.next_token().tok,
                    token::DocComment(Symbol::intern("/// test")));
        })
    }
}