2014-05-21 16:57:31 -07:00
|
|
|
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2016-11-16 08:21:52 +00:00
|
|
|
use ast::{self, Ident};
|
2017-03-15 00:22:48 +00:00
|
|
|
use syntax_pos::{self, BytePos, CharPos, Pos, Span, NO_EXPANSION};
|
2017-04-24 19:01:19 +02:00
|
|
|
use codemap::{CodeMap, FilePathMapping};
|
2017-01-17 01:14:53 +00:00
|
|
|
use errors::{FatalError, DiagnosticBuilder};
|
|
|
|
use parse::{token, ParseSess};
|
2015-04-21 10:19:53 -07:00
|
|
|
use str::char_at;
|
2016-11-16 08:21:52 +00:00
|
|
|
use symbol::{Symbol, keywords};
|
2016-11-29 14:38:08 -05:00
|
|
|
use std_unicode::property::Pattern_White_Space;
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2015-04-15 22:15:50 -07:00
|
|
|
use std::borrow::Cow;
|
2014-05-21 16:57:31 -07:00
|
|
|
use std::char;
|
|
|
|
use std::mem::replace;
|
|
|
|
use std::rc::Rc;
|
|
|
|
|
|
|
|
pub mod comments;
|
2017-01-12 23:32:00 +00:00
|
|
|
mod tokentrees;
|
2015-11-15 02:37:49 +05:30
|
|
|
mod unicode_chars;
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2015-01-28 08:34:18 -05:00
|
|
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
2014-05-21 16:57:31 -07:00
|
|
|
pub struct TokenAndSpan {
|
|
|
|
pub tok: token::Token,
|
|
|
|
pub sp: Span,
|
|
|
|
}
|
|
|
|
|
2016-10-19 23:33:41 +03:00
|
|
|
impl Default for TokenAndSpan {
|
|
|
|
fn default() -> Self {
|
|
|
|
TokenAndSpan { tok: token::Underscore, sp: syntax_pos::DUMMY_SP }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
pub struct StringReader<'a> {
|
2017-01-17 01:14:53 +00:00
|
|
|
pub sess: &'a ParseSess,
|
2014-06-09 13:12:30 -07:00
|
|
|
/// The absolute offset within the codemap of the next character to read
|
2016-10-04 11:41:01 +11:00
|
|
|
pub next_pos: BytePos,
|
2016-10-04 11:46:54 +11:00
|
|
|
/// The absolute offset within the codemap of the current character
|
|
|
|
pub pos: BytePos,
|
2014-06-09 13:12:30 -07:00
|
|
|
/// The column of the next character to read
|
2014-05-21 16:57:31 -07:00
|
|
|
pub col: CharPos,
|
2016-10-04 11:55:58 +11:00
|
|
|
/// The current character (which has been read from self.pos)
|
|
|
|
pub ch: Option<char>,
|
2016-06-21 18:08:13 -04:00
|
|
|
pub filemap: Rc<syntax_pos::FileMap>,
|
2016-08-29 16:16:43 +12:00
|
|
|
/// If Some, stop reading the source at this position (inclusive).
|
|
|
|
pub terminator: Option<BytePos>,
|
2016-06-10 13:00:21 +03:00
|
|
|
/// Whether to record new-lines and multibyte chars in filemap.
|
|
|
|
/// This is only necessary the first time a filemap is lexed.
|
|
|
|
/// If part of a filemap is being re-lexed, this should be set to false.
|
|
|
|
pub save_new_lines_and_multibyte: bool,
|
2016-01-03 11:14:09 +02:00
|
|
|
// cached:
|
2014-05-21 16:57:31 -07:00
|
|
|
pub peek_tok: token::Token,
|
|
|
|
pub peek_span: Span,
|
2016-04-25 17:20:32 +02:00
|
|
|
pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
|
2015-02-11 18:29:49 +01:00
|
|
|
// cache a direct reference to the source text, so that we don't have to
|
|
|
|
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
|
2016-01-03 11:14:09 +02:00
|
|
|
source_text: Rc<String>,
|
2017-01-12 23:32:00 +00:00
|
|
|
/// Stack of open delimiters and their spans. Used for error message.
|
|
|
|
token: token::Token,
|
|
|
|
span: Span,
|
|
|
|
open_braces: Vec<(token::DelimToken, Span)>,
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2017-03-15 00:22:48 +00:00
|
|
|
fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
|
|
|
|
Span { lo: lo, hi: hi, ctxt: NO_EXPANSION }
|
|
|
|
}
|
|
|
|
|
2017-01-12 23:32:00 +00:00
|
|
|
impl<'a> StringReader<'a> {
|
2017-05-04 13:14:39 +01:00
|
|
|
fn next_token(&mut self) -> TokenAndSpan {
|
2017-01-12 23:32:00 +00:00
|
|
|
let res = self.try_next_token();
|
|
|
|
self.unwrap_or_abort(res)
|
|
|
|
}
|
|
|
|
fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
|
|
|
|
match res {
|
|
|
|
Ok(tok) => tok,
|
|
|
|
Err(_) => {
|
|
|
|
self.emit_fatal_errors();
|
|
|
|
panic!(FatalError);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
|
|
|
|
let mut t = self.try_next_token()?;
|
|
|
|
loop {
|
|
|
|
match t.tok {
|
|
|
|
token::Whitespace | token::Comment | token::Shebang(_) => {
|
|
|
|
t = self.try_next_token()?;
|
|
|
|
}
|
|
|
|
_ => break,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
self.token = t.tok.clone();
|
|
|
|
self.span = t.sp;
|
|
|
|
Ok(t)
|
|
|
|
}
|
|
|
|
pub fn real_token(&mut self) -> TokenAndSpan {
|
|
|
|
let res = self.try_real_token();
|
|
|
|
self.unwrap_or_abort(res)
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
fn is_eof(&self) -> bool {
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch.is_none() {
|
2016-08-29 16:16:43 +12:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
match self.terminator {
|
2016-10-04 11:41:01 +11:00
|
|
|
Some(t) => self.next_pos > t,
|
2016-08-29 16:16:43 +12:00
|
|
|
None => false,
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-06-09 13:12:30 -07:00
|
|
|
/// Return the next token. EFFECT: advances the string_reader.
|
2017-01-17 04:50:46 +00:00
|
|
|
pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
|
2016-04-25 17:20:32 +02:00
|
|
|
assert!(self.fatal_errs.is_empty());
|
2014-05-21 16:57:31 -07:00
|
|
|
let ret_val = TokenAndSpan {
|
2014-10-27 19:22:52 +11:00
|
|
|
tok: replace(&mut self.peek_tok, token::Underscore),
|
2014-05-21 16:57:31 -07:00
|
|
|
sp: self.peek_span,
|
|
|
|
};
|
2016-04-25 17:20:32 +02:00
|
|
|
self.advance_token()?;
|
|
|
|
Ok(ret_val)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2015-10-23 19:20:03 -07:00
|
|
|
fn fatal(&self, m: &str) -> FatalError {
|
2014-05-24 01:12:22 -07:00
|
|
|
self.fatal_span(self.peek_span, m)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2017-01-17 04:50:46 +00:00
|
|
|
pub fn emit_fatal_errors(&mut self) {
|
2016-04-25 17:20:32 +02:00
|
|
|
for err in &mut self.fatal_errs {
|
|
|
|
err.emit();
|
|
|
|
}
|
|
|
|
self.fatal_errs.clear();
|
|
|
|
}
|
2017-01-17 04:50:46 +00:00
|
|
|
pub fn peek(&self) -> TokenAndSpan {
|
2014-05-21 16:57:31 -07:00
|
|
|
// FIXME(pcwalton): Bad copy!
|
|
|
|
TokenAndSpan {
|
|
|
|
tok: self.peek_tok.clone(),
|
|
|
|
sp: self.peek_span,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> StringReader<'a> {
|
2016-10-04 11:55:58 +11:00
|
|
|
/// For comments.rs, which hackily pokes into next_pos and ch
|
2017-01-17 01:14:53 +00:00
|
|
|
pub fn new_raw<'b>(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
|
|
|
|
let mut sr = StringReader::new_raw_internal(sess, filemap);
|
2016-08-29 16:16:43 +12:00
|
|
|
sr.bump();
|
|
|
|
sr
|
|
|
|
}
|
|
|
|
|
2017-01-17 01:14:53 +00:00
|
|
|
fn new_raw_internal(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
|
2015-02-11 18:29:49 +01:00
|
|
|
if filemap.src.is_none() {
|
2017-01-17 01:14:53 +00:00
|
|
|
sess.span_diagnostic.bug(&format!("Cannot lex filemap without source: {}",
|
|
|
|
filemap.name));
|
2015-02-11 18:29:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
let source_text = (*filemap.src.as_ref().unwrap()).clone();
|
|
|
|
|
2016-08-29 16:16:43 +12:00
|
|
|
StringReader {
|
2017-01-17 01:14:53 +00:00
|
|
|
sess: sess,
|
2016-10-04 11:41:01 +11:00
|
|
|
next_pos: filemap.start_pos,
|
2016-10-04 11:46:54 +11:00
|
|
|
pos: filemap.start_pos,
|
2014-05-21 16:57:31 -07:00
|
|
|
col: CharPos(0),
|
2016-10-04 11:55:58 +11:00
|
|
|
ch: Some('\n'),
|
2014-05-21 16:57:31 -07:00
|
|
|
filemap: filemap,
|
2016-08-29 16:16:43 +12:00
|
|
|
terminator: None,
|
2016-06-10 13:00:21 +03:00
|
|
|
save_new_lines_and_multibyte: true,
|
2016-01-03 11:14:09 +02:00
|
|
|
// dummy values; not read
|
2014-10-27 19:22:52 +11:00
|
|
|
peek_tok: token::Eof,
|
2016-06-21 18:08:13 -04:00
|
|
|
peek_span: syntax_pos::DUMMY_SP,
|
2016-01-03 11:14:09 +02:00
|
|
|
source_text: source_text,
|
2016-04-25 17:20:32 +02:00
|
|
|
fatal_errs: Vec::new(),
|
2017-01-12 23:32:00 +00:00
|
|
|
token: token::Eof,
|
|
|
|
span: syntax_pos::DUMMY_SP,
|
|
|
|
open_braces: Vec::new(),
|
2016-08-29 16:16:43 +12:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2017-01-17 01:14:53 +00:00
|
|
|
pub fn new(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
|
|
|
|
let mut sr = StringReader::new_raw(sess, filemap);
|
2016-04-25 17:20:32 +02:00
|
|
|
if let Err(_) = sr.advance_token() {
|
|
|
|
sr.emit_fatal_errors();
|
|
|
|
panic!(FatalError);
|
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
sr
|
|
|
|
}
|
|
|
|
|
2016-06-10 13:00:21 +03:00
|
|
|
pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
|
|
|
|
let begin = sess.codemap().lookup_byte_offset(span.lo);
|
|
|
|
let end = sess.codemap().lookup_byte_offset(span.hi);
|
|
|
|
|
|
|
|
// Make the range zero-length if the span is invalid.
|
|
|
|
if span.lo > span.hi || begin.fm.start_pos != end.fm.start_pos {
|
|
|
|
span.hi = span.lo;
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut sr = StringReader::new_raw_internal(sess, begin.fm);
|
|
|
|
|
|
|
|
// Seek the lexer to the right byte range.
|
|
|
|
sr.save_new_lines_and_multibyte = false;
|
|
|
|
sr.next_pos = span.lo;
|
|
|
|
sr.terminator = Some(span.hi);
|
|
|
|
|
|
|
|
sr.bump();
|
|
|
|
|
|
|
|
if let Err(_) = sr.advance_token() {
|
|
|
|
sr.emit_fatal_errors();
|
|
|
|
panic!(FatalError);
|
|
|
|
}
|
|
|
|
sr
|
|
|
|
}
|
|
|
|
|
2016-10-04 11:55:58 +11:00
|
|
|
pub fn ch_is(&self, c: char) -> bool {
|
|
|
|
self.ch == Some(c)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-05-24 01:12:22 -07:00
|
|
|
/// Report a fatal lexical error with a given span.
|
2015-10-23 19:20:03 -07:00
|
|
|
pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
|
2017-01-17 01:14:53 +00:00
|
|
|
self.sess.span_diagnostic.span_fatal(sp, m)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-05-24 01:12:22 -07:00
|
|
|
/// Report a lexical error with a given span.
|
|
|
|
pub fn err_span(&self, sp: Span, m: &str) {
|
2017-01-17 01:14:53 +00:00
|
|
|
self.sess.span_diagnostic.span_err(sp, m)
|
2014-05-24 01:12:22 -07:00
|
|
|
}
|
|
|
|
|
2015-07-10 21:37:21 +03:00
|
|
|
|
2014-05-24 01:12:22 -07:00
|
|
|
/// Report a fatal error spanning [`from_pos`, `to_pos`).
|
2015-10-23 19:20:03 -07:00
|
|
|
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
|
2017-03-15 00:22:48 +00:00
|
|
|
self.fatal_span(mk_sp(from_pos, to_pos), m)
|
2014-05-24 01:12:22 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Report a lexical error spanning [`from_pos`, `to_pos`).
|
|
|
|
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
|
2017-03-15 00:22:48 +00:00
|
|
|
self.err_span(mk_sp(from_pos, to_pos), m)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
|
|
|
/// escaped character to the error message
|
2015-10-23 19:20:03 -07:00
|
|
|
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
|
2014-05-21 16:57:31 -07:00
|
|
|
let mut m = m.to_string();
|
|
|
|
m.push_str(": ");
|
2016-01-03 11:14:09 +02:00
|
|
|
for c in c.escape_default() {
|
|
|
|
m.push(c)
|
|
|
|
}
|
2015-10-23 19:20:03 -07:00
|
|
|
self.fatal_span_(from_pos, to_pos, &m[..])
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2015-12-21 10:00:43 +13:00
|
|
|
fn struct_fatal_span_char(&self,
|
|
|
|
from_pos: BytePos,
|
|
|
|
to_pos: BytePos,
|
|
|
|
m: &str,
|
|
|
|
c: char)
|
2016-01-03 11:14:09 +02:00
|
|
|
-> DiagnosticBuilder<'a> {
|
2015-12-21 10:00:43 +13:00
|
|
|
let mut m = m.to_string();
|
|
|
|
m.push_str(": ");
|
2016-01-03 11:14:09 +02:00
|
|
|
for c in c.escape_default() {
|
|
|
|
m.push(c)
|
|
|
|
}
|
2017-03-15 00:22:48 +00:00
|
|
|
self.sess.span_diagnostic.struct_span_fatal(mk_sp(from_pos, to_pos), &m[..])
|
2015-12-21 10:00:43 +13:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
|
|
|
|
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
|
|
|
/// escaped character to the error message
|
2014-05-24 01:12:22 -07:00
|
|
|
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
2014-05-21 16:57:31 -07:00
|
|
|
let mut m = m.to_string();
|
|
|
|
m.push_str(": ");
|
2016-01-03 11:14:09 +02:00
|
|
|
for c in c.escape_default() {
|
|
|
|
m.push(c)
|
|
|
|
}
|
2015-02-18 14:48:57 -05:00
|
|
|
self.err_span_(from_pos, to_pos, &m[..]);
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2015-12-21 10:00:43 +13:00
|
|
|
fn struct_err_span_char(&self,
|
|
|
|
from_pos: BytePos,
|
|
|
|
to_pos: BytePos,
|
|
|
|
m: &str,
|
|
|
|
c: char)
|
2016-01-03 11:14:09 +02:00
|
|
|
-> DiagnosticBuilder<'a> {
|
2015-12-21 10:00:43 +13:00
|
|
|
let mut m = m.to_string();
|
|
|
|
m.push_str(": ");
|
2016-01-03 11:14:09 +02:00
|
|
|
for c in c.escape_default() {
|
|
|
|
m.push(c)
|
|
|
|
}
|
2017-03-15 00:22:48 +00:00
|
|
|
self.sess.span_diagnostic.struct_span_err(mk_sp(from_pos, to_pos), &m[..])
|
2015-12-21 10:00:43 +13:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
|
|
|
|
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
|
|
|
|
/// offending string to the error message
|
2015-10-23 19:20:03 -07:00
|
|
|
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
|
2014-05-21 16:57:31 -07:00
|
|
|
m.push_str(": ");
|
2015-01-17 23:49:08 +00:00
|
|
|
let from = self.byte_offset(from_pos).to_usize();
|
|
|
|
let to = self.byte_offset(to_pos).to_usize();
|
2015-02-11 18:29:49 +01:00
|
|
|
m.push_str(&self.source_text[from..to]);
|
2015-10-23 19:20:03 -07:00
|
|
|
self.fatal_span_(from_pos, to_pos, &m[..])
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Advance peek_tok and peek_span to refer to the next token, and
|
|
|
|
/// possibly update the interner.
|
2016-04-25 17:20:32 +02:00
|
|
|
fn advance_token(&mut self) -> Result<(), ()> {
|
2014-07-04 22:30:39 -07:00
|
|
|
match self.scan_whitespace_or_comment() {
|
2014-05-21 16:57:31 -07:00
|
|
|
Some(comment) => {
|
|
|
|
self.peek_span = comment.sp;
|
|
|
|
self.peek_tok = comment.tok;
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
None => {
|
|
|
|
if self.is_eof() {
|
2014-10-27 19:22:52 +11:00
|
|
|
self.peek_tok = token::Eof;
|
2017-03-15 00:22:48 +00:00
|
|
|
self.peek_span = mk_sp(self.filemap.end_pos, self.filemap.end_pos);
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bytepos = self.pos;
|
2016-04-25 17:20:32 +02:00
|
|
|
self.peek_tok = self.next_token_inner()?;
|
2017-03-15 00:22:48 +00:00
|
|
|
self.peek_span = mk_sp(start_bytepos, self.pos);
|
2014-05-21 16:57:31 -07:00
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
2016-04-25 17:20:32 +02:00
|
|
|
Ok(())
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Translate an absolute codemap position into an offset relative to the
/// start of this filemap (an index into `source_text`).
fn byte_offset(&self, pos: BytePos) -> BytePos {
    pos - self.filemap.start_pos
}
|
|
|
|
|
|
|
|
/// Calls `f` with a string slice of the source text spanning from `start`
|
2016-10-04 11:46:54 +11:00
|
|
|
/// up to but excluding `self.pos`, meaning the slice does not include
|
2016-10-04 11:55:58 +11:00
|
|
|
/// the character `self.ch`.
|
2016-01-03 11:14:09 +02:00
|
|
|
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
|
|
|
|
where F: FnOnce(&str) -> T
|
2014-12-08 13:28:32 -05:00
|
|
|
{
|
2016-10-04 11:46:54 +11:00
|
|
|
self.with_str_from_to(start, self.pos, f)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-07-06 01:17:59 -07:00
|
|
|
/// Create a Name from a given offset to the current offset, each
|
2014-06-24 17:44:50 -07:00
|
|
|
/// adjusted 1 towards each other (assumes that on either side there is a
|
|
|
|
/// single-byte delimiter).
|
2014-07-06 01:17:59 -07:00
|
|
|
pub fn name_from(&self, start: BytePos) -> ast::Name {
|
2016-10-04 11:46:54 +11:00
|
|
|
debug!("taking an ident from {:?} to {:?}", start, self.pos);
|
2016-11-16 08:21:52 +00:00
|
|
|
self.with_str_from(start, Symbol::intern)
|
2014-06-24 17:44:50 -07:00
|
|
|
}
|
|
|
|
|
2014-07-06 01:17:59 -07:00
|
|
|
/// As name_from, with an explicit endpoint.
|
|
|
|
pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("taking an ident from {:?} to {:?}", start, end);
|
2016-11-16 08:21:52 +00:00
|
|
|
self.with_str_from_to(start, end, Symbol::intern)
|
2014-06-24 17:44:50 -07:00
|
|
|
}
|
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
/// Calls `f` with a string slice of the source text spanning from `start`
|
|
|
|
/// up to but excluding `end`.
|
2016-01-03 11:14:09 +02:00
|
|
|
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
|
|
|
|
where F: FnOnce(&str) -> T
|
2014-12-08 13:28:32 -05:00
|
|
|
{
|
2016-01-03 11:14:09 +02:00
|
|
|
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-05-24 01:13:59 -07:00
|
|
|
/// Converts CRLF to LF in the given string, raising an error on bare CR.
|
2016-01-03 11:14:09 +02:00
|
|
|
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
|
2015-01-28 01:01:48 +00:00
|
|
|
let mut i = 0;
|
2014-05-24 01:13:59 -07:00
|
|
|
while i < s.len() {
|
2015-04-21 10:19:53 -07:00
|
|
|
let ch = char_at(s, i);
|
2015-03-10 16:29:02 -07:00
|
|
|
let next = i + ch.len_utf8();
|
2014-05-24 01:13:59 -07:00
|
|
|
if ch == '\r' {
|
2015-04-21 10:19:53 -07:00
|
|
|
if next < s.len() && char_at(s, next) == '\n' {
|
2015-04-15 22:15:50 -07:00
|
|
|
return translate_crlf_(self, start, s, errmsg, i).into();
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
|
|
|
let pos = start + BytePos(i as u32);
|
|
|
|
let end_pos = start + BytePos(next as u32);
|
|
|
|
self.err_span_(pos, end_pos, errmsg);
|
|
|
|
}
|
|
|
|
i = next;
|
|
|
|
}
|
2015-04-15 22:15:50 -07:00
|
|
|
return s.into();
|
2014-05-24 01:13:59 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
fn translate_crlf_(rdr: &StringReader,
|
|
|
|
start: BytePos,
|
|
|
|
s: &str,
|
|
|
|
errmsg: &str,
|
|
|
|
mut i: usize)
|
|
|
|
-> String {
|
2014-05-24 01:13:59 -07:00
|
|
|
let mut buf = String::with_capacity(s.len());
|
|
|
|
let mut j = 0;
|
|
|
|
while i < s.len() {
|
2015-04-21 10:19:53 -07:00
|
|
|
let ch = char_at(s, i);
|
2015-03-10 16:29:02 -07:00
|
|
|
let next = i + ch.len_utf8();
|
2014-05-24 01:13:59 -07:00
|
|
|
if ch == '\r' {
|
2016-01-03 11:14:09 +02:00
|
|
|
if j < i {
|
|
|
|
buf.push_str(&s[j..i]);
|
|
|
|
}
|
2014-05-24 01:13:59 -07:00
|
|
|
j = next;
|
2015-04-21 10:19:53 -07:00
|
|
|
if next >= s.len() || char_at(s, next) != '\n' {
|
2014-05-24 01:13:59 -07:00
|
|
|
let pos = start + BytePos(i as u32);
|
|
|
|
let end_pos = start + BytePos(next as u32);
|
|
|
|
rdr.err_span_(pos, end_pos, errmsg);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i = next;
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
if j < s.len() {
|
|
|
|
buf.push_str(&s[j..]);
|
|
|
|
}
|
2014-05-24 01:13:59 -07:00
|
|
|
buf
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
/// Advance the StringReader by one character. If a newline is
|
|
|
|
/// discovered, add it to the FileMap's list of line start offsets.
|
|
|
|
pub fn bump(&mut self) {
|
2016-10-04 12:04:19 +11:00
|
|
|
let new_pos = self.next_pos;
|
|
|
|
let new_byte_offset = self.byte_offset(new_pos).to_usize();
|
2016-06-10 13:00:21 +03:00
|
|
|
let end = self.terminator.map_or(self.source_text.len(), |t| {
|
|
|
|
self.byte_offset(t).to_usize()
|
|
|
|
});
|
|
|
|
if new_byte_offset < end {
|
2016-10-04 12:04:19 +11:00
|
|
|
let old_ch_is_newline = self.ch.unwrap() == '\n';
|
|
|
|
let new_ch = char_at(&self.source_text, new_byte_offset);
|
|
|
|
let new_ch_len = new_ch.len_utf8();
|
|
|
|
|
|
|
|
self.ch = Some(new_ch);
|
|
|
|
self.pos = new_pos;
|
|
|
|
self.next_pos = new_pos + Pos::from_usize(new_ch_len);
|
|
|
|
if old_ch_is_newline {
|
2016-06-10 13:00:21 +03:00
|
|
|
if self.save_new_lines_and_multibyte {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.filemap.next_line(self.pos);
|
2016-08-29 16:16:43 +12:00
|
|
|
}
|
2015-01-28 01:01:48 +00:00
|
|
|
self.col = CharPos(0);
|
2016-10-04 12:04:19 +11:00
|
|
|
} else {
|
|
|
|
self.col = self.col + CharPos(1);
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-10-04 12:04:19 +11:00
|
|
|
if new_ch_len > 1 {
|
2016-06-10 13:00:21 +03:00
|
|
|
if self.save_new_lines_and_multibyte {
|
|
|
|
self.filemap.record_multibyte_char(self.pos, new_ch_len);
|
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
} else {
|
2016-10-04 11:55:58 +11:00
|
|
|
self.ch = None;
|
2016-10-04 12:04:19 +11:00
|
|
|
self.pos = new_pos;
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn nextch(&self) -> Option<char> {
|
2016-10-04 11:41:01 +11:00
|
|
|
let offset = self.byte_offset(self.next_pos).to_usize();
|
2015-02-11 18:29:49 +01:00
|
|
|
if offset < self.source_text.len() {
|
2015-04-21 10:19:53 -07:00
|
|
|
Some(char_at(&self.source_text, offset))
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn nextch_is(&self, c: char) -> bool {
|
|
|
|
self.nextch() == Some(c)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn nextnextch(&self) -> Option<char> {
|
2016-10-04 11:41:01 +11:00
|
|
|
let offset = self.byte_offset(self.next_pos).to_usize();
|
2015-02-11 18:29:49 +01:00
|
|
|
let s = &self.source_text[..];
|
2016-01-03 11:14:09 +02:00
|
|
|
if offset >= s.len() {
|
|
|
|
return None;
|
|
|
|
}
|
2015-04-21 10:19:53 -07:00
|
|
|
let next = offset + char_at(s, offset).len_utf8();
|
2014-05-21 16:57:31 -07:00
|
|
|
if next < s.len() {
|
2015-04-21 10:19:53 -07:00
|
|
|
Some(char_at(s, next))
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn nextnextch_is(&self, c: char) -> bool {
|
|
|
|
self.nextnextch() == Some(c)
|
|
|
|
}
|
|
|
|
|
2014-11-19 15:48:38 +11:00
|
|
|
/// Eats <XID_start><XID_continue>*, if possible.
|
|
|
|
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
|
2016-10-04 11:55:58 +11:00
|
|
|
if !ident_start(self.ch) {
|
2016-01-03 11:14:09 +02:00
|
|
|
return None;
|
2014-11-19 15:48:38 +11:00
|
|
|
}
|
2016-10-04 11:46:54 +11:00
|
|
|
let start = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
while ident_continue(self.ch) {
|
2014-11-19 15:48:38 +11:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
|
|
|
|
self.with_str_from(start, |string| {
|
2017-05-14 21:37:50 +09:00
|
|
|
Some(Symbol::intern(string))
|
2014-11-19 15:48:38 +11:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2016-10-04 11:55:58 +11:00
|
|
|
/// PRECONDITION: self.ch is not whitespace
|
2014-05-21 16:57:31 -07:00
|
|
|
/// Eats any kind of comment.
|
2014-07-04 22:30:39 -07:00
|
|
|
fn scan_comment(&mut self) -> Option<TokenAndSpan> {
|
2016-10-04 11:55:58 +11:00
|
|
|
if let Some(c) = self.ch {
|
2016-07-03 14:38:37 -07:00
|
|
|
if c.is_whitespace() {
|
2017-01-17 01:14:53 +00:00
|
|
|
let msg = "called consume_any_line_comment, but there was whitespace";
|
2017-03-15 00:22:48 +00:00
|
|
|
self.sess.span_diagnostic.span_err(mk_sp(self.pos, self.pos), msg);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('/') {
|
2014-05-21 16:57:31 -07:00
|
|
|
match self.nextch() {
|
|
|
|
Some('/') => {
|
|
|
|
self.bump();
|
|
|
|
self.bump();
|
2015-05-08 20:33:58 +01:00
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
// line comments starting with "///" or "//!" are doc-comments
|
2017-05-08 22:29:24 +09:00
|
|
|
let doc_comment = (self.ch_is('/') && !self.nextch_is('/')) || self.ch_is('!');
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos - BytePos(2);
|
2015-05-08 20:33:58 +01:00
|
|
|
|
|
|
|
while !self.is_eof() {
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap() {
|
2015-05-08 20:33:58 +01:00
|
|
|
'\n' => break,
|
|
|
|
'\r' => {
|
|
|
|
if self.nextch_is('\n') {
|
|
|
|
// CRLF
|
2016-01-03 11:14:09 +02:00
|
|
|
break;
|
2015-05-13 22:05:01 +01:00
|
|
|
} else if doc_comment {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.err_span_(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2015-05-13 22:05:01 +01:00
|
|
|
"bare CR not allowed in doc-comment");
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => (),
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2015-05-08 20:33:58 +01:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
|
|
|
|
return if doc_comment {
|
|
|
|
self.with_str_from(start_bpos, |string| {
|
|
|
|
// comments with only more "/"s are not doc comments
|
2014-07-04 22:30:39 -07:00
|
|
|
let tok = if is_doc_comment(string) {
|
2016-11-16 08:21:52 +00:00
|
|
|
token::DocComment(Symbol::intern(string))
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
2014-10-27 19:22:52 +11:00
|
|
|
token::Comment
|
2014-07-04 22:30:39 -07:00
|
|
|
};
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2015-05-08 20:33:58 +01:00
|
|
|
Some(TokenAndSpan {
|
2014-07-04 22:30:39 -07:00
|
|
|
tok: tok,
|
2017-03-15 00:22:48 +00:00
|
|
|
sp: mk_sp(start_bpos, self.pos),
|
2015-05-08 20:33:58 +01:00
|
|
|
})
|
|
|
|
})
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
2015-05-08 20:33:58 +01:00
|
|
|
Some(TokenAndSpan {
|
2014-10-27 19:22:52 +11:00
|
|
|
tok: token::Comment,
|
2017-03-15 00:22:48 +00:00
|
|
|
sp: mk_sp(start_bpos, self.pos),
|
2015-05-08 20:33:58 +01:00
|
|
|
})
|
2016-01-03 11:14:09 +02:00
|
|
|
};
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2014-07-04 22:30:39 -07:00
|
|
|
Some('*') => {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
self.bump();
|
2014-07-04 22:30:39 -07:00
|
|
|
self.scan_block_comment()
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => None,
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
} else if self.ch_is('#') {
|
2014-05-21 16:57:31 -07:00
|
|
|
if self.nextch_is('!') {
|
|
|
|
|
|
|
|
// Parse an inner attribute.
|
|
|
|
if self.nextnextch_is('[') {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
// I guess this is the only way to figure out if
|
|
|
|
// we're at the beginning of the file...
|
2017-04-24 19:01:19 +02:00
|
|
|
let cmap = CodeMap::new(FilePathMapping::empty());
|
2014-05-21 16:57:31 -07:00
|
|
|
cmap.files.borrow_mut().push(self.filemap.clone());
|
2016-10-04 11:46:54 +11:00
|
|
|
let loc = cmap.lookup_char_pos_adj(self.pos);
|
2014-07-04 22:30:39 -07:00
|
|
|
debug!("Skipping a shebang");
|
2015-01-28 01:01:48 +00:00
|
|
|
if loc.line == 1 && loc.col == CharPos(0) {
|
2014-07-04 22:30:39 -07:00
|
|
|
// FIXME: Add shebang "token", return it
|
2016-10-04 11:46:54 +11:00
|
|
|
let start = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
while !self.ch_is('\n') && !self.is_eof() {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
}
|
2014-07-04 22:30:39 -07:00
|
|
|
return Some(TokenAndSpan {
|
2014-10-27 19:22:52 +11:00
|
|
|
tok: token::Shebang(self.name_from(start)),
|
2017-03-15 00:22:48 +00:00
|
|
|
sp: mk_sp(start, self.pos),
|
2014-07-04 22:30:39 -07:00
|
|
|
});
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-04 22:30:39 -07:00
|
|
|
/// If there is whitespace, shebang, or a comment, scan it. Otherwise,
|
|
|
|
/// return None.
|
|
|
|
fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap_or('\0') {
|
2014-07-04 22:30:39 -07:00
|
|
|
// # to handle shebang at start of file -- this is the entry point
|
|
|
|
// for skipping over all "junk"
|
|
|
|
'/' | '#' => {
|
|
|
|
let c = self.scan_comment();
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("scanning a comment {:?}", c);
|
2014-07-04 22:30:39 -07:00
|
|
|
c
|
2015-11-12 02:43:43 +00:00
|
|
|
},
|
|
|
|
c if is_pattern_whitespace(Some(c)) => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
while is_pattern_whitespace(self.ch) {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
}
|
2014-07-04 22:30:39 -07:00
|
|
|
let c = Some(TokenAndSpan {
|
2014-10-27 19:22:52 +11:00
|
|
|
tok: token::Whitespace,
|
2017-03-15 00:22:48 +00:00
|
|
|
sp: mk_sp(start_bpos, self.pos),
|
2014-07-04 22:30:39 -07:00
|
|
|
});
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("scanning whitespace: {:?}", c);
|
2014-07-04 22:30:39 -07:00
|
|
|
c
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
_ => None,
|
2014-07-04 22:30:39 -07:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-06-09 13:12:30 -07:00
|
|
|
/// Might return a sugared-doc-attr
|
2014-07-04 22:30:39 -07:00
|
|
|
fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
|
2014-05-21 16:57:31 -07:00
|
|
|
// block comments starting with "/**" or "/*!" are doc-comments
|
2016-10-04 11:55:58 +11:00
|
|
|
let is_doc_comment = self.ch_is('*') || self.ch_is('!');
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos - BytePos(2);
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2015-01-18 00:18:19 +00:00
|
|
|
let mut level: isize = 1;
|
2014-05-24 01:13:59 -07:00
|
|
|
let mut has_cr = false;
|
2014-05-21 16:57:31 -07:00
|
|
|
while level > 0 {
|
|
|
|
if self.is_eof() {
|
|
|
|
let msg = if is_doc_comment {
|
|
|
|
"unterminated block doc-comment"
|
|
|
|
} else {
|
|
|
|
"unterminated block comment"
|
|
|
|
};
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2015-10-23 19:20:03 -07:00
|
|
|
panic!(self.fatal_span_(start_bpos, last_bpos, msg));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
let n = self.ch.unwrap();
|
2014-05-24 01:13:59 -07:00
|
|
|
match n {
|
|
|
|
'/' if self.nextch_is('*') => {
|
|
|
|
level += 1;
|
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
'*' if self.nextch_is('/') => {
|
|
|
|
level -= 1;
|
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
'\r' => {
|
|
|
|
has_cr = true;
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => (),
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
|
|
|
self.bump();
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-07-04 22:30:39 -07:00
|
|
|
self.with_str_from(start_bpos, |string| {
|
|
|
|
// but comments with only "*"s between two "/"s are not
|
|
|
|
let tok = if is_block_doc_comment(string) {
|
|
|
|
let string = if has_cr {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.translate_crlf(start_bpos,
|
|
|
|
string,
|
2014-07-04 22:30:39 -07:00
|
|
|
"bare CR not allowed in block doc-comment")
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
|
|
|
string.into()
|
|
|
|
};
|
2016-11-16 08:21:52 +00:00
|
|
|
token::DocComment(Symbol::intern(&string[..]))
|
2014-07-04 22:30:39 -07:00
|
|
|
} else {
|
2014-10-27 19:22:52 +11:00
|
|
|
token::Comment
|
2014-07-04 22:30:39 -07:00
|
|
|
};
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
Some(TokenAndSpan {
|
2014-07-04 22:30:39 -07:00
|
|
|
tok: tok,
|
2017-03-15 00:22:48 +00:00
|
|
|
sp: mk_sp(start_bpos, self.pos),
|
2014-07-04 22:30:39 -07:00
|
|
|
})
|
|
|
|
})
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2015-03-31 00:27:13 +11:00
|
|
|
/// Scan through any digits (base `scan_radix`) or underscores,
|
|
|
|
/// and return how many digits there were.
|
|
|
|
///
|
|
|
|
/// `real_radix` represents the true radix of the number we're
|
|
|
|
/// interested in, and errors will be emitted for any digits
|
|
|
|
/// between `real_radix` and `scan_radix`.
|
|
|
|
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
|
|
|
|
assert!(real_radix <= scan_radix);
|
2015-01-28 01:01:48 +00:00
|
|
|
let mut len = 0;
|
2014-05-21 16:57:31 -07:00
|
|
|
loop {
|
2016-10-04 11:55:58 +11:00
|
|
|
let c = self.ch;
|
2016-01-03 11:14:09 +02:00
|
|
|
if c == Some('_') {
|
|
|
|
debug!("skipping a _");
|
|
|
|
self.bump();
|
|
|
|
continue;
|
|
|
|
}
|
2015-03-31 00:27:13 +11:00
|
|
|
match c.and_then(|cc| cc.to_digit(scan_radix)) {
|
2014-06-18 10:44:20 -07:00
|
|
|
Some(_) => {
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("{:?} in scan_digits", c);
|
2015-03-31 00:27:13 +11:00
|
|
|
// check that the hypothetical digit is actually
|
|
|
|
// in range for the true radix
|
|
|
|
if c.unwrap().to_digit(real_radix).is_none() {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.err_span_(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
&format!("invalid digit for a base {} literal", real_radix));
|
2015-03-31 00:27:13 +11:00
|
|
|
}
|
2014-06-18 10:44:20 -07:00
|
|
|
len += 1;
|
|
|
|
self.bump();
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => return len,
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-06-18 10:44:20 -07:00
|
|
|
/// Lex a LIT_INTEGER or a LIT_FLOAT
|
2014-11-19 15:48:38 +11:00
|
|
|
fn scan_number(&mut self, c: char) -> token::Lit {
|
2015-06-18 01:02:58 +03:00
|
|
|
let num_digits;
|
2014-06-18 10:44:20 -07:00
|
|
|
let mut base = 10;
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2014-06-18 10:44:20 -07:00
|
|
|
self.bump();
|
|
|
|
|
|
|
|
if c == '0' {
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap_or('\0') {
|
2016-01-03 11:14:09 +02:00
|
|
|
'b' => {
|
|
|
|
self.bump();
|
|
|
|
base = 2;
|
|
|
|
num_digits = self.scan_digits(2, 10);
|
|
|
|
}
|
|
|
|
'o' => {
|
|
|
|
self.bump();
|
|
|
|
base = 8;
|
|
|
|
num_digits = self.scan_digits(8, 10);
|
|
|
|
}
|
|
|
|
'x' => {
|
|
|
|
self.bump();
|
|
|
|
base = 16;
|
|
|
|
num_digits = self.scan_digits(16, 16);
|
|
|
|
}
|
2017-03-17 09:03:52 +09:00
|
|
|
'0'...'9' | '_' | '.' | 'e' | 'E' => {
|
2015-03-31 00:27:13 +11:00
|
|
|
num_digits = self.scan_digits(10, 10) + 1;
|
2014-06-18 10:44:20 -07:00
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
// just a 0
|
2014-11-19 15:48:38 +11:00
|
|
|
return token::Integer(self.name_from(start_bpos));
|
2014-06-18 10:44:20 -07:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2014-10-13 13:03:42 -07:00
|
|
|
} else if c.is_digit(10) {
|
2015-03-31 00:27:13 +11:00
|
|
|
num_digits = self.scan_digits(10, 10) + 1;
|
2014-06-18 10:44:20 -07:00
|
|
|
} else {
|
|
|
|
num_digits = 0;
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2014-06-18 10:44:20 -07:00
|
|
|
|
|
|
|
if num_digits == 0 {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start_bpos,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"no valid digits found for number");
|
2016-11-16 08:21:52 +00:00
|
|
|
return token::Integer(Symbol::intern("0"));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-06-18 10:44:20 -07:00
|
|
|
// might be a float, but don't be greedy if this is actually an
|
|
|
|
// integer literal followed by field/method access or a range pattern
|
|
|
|
// (`0..2` and `12.foo()`)
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('.') && !self.nextch_is('.') &&
|
2017-05-12 22:00:06 +09:00
|
|
|
!ident_start(self.nextch()) {
|
2014-06-18 10:44:20 -07:00
|
|
|
// might have stuff after the ., and if it does, it needs to start
|
|
|
|
// with a number
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch.unwrap_or('\0').is_digit(10) {
|
2015-03-31 00:27:13 +11:00
|
|
|
self.scan_digits(10, 10);
|
2014-06-18 10:44:20 -07:00
|
|
|
self.scan_float_exponent();
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-10-04 11:46:54 +11:00
|
|
|
let pos = self.pos;
|
|
|
|
self.check_float_base(start_bpos, pos, base);
|
2014-11-19 15:48:38 +11:00
|
|
|
return token::Float(self.name_from(start_bpos));
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
2014-06-18 10:44:20 -07:00
|
|
|
// it might be a float if it has an exponent
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('e') || self.ch_is('E') {
|
2014-06-18 10:44:20 -07:00
|
|
|
self.scan_float_exponent();
|
2016-10-04 11:46:54 +11:00
|
|
|
let pos = self.pos;
|
|
|
|
self.check_float_base(start_bpos, pos, base);
|
2014-11-19 15:48:38 +11:00
|
|
|
return token::Float(self.name_from(start_bpos));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2014-06-18 10:44:20 -07:00
|
|
|
// but we certainly have an integer!
|
2014-11-19 15:48:38 +11:00
|
|
|
return token::Integer(self.name_from(start_bpos));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-18 10:44:20 -07:00
|
|
|
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
|
|
|
|
/// error if too many or too few digits are encountered.
|
2016-01-03 11:14:09 +02:00
|
|
|
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("scanning {} digits until {:?}", n_digits, delim);
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2014-06-18 10:44:20 -07:00
|
|
|
let mut accum_int = 0;
|
|
|
|
|
2015-03-26 09:38:25 +01:00
|
|
|
let mut valid = true;
|
2015-01-26 15:46:12 -05:00
|
|
|
for _ in 0..n_digits {
|
2014-05-21 16:57:31 -07:00
|
|
|
if self.is_eof() {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2015-10-23 19:20:03 -07:00
|
|
|
panic!(self.fatal_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"unterminated numeric character escape"));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is(delim) {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"numeric character escape is too short");
|
2015-03-26 09:38:25 +01:00
|
|
|
valid = false;
|
2014-05-21 16:57:31 -07:00
|
|
|
break;
|
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
let c = self.ch.unwrap_or('\x00');
|
2014-05-21 16:57:31 -07:00
|
|
|
accum_int *= 16;
|
|
|
|
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.err_span_char(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"invalid character in numeric character escape",
|
|
|
|
c);
|
2015-03-26 09:38:25 +01:00
|
|
|
|
|
|
|
valid = false;
|
2014-05-21 16:57:31 -07:00
|
|
|
0
|
Add trivial cast lints.
This permits all coercions to be performed in casts, but adds lints to warn in those cases.
Part of this patch moves cast checking to a later stage of type checking. We acquire obligations to check casts as part of type checking where we previously checked them. Once we have type checked a function or module, then we check any cast obligations which have been acquired. That means we have more type information available to check casts (this was crucial to making coercions work properly in place of some casts), but it means that casts cannot feed input into type inference.
[breaking change]
* Adds two new lints for trivial casts and trivial numeric casts, these are warn by default, but can cause errors if you build with warnings as errors. Previously, trivial numeric casts and casts to trait objects were allowed.
* The unused casts lint has gone.
* Interactions between casting and type inference have changed in subtle ways. Two ways this might manifest are:
- You may need to 'direct' casts more with extra type information, for example, in some cases where `foo as _ as T` succeeded, you may now need to specify the type for `_`
- Casts do not influence inference of integer types. E.g., the following used to type check:
```
let x = 42;
let y = &x as *const u32;
```
Because the cast would inform inference that `x` must have type `u32`. This no longer applies and the compiler will fallback to `i32` for `x` and thus there will be a type error in the cast. The solution is to add more type information:
```
let x: u32 = 42;
let y = &x as *const u32;
```
2015-03-20 17:15:27 +13:00
|
|
|
});
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
|
2014-10-27 09:13:51 -07:00
|
|
|
if below_0x7f_only && accum_int >= 0x80 {
|
|
|
|
self.err_span_(start_bpos,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"this form of character escape may only be used with characters in \
|
|
|
|
the range [\\x00-\\x7f]");
|
2015-03-26 09:38:25 +01:00
|
|
|
valid = false;
|
2014-10-27 09:13:51 -07:00
|
|
|
}
|
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
match char::from_u32(accum_int) {
|
2015-03-26 09:38:25 +01:00
|
|
|
Some(_) => valid,
|
2014-05-21 16:57:31 -07:00
|
|
|
None => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2015-07-27 03:49:38 +03:00
|
|
|
self.err_span_(start_bpos, last_bpos, "invalid numeric character escape");
|
2014-07-03 00:47:30 -07:00
|
|
|
false
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-17 17:58:13 +02:00
|
|
|
/// Scan for a single (possibly escaped) byte or char
|
|
|
|
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
|
|
|
|
/// `start` is the position of `first_source_char`, which is already consumed.
|
2014-07-03 00:47:30 -07:00
|
|
|
///
|
|
|
|
/// Returns true if there was a valid char/byte, false otherwise.
|
2016-01-03 11:14:09 +02:00
|
|
|
fn scan_char_or_byte(&mut self,
|
|
|
|
start: BytePos,
|
|
|
|
first_source_char: char,
|
|
|
|
ascii_only: bool,
|
|
|
|
delim: char)
|
|
|
|
-> bool {
|
2014-06-17 17:58:13 +02:00
|
|
|
match first_source_char {
|
|
|
|
'\\' => {
|
|
|
|
// '\X' for some X must be a character constant:
|
2016-10-04 11:55:58 +11:00
|
|
|
let escaped = self.ch;
|
2016-10-04 11:46:54 +11:00
|
|
|
let escaped_pos = self.pos;
|
2014-06-17 17:58:13 +02:00
|
|
|
self.bump();
|
|
|
|
match escaped {
|
2016-01-03 11:14:09 +02:00
|
|
|
None => {} // EOF here is an error that will be checked later.
|
2014-06-17 17:58:13 +02:00
|
|
|
Some(e) => {
|
2014-07-03 00:47:30 -07:00
|
|
|
return match e {
|
|
|
|
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
2014-12-02 16:48:48 -08:00
|
|
|
'x' => self.scan_byte_escape(delim, !ascii_only),
|
2015-07-10 22:31:44 +03:00
|
|
|
'u' => {
|
2016-10-04 11:55:58 +11:00
|
|
|
let valid = if self.ch_is('{') {
|
2015-07-10 22:31:44 +03:00
|
|
|
self.scan_unicode_escape(delim) && !ascii_only
|
|
|
|
} else {
|
2017-03-15 00:22:48 +00:00
|
|
|
let span = mk_sp(start, self.pos);
|
2017-01-17 01:14:53 +00:00
|
|
|
self.sess.span_diagnostic
|
2016-01-03 11:14:09 +02:00
|
|
|
.struct_span_err(span, "incorrect unicode escape sequence")
|
2015-12-21 10:00:43 +13:00
|
|
|
.span_help(span,
|
2016-01-03 11:14:09 +02:00
|
|
|
"format of unicode escape sequences is \
|
|
|
|
`\\u{…}`")
|
2015-12-21 10:00:43 +13:00
|
|
|
.emit();
|
2015-07-10 22:31:44 +03:00
|
|
|
false
|
|
|
|
};
|
|
|
|
if ascii_only {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"unicode escape sequences cannot be used as a \
|
|
|
|
byte or in a byte string");
|
2015-06-22 15:30:56 -07:00
|
|
|
}
|
2015-07-10 22:31:44 +03:00
|
|
|
valid
|
|
|
|
|
2015-07-10 21:37:21 +03:00
|
|
|
}
|
2014-06-17 17:58:13 +02:00
|
|
|
'\n' if delim == '"' => {
|
|
|
|
self.consume_whitespace();
|
2014-07-03 00:47:30 -07:00
|
|
|
true
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
'\r' if delim == '"' && self.ch_is('\n') => {
|
2014-05-24 01:13:59 -07:00
|
|
|
self.consume_whitespace();
|
2014-07-03 00:47:30 -07:00
|
|
|
true
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
2014-06-17 17:58:13 +02:00
|
|
|
c => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let pos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
let mut err = self.struct_err_span_char(escaped_pos,
|
2016-10-04 11:46:54 +11:00
|
|
|
pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
if ascii_only {
|
|
|
|
"unknown byte escape"
|
|
|
|
} else {
|
|
|
|
"unknown character \
|
|
|
|
escape"
|
|
|
|
},
|
|
|
|
c);
|
2014-11-10 21:54:42 +11:00
|
|
|
if e == '\r' {
|
2017-03-15 00:22:48 +00:00
|
|
|
err.span_help(mk_sp(escaped_pos, pos),
|
2016-01-03 11:14:09 +02:00
|
|
|
"this is an isolated carriage return; consider \
|
|
|
|
checking your editor and version control \
|
|
|
|
settings");
|
2014-11-10 21:54:42 +11:00
|
|
|
}
|
2015-04-13 15:56:10 +02:00
|
|
|
if (e == '{' || e == '}') && !ascii_only {
|
2017-03-15 00:22:48 +00:00
|
|
|
err.span_help(mk_sp(escaped_pos, pos),
|
2016-01-03 11:14:09 +02:00
|
|
|
"if used in a formatting string, curly braces \
|
|
|
|
are escaped with `{{` and `}}`");
|
2015-04-13 15:56:10 +02:00
|
|
|
}
|
2015-12-21 10:00:43 +13:00
|
|
|
err.emit();
|
2014-07-03 00:47:30 -07:00
|
|
|
false
|
2014-06-17 17:58:13 +02:00
|
|
|
}
|
2014-07-03 00:47:30 -07:00
|
|
|
}
|
2014-06-17 17:58:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let pos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_char(start,
|
2016-10-04 11:46:54 +11:00
|
|
|
pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
if ascii_only {
|
|
|
|
"byte constant must be escaped"
|
|
|
|
} else {
|
|
|
|
"character constant must be escaped"
|
|
|
|
},
|
|
|
|
first_source_char);
|
2014-07-03 00:47:30 -07:00
|
|
|
return false;
|
2014-06-17 17:58:13 +02:00
|
|
|
}
|
2014-05-24 01:13:59 -07:00
|
|
|
'\r' => {
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('\n') {
|
2014-05-24 01:13:59 -07:00
|
|
|
self.bump();
|
2014-07-03 00:47:30 -07:00
|
|
|
return true;
|
2014-05-24 01:13:59 -07:00
|
|
|
} else {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2014-05-24 01:13:59 -07:00
|
|
|
"bare CR not allowed in string, use \\r instead");
|
2014-07-03 00:47:30 -07:00
|
|
|
return false;
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => {
|
|
|
|
if ascii_only && first_source_char > '\x7F' {
|
2016-10-04 11:46:54 +11:00
|
|
|
let pos = self.pos;
|
2016-05-02 07:44:21 +02:00
|
|
|
self.err_span_(start,
|
2016-10-04 11:46:54 +11:00
|
|
|
pos,
|
2016-05-02 07:44:21 +02:00
|
|
|
"byte constant must be ASCII. Use a \\xHH escape for a \
|
|
|
|
non-ASCII byte");
|
2016-01-03 11:14:09 +02:00
|
|
|
return false;
|
|
|
|
}
|
2014-06-17 17:58:13 +02:00
|
|
|
}
|
|
|
|
}
|
2014-07-03 00:47:30 -07:00
|
|
|
true
|
2014-06-17 17:58:13 +02:00
|
|
|
}
|
|
|
|
|
2014-12-02 16:48:48 -08:00
|
|
|
/// Scan over a \u{...} escape
|
|
|
|
///
|
|
|
|
/// At this point, we have already seen the \ and the u, the { is the current character. We
|
|
|
|
/// will read at least one digit, and up to 6, and pass over the }.
|
|
|
|
fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
|
|
|
self.bump(); // past the {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2015-01-28 01:01:48 +00:00
|
|
|
let mut count = 0;
|
2014-12-02 16:48:48 -08:00
|
|
|
let mut accum_int = 0;
|
2015-03-26 09:38:25 +01:00
|
|
|
let mut valid = true;
|
2014-12-02 16:48:48 -08:00
|
|
|
|
2016-10-04 11:55:58 +11:00
|
|
|
while !self.ch_is('}') && count <= 6 {
|
|
|
|
let c = match self.ch {
|
2014-12-02 16:48:48 -08:00
|
|
|
Some(c) => c,
|
|
|
|
None => {
|
2016-01-03 11:14:09 +02:00
|
|
|
panic!(self.fatal_span_(start_bpos,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2015-10-23 19:20:03 -07:00
|
|
|
"unterminated unicode escape (found EOF)"));
|
2014-12-02 16:48:48 -08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
accum_int *= 16;
|
|
|
|
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
|
|
|
if c == delim {
|
2016-10-04 11:46:54 +11:00
|
|
|
panic!(self.fatal_span_(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2015-10-23 19:20:03 -07:00
|
|
|
"unterminated unicode escape (needed a `}`)"));
|
2014-12-02 16:48:48 -08:00
|
|
|
} else {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.err_span_char(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"invalid character in unicode escape",
|
|
|
|
c);
|
2014-12-02 16:48:48 -08:00
|
|
|
}
|
2015-03-26 09:38:25 +01:00
|
|
|
valid = false;
|
|
|
|
0
|
Add trivial cast lints.
This permits all coercions to be performed in casts, but adds lints to warn in those cases.
Part of this patch moves cast checking to a later stage of type checking. We acquire obligations to check casts as part of type checking where we previously checked them. Once we have type checked a function or module, then we check any cast obligations which have been acquired. That means we have more type information available to check casts (this was crucial to making coercions work properly in place of some casts), but it means that casts cannot feed input into type inference.
[breaking change]
* Adds two new lints for trivial casts and trivial numeric casts, these are warn by default, but can cause errors if you build with warnings as errors. Previously, trivial numeric casts and casts to trait objects were allowed.
* The unused casts lint has gone.
* Interactions between casting and type inference have changed in subtle ways. Two ways this might manifest are:
- You may need to 'direct' casts more with extra type information, for example, in some cases where `foo as _ as T` succeeded, you may now need to specify the type for `_`
- Casts do not influence inference of integer types. E.g., the following used to type check:
```
let x = 42;
let y = &x as *const u32;
```
Because the cast would inform inference that `x` must have type `u32`. This no longer applies and the compiler will fallback to `i32` for `x` and thus there will be a type error in the cast. The solution is to add more type information:
```
let x: u32 = 42;
let y = &x as *const u32;
```
2015-03-20 17:15:27 +13:00
|
|
|
});
|
2014-12-02 16:48:48 -08:00
|
|
|
self.bump();
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if count > 6 {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start_bpos,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"overlong unicode escape (can have at most 6 hex digits)");
|
2015-03-26 09:38:25 +01:00
|
|
|
valid = false;
|
2014-12-02 16:48:48 -08:00
|
|
|
}
|
|
|
|
|
2015-03-26 09:38:25 +01:00
|
|
|
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start_bpos,
|
2016-10-04 11:46:54 +11:00
|
|
|
self.pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"invalid unicode character escape");
|
2015-06-22 15:30:56 -07:00
|
|
|
valid = false;
|
2014-12-02 16:48:48 -08:00
|
|
|
}
|
|
|
|
|
2015-07-10 21:41:37 +03:00
|
|
|
self.bump(); // past the ending }
|
2014-12-02 16:48:48 -08:00
|
|
|
valid
|
|
|
|
}
|
|
|
|
|
2014-06-18 10:44:20 -07:00
|
|
|
/// Scan over a float exponent.
|
|
|
|
fn scan_float_exponent(&mut self) {
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('e') || self.ch_is('E') {
|
2014-06-18 10:44:20 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('-') || self.ch_is('+') {
|
2014-06-18 10:44:20 -07:00
|
|
|
self.bump();
|
|
|
|
}
|
2015-03-31 00:27:13 +11:00
|
|
|
if self.scan_digits(10, 10) == 0 {
|
2016-10-04 11:46:54 +11:00
|
|
|
self.err_span_(self.pos,
|
2016-10-04 11:41:01 +11:00
|
|
|
self.next_pos,
|
2016-01-03 11:14:09 +02:00
|
|
|
"expected at least one digit in exponent")
|
2014-06-18 10:44:20 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check that a base is valid for a floating literal, emitting a nice
|
|
|
|
/// error if it isn't.
|
2015-01-17 23:33:05 +00:00
|
|
|
fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
|
2014-06-18 10:44:20 -07:00
|
|
|
match base {
|
2016-01-03 11:14:09 +02:00
|
|
|
16 => {
|
|
|
|
self.err_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"hexadecimal float literal is not supported")
|
|
|
|
}
|
|
|
|
8 => {
|
|
|
|
self.err_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"octal float literal is not supported")
|
|
|
|
}
|
|
|
|
2 => {
|
|
|
|
self.err_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"binary float literal is not supported")
|
|
|
|
}
|
|
|
|
_ => (),
|
2014-06-18 10:44:20 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-27 19:22:52 +11:00
|
|
|
fn binop(&mut self, op: token::BinOpToken) -> token::Token {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('=') {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2014-10-27 19:22:52 +11:00
|
|
|
return token::BinOpEq(op);
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
2014-10-27 19:22:52 +11:00
|
|
|
return token::BinOp(op);
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the next token from the string, advances the input past that
|
|
|
|
/// token, and updates the interner
|
2016-04-25 17:20:32 +02:00
|
|
|
fn next_token_inner(&mut self) -> Result<token::Token, ()> {
|
2016-10-04 11:55:58 +11:00
|
|
|
let c = self.ch;
|
2016-01-03 11:14:09 +02:00
|
|
|
if ident_start(c) &&
|
|
|
|
match (c.unwrap(), self.nextch(), self.nextnextch()) {
|
2014-05-21 16:57:31 -07:00
|
|
|
// Note: r as in r" or r#" is part of a raw string literal,
|
2014-06-06 16:04:04 +01:00
|
|
|
// b as in b' is part of a byte literal.
|
|
|
|
// They are not identifiers, and are handled further down.
|
2016-01-03 11:14:09 +02:00
|
|
|
('r', Some('"'), _) |
|
|
|
|
('r', Some('#'), _) |
|
|
|
|
('b', Some('"'), _) |
|
|
|
|
('b', Some('\''), _) |
|
|
|
|
('b', Some('r'), Some('"')) |
|
|
|
|
('b', Some('r'), Some('#')) => false,
|
|
|
|
_ => true,
|
2014-06-06 16:04:04 +01:00
|
|
|
} {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
while ident_continue(self.ch) {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.with_str_from(start, |string| {
|
2014-05-21 16:57:31 -07:00
|
|
|
if string == "_" {
|
2014-10-27 19:22:52 +11:00
|
|
|
token::Underscore
|
2014-05-21 16:57:31 -07:00
|
|
|
} else {
|
|
|
|
// FIXME: perform NFKC normalization here. (Issue #2253)
|
2016-11-16 08:21:52 +00:00
|
|
|
token::Ident(Ident::from_str(string))
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-04-25 17:20:32 +02:00
|
|
|
}));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
if is_dec_digit(c) {
|
2014-11-19 15:48:38 +11:00
|
|
|
let num = self.scan_number(c.unwrap());
|
|
|
|
let suffix = self.scan_optional_raw_name();
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Literal(num, suffix));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
match c.expect("next_token_inner called at EOF") {
|
2016-01-03 11:14:09 +02:00
|
|
|
// One-byte tokens.
|
|
|
|
';' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Semi);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
',' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Comma);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'.' => {
|
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
return if self.ch_is('.') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('.') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
Ok(token::DotDotDot)
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
Ok(token::DotDot)
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
Ok(token::Dot)
|
2016-01-03 11:14:09 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
'(' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::OpenDelim(token::Paren));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
')' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::CloseDelim(token::Paren));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'{' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::OpenDelim(token::Brace));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'}' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::CloseDelim(token::Brace));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'[' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::OpenDelim(token::Bracket));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
']' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::CloseDelim(token::Bracket));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'@' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::At);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'#' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Pound);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'~' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Tilde);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'?' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Question);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
':' => {
|
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is(':') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::ModSep);
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Colon);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
'$' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Dollar);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
// Multi-byte tokens.
|
|
|
|
'=' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('=') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::EqEq);
|
2016-10-04 11:55:58 +11:00
|
|
|
} else if self.ch_is('>') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::FatArrow);
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Eq);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
'!' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('=') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Ne);
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Not);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'<' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap_or('\x00') {
|
2016-01-03 11:14:09 +02:00
|
|
|
'=' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Le);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'<' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Shl));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'-' => {
|
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap_or('\x00') {
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::LArrow);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Lt);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'>' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
match self.ch.unwrap_or('\x00') {
|
2016-01-03 11:14:09 +02:00
|
|
|
'=' => {
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Ge);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'>' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Shr));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
_ => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Gt);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'\'' => {
|
|
|
|
// Either a character constant 'a' OR a lifetime name 'abc
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_with_quote = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-10-04 11:46:54 +11:00
|
|
|
let start = self.pos;
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
// the eof will be picked up by the final `'` check below
|
2016-10-04 11:55:58 +11:00
|
|
|
let c2 = self.ch.unwrap_or('\x00');
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
// If the character is an ident start not followed by another single
|
|
|
|
// quote, then this is a lifetime name:
|
2016-10-04 11:55:58 +11:00
|
|
|
if ident_start(Some(c2)) && !self.ch_is('\'') {
|
|
|
|
while ident_continue(self.ch) {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
}
|
2016-01-07 16:12:28 +00:00
|
|
|
// lifetimes shouldn't end with a single quote
|
|
|
|
// if we find one, then this is an invalid character literal
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('\'') {
|
2016-01-07 16:12:28 +00:00
|
|
|
panic!(self.fatal_span_verbose(
|
2016-10-04 11:41:01 +11:00
|
|
|
start_with_quote, self.next_pos,
|
2016-01-07 16:12:28 +00:00
|
|
|
String::from("character literal may only contain one codepoint")));
|
|
|
|
|
|
|
|
}
|
2014-06-10 13:54:13 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
// Include the leading `'` in the real identifier, for macro
|
|
|
|
// expansion purposes. See #12512 for the gory details of why
|
|
|
|
// this is necessary.
|
|
|
|
let ident = self.with_str_from(start, |lifetime_name| {
|
2016-11-16 08:21:52 +00:00
|
|
|
Ident::from_str(&format!("'{}", lifetime_name))
|
2016-01-03 11:14:09 +02:00
|
|
|
});
|
2014-06-10 13:54:13 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
// Conjure up a "keyword checking ident" to make sure that
|
|
|
|
// the lifetime name is not a keyword.
|
|
|
|
let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
|
2016-11-16 08:21:52 +00:00
|
|
|
Ident::from_str(lifetime_name)
|
2014-06-10 13:54:13 -07:00
|
|
|
});
|
2016-04-16 04:12:02 +03:00
|
|
|
let keyword_checking_token = &token::Ident(keyword_checking_ident);
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-04-19 00:42:18 +03:00
|
|
|
if keyword_checking_token.is_any_keyword() &&
|
|
|
|
!keyword_checking_token.is_keyword(keywords::Static) {
|
|
|
|
self.err_span_(start, last_bpos, "lifetimes cannot use keyword names");
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
2016-01-07 16:12:28 +00:00
|
|
|
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Lifetime(ident));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
let valid = self.scan_char_or_byte(start,
|
|
|
|
c2,
|
|
|
|
// ascii_only =
|
|
|
|
false,
|
|
|
|
'\'');
|
2016-01-07 16:12:28 +00:00
|
|
|
|
2016-10-04 11:55:58 +11:00
|
|
|
if !self.ch_is('\'') {
|
2016-01-07 16:12:28 +00:00
|
|
|
panic!(self.fatal_span_verbose(
|
2016-10-04 11:46:54 +11:00
|
|
|
start_with_quote, self.pos,
|
2016-01-07 16:12:28 +00:00
|
|
|
String::from("character literal may only contain one codepoint")));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-07 16:12:28 +00:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
let id = if valid {
|
|
|
|
self.name_from(start)
|
|
|
|
} else {
|
2016-11-16 08:21:52 +00:00
|
|
|
Symbol::intern("0")
|
2016-01-03 11:14:09 +02:00
|
|
|
};
|
2016-10-04 11:55:58 +11:00
|
|
|
self.bump(); // advance ch past token
|
2016-01-03 11:14:09 +02:00
|
|
|
let suffix = self.scan_optional_raw_name();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Literal(token::Char(id), suffix));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'b' => {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
let lit = match self.ch {
|
2016-01-03 11:14:09 +02:00
|
|
|
Some('\'') => self.scan_byte(),
|
|
|
|
Some('"') => self.scan_byte_string(),
|
|
|
|
Some('r') => self.scan_raw_byte_string(),
|
|
|
|
_ => unreachable!(), // Should have been a token::Ident above.
|
|
|
|
};
|
|
|
|
let suffix = self.scan_optional_raw_name();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Literal(lit, suffix));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'"' => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
let mut valid = true;
|
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
while !self.ch_is('"') {
|
2016-01-03 11:14:09 +02:00
|
|
|
if self.is_eof() {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
panic!(self.fatal_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"unterminated double quote string"));
|
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-10-04 11:46:54 +11:00
|
|
|
let ch_start = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
let ch = self.ch.unwrap();
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
valid &= self.scan_char_or_byte(ch_start,
|
|
|
|
ch,
|
|
|
|
// ascii_only =
|
|
|
|
false,
|
|
|
|
'"');
|
|
|
|
}
|
|
|
|
// adjust for the ASCII " at the start of the literal
|
|
|
|
let id = if valid {
|
|
|
|
self.name_from(start_bpos + BytePos(1))
|
|
|
|
} else {
|
2016-11-16 08:21:52 +00:00
|
|
|
Symbol::intern("??")
|
2016-01-03 11:14:09 +02:00
|
|
|
};
|
|
|
|
self.bump();
|
|
|
|
let suffix = self.scan_optional_raw_name();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Literal(token::Str_(id), suffix));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'r' => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let start_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
let mut hash_count = 0;
|
2016-10-04 11:55:58 +11:00
|
|
|
while self.ch_is('#') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
hash_count += 1;
|
|
|
|
}
|
|
|
|
|
2014-05-21 16:57:31 -07:00
|
|
|
if self.is_eof() {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2015-10-23 19:20:03 -07:00
|
|
|
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
|
2016-10-04 11:55:58 +11:00
|
|
|
} else if !self.ch_is('"') {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-10-04 11:55:58 +11:00
|
|
|
let curr_char = self.ch.unwrap();
|
2016-01-03 11:14:09 +02:00
|
|
|
panic!(self.fatal_span_char(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"found invalid character; only `#` is allowed \
|
|
|
|
in raw string delimitation",
|
|
|
|
curr_char));
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2016-10-04 11:46:54 +11:00
|
|
|
let content_start_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
let mut content_end_bpos;
|
|
|
|
let mut valid = true;
|
|
|
|
'outer: loop {
|
|
|
|
if self.is_eof() {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
|
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
// if self.ch_is('"') {
|
2016-10-04 11:46:54 +11:00
|
|
|
// content_end_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
// for _ in 0..hash_count {
|
|
|
|
// self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
// if !self.ch_is('#') {
|
2016-01-03 11:14:09 +02:00
|
|
|
// continue 'outer;
|
2016-10-04 11:55:58 +11:00
|
|
|
let c = self.ch.unwrap();
|
2016-01-03 11:14:09 +02:00
|
|
|
match c {
|
|
|
|
'"' => {
|
2016-10-04 11:46:54 +11:00
|
|
|
content_end_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
for _ in 0..hash_count {
|
|
|
|
self.bump();
|
2016-10-04 11:55:58 +11:00
|
|
|
if !self.ch_is('#') {
|
2016-01-03 11:14:09 +02:00
|
|
|
continue 'outer;
|
|
|
|
}
|
2014-05-24 01:13:59 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
break;
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
'\r' => {
|
|
|
|
if !self.nextch_is('\n') {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
self.err_span_(start_bpos,
|
|
|
|
last_bpos,
|
|
|
|
"bare CR not allowed in raw string, use \\r \
|
|
|
|
instead");
|
|
|
|
valid = false;
|
|
|
|
}
|
2014-07-03 00:47:30 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
_ => (),
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
self.bump();
|
2016-01-03 11:14:09 +02:00
|
|
|
let id = if valid {
|
|
|
|
self.name_from_to(content_start_bpos, content_end_bpos)
|
|
|
|
} else {
|
2016-11-16 08:21:52 +00:00
|
|
|
Symbol::intern("??")
|
2016-01-03 11:14:09 +02:00
|
|
|
};
|
|
|
|
let suffix = self.scan_optional_raw_name();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::Literal(token::StrRaw(id, hash_count), suffix));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'-' => {
|
|
|
|
if self.nextch_is('>') {
|
|
|
|
self.bump();
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::RArrow);
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Minus));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
'&' => {
|
|
|
|
if self.nextch_is('&') {
|
|
|
|
self.bump();
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::AndAnd);
|
2016-01-03 11:14:09 +02:00
|
|
|
} else {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::And));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
'|' => {
|
|
|
|
match self.nextch() {
|
|
|
|
Some('|') => {
|
|
|
|
self.bump();
|
|
|
|
self.bump();
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(token::OrOr);
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
_ => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Or));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'+' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Plus));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'*' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Star));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'/' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Slash));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'^' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Caret));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
'%' => {
|
2016-04-25 17:20:32 +02:00
|
|
|
return Ok(self.binop(token::Percent));
|
2016-01-03 11:14:09 +02:00
|
|
|
}
|
|
|
|
c => {
|
2016-10-04 11:46:54 +11:00
|
|
|
let last_bpos = self.pos;
|
2016-10-04 11:41:01 +11:00
|
|
|
let bpos = self.next_pos;
|
2016-01-03 11:14:09 +02:00
|
|
|
let mut err = self.struct_fatal_span_char(last_bpos,
|
|
|
|
bpos,
|
|
|
|
"unknown start of token",
|
|
|
|
c);
|
|
|
|
unicode_chars::check_for_substitution(&self, c, &mut err);
|
2016-04-25 17:20:32 +02:00
|
|
|
self.fatal_errs.push(err);
|
|
|
|
Err(())
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_whitespace(&mut self) {
|
2016-10-04 11:55:58 +11:00
|
|
|
while is_pattern_whitespace(self.ch) && !self.is_eof() {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_to_eol(&mut self) -> String {
|
|
|
|
let mut val = String::new();
|
2016-10-04 11:55:58 +11:00
|
|
|
while !self.ch_is('\n') && !self.is_eof() {
|
|
|
|
val.push(self.ch.unwrap());
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
|
|
|
}
|
2016-10-04 11:55:58 +11:00
|
|
|
if self.ch_is('\n') {
|
2016-01-03 11:14:09 +02:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
return val;
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_one_line_comment(&mut self) -> String {
|
|
|
|
let val = self.read_to_eol();
|
2016-01-03 11:14:09 +02:00
|
|
|
assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
|
|
|
|
(val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
|
2014-05-21 16:57:31 -07:00
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_non_eol_whitespace(&mut self) {
|
2016-10-04 11:55:58 +11:00
|
|
|
while is_pattern_whitespace(self.ch) && !self.ch_is('\n') && !self.is_eof() {
|
2014-05-21 16:57:31 -07:00
|
|
|
self.bump();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peeking_at_comment(&self) -> bool {
|
2016-10-04 11:55:58 +11:00
|
|
|
(self.ch_is('/') && self.nextch_is('/')) || (self.ch_is('/') && self.nextch_is('*')) ||
|
2016-01-12 20:52:22 +02:00
|
|
|
// consider shebangs comments, but not inner attributes
|
2016-10-04 11:55:58 +11:00
|
|
|
(self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
2014-07-02 09:39:48 -07:00
|
|
|
|
2014-11-19 15:48:38 +11:00
|
|
|
/// Scans a byte literal; the leading `b` has already been consumed by
/// the caller and the reader sits on the opening `'`. Returns a
/// `token::Byte` holding the literal's contents, or the placeholder
/// `?` when the contents were invalid (the error is reported here).
fn scan_byte(&mut self) -> token::Lit {
    self.bump();
    let start = self.pos;

    // the eof will be picked up by the final `'` check below
    let c2 = self.ch.unwrap_or('\x00');
    self.bump();

    let valid = self.scan_char_or_byte(start,
                                       c2,
                                       // ascii_only =
                                       true,
                                       '\'');
    if !self.ch_is('\'') {
        // Byte offsetting here is okay because the
        // character before position `start` are an
        // ascii single quote and ascii 'b'.
        let pos = self.pos;
        panic!(self.fatal_span_verbose(start - BytePos(2),
                                       pos,
                                       "unterminated byte constant".to_string()));
    }

    // On an invalid literal, still produce a token with a placeholder
    // name so later stages can proceed; the diagnostic was already
    // emitted by scan_char_or_byte.
    let id = if valid {
        self.name_from(start)
    } else {
        Symbol::intern("?")
    };
    self.bump(); // advance ch past token
    return token::Byte(id);
}
|
|
|
|
|
2014-12-02 16:48:48 -08:00
|
|
|
/// Scans the two hex digits of a `\xNN` escape inside a byte or
/// byte-string literal delimited by `delim`, forwarding the validity
/// flag from `scan_hex_digits`. `below_0x7f_only` presumably restricts
/// the escape's value to the ASCII range — confirm against
/// `scan_hex_digits`.
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
    self.scan_hex_digits(2, delim, below_0x7f_only)
}
|
|
|
|
|
2014-11-19 15:48:38 +11:00
|
|
|
/// Scans a byte-string literal; the leading `b` has been consumed and
/// the reader sits on the opening `"`. Each character is validated in
/// ASCII-only mode by `scan_char_or_byte`. Returns `token::ByteStr`
/// with the contents, or the placeholder `??` if anything was invalid.
fn scan_byte_string(&mut self) -> token::Lit {
    self.bump();
    let start = self.pos;
    let mut valid = true;

    while !self.ch_is('"') {
        if self.is_eof() {
            let pos = self.pos;
            panic!(self.fatal_span_(start, pos, "unterminated double quote byte string"));
        }

        let ch_start = self.pos;
        let ch = self.ch.unwrap();
        self.bump();
        // Accumulate validity across all characters; one bad escape
        // poisons the whole literal.
        valid &= self.scan_char_or_byte(ch_start,
                                        ch,
                                        // ascii_only =
                                        true,
                                        '"');
    }
    let id = if valid {
        self.name_from(start)
    } else {
        Symbol::intern("??")
    };
    self.bump();
    return token::ByteStr(id);
}
|
|
|
|
|
2014-11-19 15:48:38 +11:00
|
|
|
/// Scans a raw byte-string literal (`br#"..."#`); the leading `b` has
/// been consumed and the reader sits on the `r`. Counts the opening
/// `#`s, then looks for a `"` followed by the same number of `#`s to
/// close the literal. Non-ASCII characters in the contents are
/// reported as errors.
fn scan_raw_byte_string(&mut self) -> token::Lit {
    let start_bpos = self.pos;
    self.bump();
    // Count the `#`s of the opening delimiter; the closing delimiter
    // must repeat exactly this many.
    let mut hash_count = 0;
    while self.ch_is('#') {
        self.bump();
        hash_count += 1;
    }

    if self.is_eof() {
        let pos = self.pos;
        panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"));
    } else if !self.ch_is('"') {
        let pos = self.pos;
        let ch = self.ch.unwrap();
        panic!(self.fatal_span_char(start_bpos,
                                    pos,
                                    "found invalid character; only `#` is allowed in raw \
                                     string delimitation",
                                    ch));
    }
    self.bump();
    let content_start_bpos = self.pos;
    let mut content_end_bpos;
    'outer: loop {
        match self.ch {
            None => {
                let pos = self.pos;
                panic!(self.fatal_span_(start_bpos, pos, "unterminated raw string"))
            }
            Some('"') => {
                // A `"` only closes the literal if followed by
                // `hash_count` `#`s; otherwise resume scanning.
                content_end_bpos = self.pos;
                for _ in 0..hash_count {
                    self.bump();
                    if !self.ch_is('#') {
                        continue 'outer;
                    }
                }
                break;
            }
            Some(c) => {
                if c > '\x7F' {
                    let pos = self.pos;
                    self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
                }
            }
        }
        self.bump();
    }
    // Step past the final `#` (or the closing `"` when hash_count is 0).
    self.bump();
    return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
                             hash_count);
}
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2015-11-12 02:43:43 +00:00
|
|
|
// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
|
|
|
|
// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
|
|
|
|
pub fn is_pattern_whitespace(c: Option<char>) -> bool {
|
|
|
|
c.map_or(false, Pattern_White_Space)
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true when `c` is `Some` and lies in the inclusive range
/// `lo..=hi`; `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    c.map_or(false, |ch| lo <= ch && ch <= hi)
}
|
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
fn is_dec_digit(c: Option<char>) -> bool {
|
|
|
|
return in_range(c, '0', '9');
|
|
|
|
}
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2014-07-04 22:30:39 -07:00
|
|
|
pub fn is_doc_comment(s: &str) -> bool {
|
2016-01-03 11:14:09 +02:00
|
|
|
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
|
|
|
|
s.starts_with("//!");
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("is {:?} a doc comment? {}", s, res);
|
2014-07-04 22:30:39 -07:00
|
|
|
res
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
2014-07-04 22:30:39 -07:00
|
|
|
pub fn is_block_doc_comment(s: &str) -> bool {
|
2016-01-03 11:20:06 +02:00
|
|
|
// Prevent `/**/` from being parsed as a doc comment
|
2016-01-03 11:14:09 +02:00
|
|
|
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
|
2016-01-03 11:20:06 +02:00
|
|
|
s.starts_with("/*!")) && s.len() >= 5;
|
2014-12-20 00:09:35 -08:00
|
|
|
debug!("is {:?} a doc comment? {}", s, res);
|
2014-07-04 22:30:39 -07:00
|
|
|
res
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn ident_start(c: Option<char>) -> bool {
|
2016-01-03 11:14:09 +02:00
|
|
|
let c = match c {
|
|
|
|
Some(c) => c,
|
|
|
|
None => return false,
|
|
|
|
};
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn ident_continue(c: Option<char>) -> bool {
|
2016-01-03 11:14:09 +02:00
|
|
|
let c = match c {
|
|
|
|
Some(c) => c,
|
|
|
|
None => return false,
|
|
|
|
};
|
2014-05-21 16:57:31 -07:00
|
|
|
|
2016-01-03 11:14:09 +02:00
|
|
|
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
|
|
|
|
(c > '\x7f' && c.is_xid_continue())
|
2014-05-21 16:57:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    use ast::{Ident, CrateConfig};
    use symbol::Symbol;
    use syntax_pos::{BytePos, Span, NO_EXPANSION};
    use codemap::CodeMap;
    use errors;
    use feature_gate::UnstableFeatures;
    use parse::token;
    use std::cell::RefCell;
    use std::collections::HashSet;
    use std::io;
    use std::rc::Rc;

    // Build a ParseSess whose diagnostic output is discarded (written
    // to io::sink), so lexer errors don't pollute test output.
    fn mk_sess(cm: Rc<CodeMap>) -> ParseSess {
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), Some(cm.clone()));
        ParseSess {
            span_diagnostic: errors::Handler::with_emitter(true, false, Box::new(emitter)),
            unstable_features: UnstableFeatures::from_environment(),
            config: CrateConfig::new(),
            included_mod_stack: RefCell::new(Vec::new()),
            code_map: cm,
            missing_fragment_specifiers: RefCell::new(HashSet::new()),
        }
    }

    // open a string reader for the given string
    fn setup<'a>(cm: &CodeMap,
                 sess: &'a ParseSess,
                 teststr: String)
                 -> StringReader<'a> {
        let fm = cm.new_filemap("zebra.rs".to_string(), teststr);
        StringReader::new(sess, fm)
    }

    #[test]
    fn t1() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        let mut string_reader = setup(&cm,
                                      &sh,
                                      "/* my source file */ fn main() { println!(\"zebra\"); }\n"
                                          .to_string());
        let id = Ident::from_str("fn");
        assert_eq!(string_reader.next_token().tok, token::Comment);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        let tok1 = string_reader.next_token();
        // `fn` occupies bytes 21..23 of the test source above.
        let tok2 = TokenAndSpan {
            tok: token::Ident(id),
            sp: Span {
                lo: BytePos(21),
                hi: BytePos(23),
                ctxt: NO_EXPANSION,
            },
        };
        assert_eq!(tok1, tok2);
        assert_eq!(string_reader.next_token().tok, token::Whitespace);
        // the 'main' id is already read:
        assert_eq!(string_reader.pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan {
            tok: token::Ident(Ident::from_str("main")),
            sp: Span {
                lo: BytePos(24),
                hi: BytePos(28),
                ctxt: NO_EXPANSION,
            },
        };
        assert_eq!(tok3, tok4);
        // the lparen is already read:
        assert_eq!(string_reader.pos.clone(), BytePos(29))
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str) -> token::Token {
        token::Ident(Ident::from_str(id))
    }

    #[test]
    fn doublecolonparsing() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        check_tokenization(setup(&cm, &sh, "a b".to_string()),
                           vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_2() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                           vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_3() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
                           vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
    }

    #[test]
    fn dcparsing_4() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                           vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
    }

    #[test]
    fn character_a() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern("a")), None));
    }

    #[test]
    fn character_space() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern(" ")), None));
    }

    #[test]
    fn character_escaped() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        // The token carries the escape sequence verbatim, not the
        // decoded newline.
        assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
                   token::Literal(token::Char(Symbol::intern("\\n")), None));
    }

    #[test]
    fn lifetime_name() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
                   token::Lifetime(Ident::from_str("'abc")));
    }

    #[test]
    fn raw_string() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                       .next_token()
                       .tok,
                   token::Literal(token::StrRaw(Symbol::intern("\"#a\\b\x00c\""), 3), None));
    }

    #[test]
    fn literal_suffixes() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        // Each literal is checked twice: once with the suffix attached,
        // and once with whitespace separating it (no suffix then).
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                          Some(Symbol::intern("suffix"))));
                // with a whitespace separator:
                assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok,
                           token::Literal(token::$tok_type(Symbol::intern($tok_contents)),
                                          None));
            }}
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str_, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok,
                   token::Literal(token::Integer(Symbol::intern("2")),
                                  Some(Symbol::intern("us"))));
        assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::StrRaw(Symbol::intern("raw"), 3),
                                  Some(Symbol::intern("suffix"))));
        assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok,
                   token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3),
                                  Some(Symbol::intern("suffix"))));
    }

    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    #[test]
    fn nested_block_comments() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
        // The nested block comment is emitted as a single token.
        match lexer.next_token().tok {
            token::Comment => {}
            _ => panic!("expected a comment!"),
        }
        assert_eq!(lexer.next_token().tok,
                   token::Literal(token::Char(Symbol::intern("a")), None));
    }

    #[test]
    fn crlf_comments() {
        let cm = Rc::new(CodeMap::new(FilePathMapping::empty()));
        let sh = mk_sess(cm.clone());
        let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
        let comment = lexer.next_token();
        assert_eq!(comment.tok, token::Comment);
        // Span ends at byte 7: the CR before the newline is excluded.
        assert_eq!((comment.sp.lo, comment.sp.hi), (BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token().tok, token::Whitespace);
        assert_eq!(lexer.next_token().tok,
                   token::DocComment(Symbol::intern("/// test")));
    }
}
|