rust/src/libsyntax/parse/lexer/mod.rs

use crate::parse::ParseSess;
use crate::parse::token::{self, Token, TokenKind};
use crate::symbol::{sym, Symbol};
use crate::parse::unescape;
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

use errors::{FatalError, Diagnostic, DiagnosticBuilder};
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
use core::unicode::property::Pattern_White_Space;

use std::borrow::Cow;
use std::char;
use std::iter;
use rustc_data_structures::sync::Lrc;
use log::debug;

pub mod comments;
mod tokentrees;
mod unicode_chars;

/// Record of a delimiter mismatch: which delimiter was expected, which one was
/// found instead, and the spans involved.
///
/// NOTE(review): nothing in this part of the file constructs or reads this
/// type, so the per-field notes below are inferred from the field names and
/// types only -- confirm against the code that fills the struct in.
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
    /// The delimiter that was expected (presumably the one that would close
    /// the currently open group).
    pub expected_delim: token::DelimToken,
    /// The delimiter that was actually encountered.
    pub found_delim: token::DelimToken,
    /// Span of the delimiter that was found.
    pub found_span: Span,
    /// Presumably the span of the still-unclosed opening delimiter, if known.
    pub unclosed_span: Option<Span>,
    /// Presumably a span suggested as the likely culprit, if any -- TODO confirm.
    pub candidate_span: Option<Span>,
}

/// A lexer over the text of a single `SourceFile`.
///
/// The reader tracks the current character (`ch`) together with the byte
/// position of that character (`pos`) and of the character after it
/// (`next_pos`); `bump` advances all three by one character.
pub struct StringReader<'a> {
    /// Parse session; used in this file to reach `span_diagnostic`, the
    /// source map and `raw_identifier_spans`.
    crate sess: &'a ParseSess,
    /// The absolute offset within the source_map of the next character to read
    crate next_pos: BytePos,
    /// The absolute offset within the source_map of the current character
    crate pos: BytePos,
    /// The current character (which has been read from self.pos), or `None`
    /// once `bump` has reached `end_src_index`.
    crate ch: Option<char>,
    /// The file being lexed; its `start_pos` is the base used to turn a
    /// `BytePos` into an index into `src`.
    crate source_file: Lrc<syntax_pos::SourceFile>,
    /// Stop reading src at this index.
    crate end_src_index: usize,
    /// Fatal diagnostics that are pending; they are emitted by
    /// `emit_fatal_errors` or handed out by `buffer_fatal_errors`.
    fatal_errs: Vec<DiagnosticBuilder<'a>>,
    // cache a direct reference to the source text, so that we don't have to
    // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time.
    src: Lrc<String>,
    /// When `Some`, `mk_sp` returns this span for every span it is asked to
    /// create, ignoring the requested bounds.
    override_span: Option<Span>,
}

impl<'a> StringReader<'a> {
    pub fn new(sess: &'a ParseSess,
               source_file: Lrc<syntax_pos::SourceFile>,
               override_span: Option<Span>) -> Self {
        let mut sr = StringReader::new_internal(sess, source_file, override_span);
        sr.bump();
        sr
    }

    /// Creates a reader restricted to the byte range covered by `span`.
    ///
    /// If `span` is inverted (`lo > hi`) or its two ends resolve to source
    /// files with different start positions, the range is collapsed to the
    /// empty range at `span.lo()`. No override span is installed.
    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
        let source_map = sess.source_map();
        let begin = source_map.lookup_byte_offset(span.lo());
        let end = source_map.lookup_byte_offset(span.hi());

        // Make the range zero-length if the span is invalid.
        let is_inverted = span.lo() > span.hi();
        let crosses_files = begin.sf.start_pos != end.sf.start_pos;
        if is_inverted || crosses_files {
            span = span.shrink_to_lo();
        }

        let mut reader = StringReader::new_internal(sess, begin.sf, None);

        // Seek the lexer to the right byte range.
        reader.next_pos = span.lo();
        let end_index = reader.src_index(span.hi());
        reader.end_src_index = end_index;

        // Load the first character of the range.
        reader.bump();
        reader
    }

    fn new_internal(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>,
        override_span: Option<Span>) -> Self
    {
        if source_file.src.is_none() {
            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
                                              source_file.name));
        }

        let src = (*source_file.src.as_ref().unwrap()).clone();

        StringReader {
            sess,
            next_pos: source_file.start_pos,
            pos: source_file.start_pos,
            ch: Some('\n'),
            source_file,
            end_src_index: src.len(),
            src,
            fatal_errs: Vec::new(),
            override_span,
        }
    }

    /// Returns the override span if one is set; otherwise builds the span
    /// `lo..hi` with no expansion info.
    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
        match self.override_span {
            Some(span) => span,
            None => Span::new(lo, hi, NO_EXPANSION),
        }
    }

    /// Returns the token on success; on failure emits all pending fatal
    /// errors and raises `FatalError`.
    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
        if let Ok(tok) = res {
            return tok;
        }
        self.emit_fatal_errors();
        FatalError.raise()
    }

    /// Returns the next token, including trivia like whitespace or comments.
    /// At end of input an `Eof` token located at the end of the source file
    /// is returned.
    ///
    /// `Err(())` means that some errors were encountered, which can be
    /// retrieved using `buffer_fatal_errors`.
    ///
    /// Panics if fatal errors from a previous call are still pending.
    pub fn try_next_token(&mut self) -> Result<Token, ()> {
        assert!(self.fatal_errs.is_empty());

        // Trivia (whitespace, comments, shebang) comes back as a ready token.
        if let Some(trivia) = self.scan_whitespace_or_comment() {
            return Ok(trivia);
        }

        if self.is_eof() {
            let eof_pos = self.source_file.end_pos;
            return Ok(Token::new(token::Eof, self.mk_sp(eof_pos, eof_pos)));
        }

        let start_pos = self.pos;
        let kind = self.next_token_inner()?;
        // `self.pos` now points just past the token that was scanned.
        let span = self.mk_sp(start_pos, self.pos);
        Ok(Token::new(kind, span))
    }

    /// Returns the next token, including trivia like whitespace or comments.
    ///
    /// Aborts in case of an error.
    pub fn next_token(&mut self) -> Token {
        let res = self.try_next_token();
        self.unwrap_or_abort(res)
    }

    /// Whether the reader has run out of characters (there is no current
    /// character).
    #[inline]
    fn is_eof(&self) -> bool {
        match self.ch {
            Some(_) => false,
            None => true,
        }
    }

    fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) -> ! {
        let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string");
        err.span_label(self.mk_sp(pos, pos), "unterminated raw string");

        if hash_count > 0 {
            err.note(&format!("this raw string should be terminated with `\"{}`",
                              "#".repeat(hash_count as usize)));
        }

        err.emit();
        FatalError.raise();
    }

    /// Emits every pending fatal diagnostic, then empties the pending list.
    crate fn emit_fatal_errors(&mut self) {
        self.fatal_errs.iter_mut().for_each(|err| {
            err.emit();
        });
        self.fatal_errs.clear();
    }

    /// Removes all pending fatal diagnostics from the reader and returns
    /// them in buffered form instead of emitting them.
    pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
        let mut diagnostics = Vec::new();
        self.fatal_errs.drain(..).for_each(|err| {
            err.buffer(&mut diagnostics);
        });
        diagnostics
    }

    /// Whether the current character exists and equals `c`.
    #[inline]
    fn ch_is(&self, c: char) -> bool {
        self.ch.map_or(false, |cur| cur == c)
    }

    /// Report a fatal lexical error with a given span, returning the
    /// `FatalError` value produced by the diagnostic handler.
    fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
        let handler = &self.sess.span_diagnostic;
        handler.span_fatal(sp, m)
    }

    /// Report a (non-fatal) lexical error with a given span; the diagnostic
    /// is emitted immediately.
    fn err_span(&self, sp: Span, m: &str) {
        let mut diag = self.sess.span_diagnostic.struct_span_err(sp, m);
        diag.emit();
    }


    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
        let sp = self.mk_sp(from_pos, to_pos);
        self.fatal_span(sp, m)
    }

    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        let sp = self.mk_sp(from_pos, to_pos);
        self.err_span(sp, m)
    }

    /// Report a fatal error spanning [`from_pos`, `to_pos`), with the message
    /// `m` followed by `": "` and the escaped form of `c`.
    fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
        let mut msg = String::from(m);
        msg.push_str(": ");
        push_escaped_char(&mut msg, c);

        self.fatal_span_(from_pos, to_pos, &msg)
    }

    /// Builds, without emitting, a fatal diagnostic spanning
    /// [`from_pos`, `to_pos`) with message `m`.
    fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
        -> DiagnosticBuilder<'a>
    {
        let sp = self.mk_sp(from_pos, to_pos);
        self.sess.span_diagnostic.struct_span_fatal(sp, m)
    }

    /// Builds, without emitting, a fatal diagnostic spanning
    /// [`from_pos`, `to_pos`) whose message is `m` followed by `": "` and the
    /// escaped form of `c`.
    fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
        -> DiagnosticBuilder<'a>
    {
        let mut msg = String::from(m);
        msg.push_str(": ");
        push_escaped_char(&mut msg, c);

        self.struct_span_fatal(from_pos, to_pos, &msg)
    }

    /// Converts an absolute `BytePos` into an index into `self.src` by
    /// subtracting the start position of the source file.
    #[inline]
    fn src_index(&self, pos: BytePos) -> usize {
        let offset = pos - self.source_file.start_pos;
        offset.to_usize()
    }

    /// Slice of the source text from `start` up to but excluding `self.pos`,
    /// meaning the slice does not include the character `self.ch`.
    fn str_from(&self, start: BytePos) -> &str
    {
        let end = self.pos;
        self.str_from_to(start, end)
    }

    /// Interns the source text between `start` and the current position
    /// (exclusive) and returns the resulting `Symbol`.
    fn symbol_from(&self, start: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, self.pos);
        let text = self.str_from(start);
        Symbol::intern(text)
    }

    /// As `symbol_from`, with an explicit (exclusive) endpoint.
    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
        debug!("taking an ident from {:?} to {:?}", start, end);
        let text = self.str_from_to(start, end);
        Symbol::intern(text)
    }

    /// Slice of the source text spanning from `start` up to but excluding `end`.
    ///
    /// Both positions are absolute and are converted to indices into the
    /// cached source text first.
    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
    {
        let lo = self.src_index(start);
        let hi = self.src_index(end);
        &self.src[lo..hi]
    }

    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    ///
    /// `start` is the absolute position of the first byte of `s`; it is only
    /// used to compute the spans of the errors. `errmsg` is the message used
    /// for every bare-CR error.
    ///
    /// If `s` contains no CRLF pair, `s` itself is returned borrowed (bare CRs
    /// are reported but left in the text). Otherwise an owned copy is built by
    /// the nested helper, starting at the first CRLF pair.
    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
        let mut chars = s.char_indices().peekable();
        while let Some((i, ch)) = chars.next() {
            if ch == '\r' {
                // First CRLF found: switch to the allocating path. The '\n' has
                // only been peeked, so `chars` still yields it next.
                if let Some((lf_idx, '\n')) = chars.peek() {
                    return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
                }
                // Bare CR before any CRLF: report it, keep scanning.
                let pos = start + BytePos(i as u32);
                let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
                self.err_span_(pos, end_pos, errmsg);
            }
        }
        return s.into();

        // Builds the translated string. On entry `j` is the index of the '\n'
        // of the first CRLF pair and `chars` is positioned at that '\n'.
        // Throughout, `j` is the start of the text not yet copied into `buf`.
        fn translate_crlf_(rdr: &StringReader<'_>,
                           start: BytePos,
                           s: &str,
                           mut j: usize,
                           mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
                           errmsg: &str)
                           -> String {
            let mut buf = String::with_capacity(s.len());
            // Skip first CR
            buf.push_str(&s[.. j - 1]);
            while let Some((i, ch)) = chars.next() {
                if ch == '\r' {
                    // Copy the pending text up to (not including) this CR.
                    if j < i {
                        buf.push_str(&s[j..i]);
                    }
                    // Resume copying after the CR, i.e. the CR is dropped from
                    // the output whether or not a '\n' follows it.
                    let next = i + ch.len_utf8();
                    j = next;
                    // A CR that is not followed by LF is reported as an error.
                    if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
                        let pos = start + BytePos(i as u32);
                        let end_pos = start + BytePos(next as u32);
                        rdr.err_span_(pos, end_pos, errmsg);
                    }
                }
            }
            // Copy whatever follows the last CR.
            if j < s.len() {
                buf.push_str(&s[j..]);
            }
            buf
        }
    }

    /// Advance the StringReader by one character.
    ///
    /// `pos` takes the old value of `next_pos`. If that position is still
    /// before `end_src_index`, `ch` becomes the character found there and
    /// `next_pos` moves past it; otherwise `ch` becomes `None` and `next_pos`
    /// is left unchanged.
    crate fn bump(&mut self) {
        let idx = self.src_index(self.next_pos);
        self.pos = self.next_pos;

        if idx >= self.end_src_index {
            self.ch = None;
            return;
        }

        let cur = char_at(&self.src, idx);
        self.ch = Some(cur);
        self.next_pos = self.next_pos + Pos::from_usize(cur.len_utf8());
    }

    /// Peeks at the character after the current one without advancing.
    /// Returns `None` if that position is at or beyond `end_src_index`.
    fn nextch(&self) -> Option<char> {
        let idx = self.src_index(self.next_pos);
        if idx >= self.end_src_index {
            return None;
        }
        Some(char_at(&self.src, idx))
    }

    /// Whether the character after the current one exists and equals `c`.
    #[inline]
    fn nextch_is(&self, c: char) -> bool {
        self.nextch().map_or(false, |next| next == c)
    }

    /// Peeks two characters ahead of the current one without advancing.
    /// Returns `None` if either of the two following positions is at or
    /// beyond `end_src_index`.
    fn nextnextch(&self) -> Option<char> {
        let first = self.src_index(self.next_pos);
        if first >= self.end_src_index {
            return None;
        }

        // Step over the next character to find where the one after it starts.
        let second = first + char_at(&self.src, first).len_utf8();
        if second >= self.end_src_index {
            return None;
        }

        Some(char_at(&self.src, second))
    }

    /// Whether the character two ahead of the current one exists and equals `c`.
    #[inline]
    fn nextnextch_is(&self, c: char) -> bool {
        self.nextnextch().map_or(false, |ahead| ahead == c)
    }

    /// Eats <XID_start><XID_continue>*, if possible.
    ///
    /// Returns the interned name, or `None` if the current character cannot
    /// start an identifier. A lone `_` is consumed as well, but it yields
    /// `None` together with a future-compatibility warning.
    fn scan_optional_raw_name(&mut self) -> Option<Symbol> {
        if !ident_start(self.ch) {
            return None;
        }

        let start = self.pos;
        // Consume the start character, then every continuation character.
        loop {
            self.bump();
            if !ident_continue(self.ch) {
                break;
            }
        }

        let name = self.str_from(start);
        if name != "_" {
            return Some(Symbol::intern(name));
        }

        self.sess.span_diagnostic
            .struct_span_warn(self.mk_sp(start, self.pos),
                              "underscore literal suffix is not allowed")
            .warn("this was previously accepted by the compiler but is \
                  being phased out; it will become a hard error in \
                  a future release!")
            .note("for more information, see issue #42326 \
                  <https://github.com/rust-lang/rust/issues/42326>")
            .emit();
        None
    }

    /// PRECONDITION: self.ch is not whitespace
    /// Eats any kind of comment.
    ///
    /// Handles three forms, selected by the current and the next character:
    /// `//` line comments, `/*` block comments (delegated to
    /// `scan_block_comment`) and a `#!` shebang at the very start of the file.
    /// Returns `None` without consuming anything if none of them applies.
    fn scan_comment(&mut self) -> Option<Token> {
        if self.ch.map_or(false, |c| c.is_whitespace()) {
            let msg = "called consume_any_line_comment, but there was whitespace";
            self.sess.span_diagnostic.span_err(self.mk_sp(self.pos, self.pos), msg);
        }

        match (self.ch, self.nextch()) {
            (Some('/'), Some('/')) => {
                self.bump();
                self.bump();

                // line comments starting with "///" or "//!" are doc-comments
                let is_doc = (self.ch_is('/') && !self.nextch_is('/')) || self.ch_is('!');
                let start_bpos = self.pos - BytePos(2);

                // Consume up to, but not including, the line terminator
                // (LF or CRLF) or the end of input.
                while let Some(cur) = self.ch {
                    let at_line_end = cur == '\n' || (cur == '\r' && self.nextch_is('\n'));
                    if at_line_end {
                        break;
                    }
                    if cur == '\r' && is_doc {
                        self.err_span_(self.pos,
                                       self.next_pos,
                                       "bare CR not allowed in doc-comment");
                    }
                    self.bump();
                }

                let kind = if is_doc {
                    token::DocComment(self.symbol_from(start_bpos))
                } else {
                    token::Comment
                };
                Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
            }
            (Some('/'), Some('*')) => {
                self.bump();
                self.bump();
                self.scan_block_comment()
            }
            (Some('#'), Some('!')) => {
                // `#![` starts an inner attribute, which is not trivia.
                if self.nextnextch_is('[') {
                    return None;
                }

                // A shebang is only recognized at the beginning of the file.
                if self.pos != self.source_file.start_pos {
                    return None;
                }

                debug!("skipping a shebang");
                let start = self.pos;
                while !self.ch_is('\n') && !self.is_eof() {
                    self.bump();
                }
                Some(Token::new(
                    token::Shebang(self.symbol_from(start)),
                    self.mk_sp(start, self.pos),
                ))
            }
            _ => None,
        }
    }

    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
    /// return `None`.
    ///
    /// A run of consecutive whitespace characters is returned as a single
    /// `Whitespace` token.
    fn scan_whitespace_or_comment(&mut self) -> Option<Token> {
        let cur = self.ch.unwrap_or('\0');

        // # to handle shebang at start of file -- this is the entry point
        // for skipping over all "junk"
        if cur == '/' || cur == '#' {
            let comment = self.scan_comment();
            debug!("scanning a comment {:?}", comment);
            return comment;
        }

        if !is_pattern_whitespace(Some(cur)) {
            return None;
        }

        let start_bpos = self.pos;
        while is_pattern_whitespace(self.ch) {
            self.bump();
        }
        let ws = Some(Token::new(token::Whitespace, self.mk_sp(start_bpos, self.pos)));
        debug!("scanning whitespace: {:?}", ws);
        ws
    }

    /// Scans a (possibly nested) block comment whose opening `/*` has already
    /// been consumed. Might return a sugared-doc-attr.
    ///
    /// Raises a fatal error if the input ends before the comment is closed.
    /// For doc-comments containing CR characters, CRLF is translated to LF and
    /// bare CRs are reported.
    fn scan_block_comment(&mut self) -> Option<Token> {
        // block comments starting with "/**" or "/*!" are doc-comments
        let is_doc_comment = self.ch_is('*') || self.ch_is('!');
        // The two characters of the opening `/*` are part of the token.
        let start_bpos = self.pos - BytePos(2);

        let mut depth: isize = 1;
        let mut saw_cr = false;
        while depth > 0 {
            let cur = match self.ch {
                Some(cur) => cur,
                None => {
                    let msg = if is_doc_comment {
                        "unterminated block doc-comment"
                    } else {
                        "unterminated block comment"
                    };
                    let last_bpos = self.pos;
                    self.fatal_span_(start_bpos, last_bpos, msg).raise()
                }
            };

            // For `/*` and `*/` the first character is consumed here and the
            // second one by the shared `bump` below.
            if cur == '/' && self.nextch_is('*') {
                depth += 1;
                self.bump();
            } else if cur == '*' && self.nextch_is('/') {
                depth -= 1;
                self.bump();
            } else if cur == '\r' {
                saw_cr = true;
            }
            self.bump();
        }

        let text = self.str_from(start_bpos);
        // but comments with only "*"s between two "/"s are not
        let kind = if !is_block_doc_comment(text) {
            token::Comment
        } else if saw_cr {
            let translated = self.translate_crlf(start_bpos,
                                                 text,
                                                 "bare CR not allowed in block doc-comment");
            token::DocComment(Symbol::intern(&translated))
        } else {
            token::DocComment(Symbol::intern(text))
        };

        Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
    }

    /// Scan through any digits (base `scan_radix`) or underscores,
    /// and return how many digits there were (underscores are not counted).
    ///
    /// `real_radix` represents the true radix of the number we're
    /// interested in, and errors will be emitted for any digits
    /// between `real_radix` and `scan_radix`.
    ///
    /// Panics if `real_radix > scan_radix`.
    fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
        assert!(real_radix <= scan_radix);
        let mut count = 0;

        while let Some(cur) = self.ch {
            if cur == '_' {
                debug!("skipping a _");
                self.bump();
                continue;
            }
            if cur.to_digit(scan_radix).is_none() {
                break;
            }

            debug!("{:?} in scan_digits", self.ch);
            // check that the hypothetical digit is actually
            // in range for the true radix
            if cur.to_digit(real_radix).is_none() {
                self.err_span_(self.pos,
                               self.next_pos,
                               &format!("invalid digit for a base {} literal", real_radix));
            }
            count += 1;
            self.bump();
        }

        count
    }

    /// Lex a LIT_INTEGER or a LIT_FLOAT
    ///
    /// `c` is the current character (the first character of the literal).
    /// Returns the literal kind together with the interned source text of the
    /// literal; a plain `0` and a literal without any valid digit both yield
    /// the integer `0`.
    fn scan_number(&mut self, c: char) -> (token::LitKind, Symbol) {
        let start_bpos = self.pos;
        self.bump();

        let mut base = 10;
        let num_digits = if c == '0' {
            // (base, real radix, scan radix) for a `0b` / `0o` / `0x` prefix.
            let prefixed = match self.ch.unwrap_or('\0') {
                'b' => Some((2, 2, 10)),
                'o' => Some((8, 8, 10)),
                'x' => Some((16, 16, 16)),
                '0'..='9' | '_' | '.' | 'e' | 'E' => None,
                // just a 0
                _ => return (token::Integer, sym::integer(0)),
            };
            match prefixed {
                Some((prefix_base, real_radix, scan_radix)) => {
                    self.bump();
                    base = prefix_base;
                    self.scan_digits(real_radix, scan_radix)
                }
                // The leading `0` already counts as one digit.
                None => self.scan_digits(10, 10) + 1,
            }
        } else if c.is_digit(10) {
            self.scan_digits(10, 10) + 1
        } else {
            0
        };

        if num_digits == 0 {
            self.err_span_(start_bpos, self.pos, "no valid digits found for number");

            return (token::Integer, sym::integer(0));
        }

        // might be a float, but don't be greedy if this is actually an
        // integer literal followed by field/method access or a range pattern
        // (`0..2` and `12.foo()`)
        let has_fraction = self.ch_is('.') && !self.nextch_is('.') &&
                           !ident_start(self.nextch());

        let is_float = if has_fraction {
            // might have stuff after the ., and if it does, it needs to start
            // with a number
            self.bump();
            if self.ch.unwrap_or('\0').is_digit(10) {
                self.scan_digits(10, 10);
                self.scan_float_exponent();
            }
            true
        } else if self.ch_is('e') || self.ch_is('E') {
            // it might be a float if it has an exponent
            self.scan_float_exponent();
            true
        } else {
            false
        };

        if !is_float {
            // but we certainly have an integer!
            return (token::Integer, self.symbol_from(start_bpos));
        }

        let pos = self.pos;
        self.check_float_base(start_bpos, pos, base);
        (token::Float, self.symbol_from(start_bpos))
    }

    /// Scan over a float exponent: `e` or `E`, an optional sign, and digits.
    ///
    /// Does nothing if the current character is not `e`/`E`. If no digit
    /// follows, an error is emitted.
    fn scan_float_exponent(&mut self) {
        if !(self.ch_is('e') || self.ch_is('E')) {
            return;
        }
        self.bump();

        if self.ch_is('-') || self.ch_is('+') {
            self.bump();
        }

        if self.scan_digits(10, 10) != 0 {
            return;
        }

        let mut err = self.struct_span_fatal(
            self.pos, self.next_pos,
            "expected at least one digit in exponent"
        );
        if let Some(ch) = self.ch {
            // check for e.g., Unicode minus '−' (Issue #49746)
            if unicode_chars::check_for_substitution(self, ch, &mut err) {
                // Skip the offending character and keep scanning the digits.
                self.bump();
                self.scan_digits(10, 10);
            }
        }
        err.emit();
    }

    /// Checks that a base is valid for a floating literal, emitting a nice
    /// error if it isn't.
    ///
    /// Bases 16, 8 and 2 are rejected with an error spanning
    /// [`start_bpos`, `last_bpos`); any other base is accepted silently.
    fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
        let msg = match base {
            16 => "hexadecimal float literal is not supported",
            8 => "octal float literal is not supported",
            2 => "binary float literal is not supported",
            _ => return,
        };
        self.err_span_(start_bpos, last_bpos, msg);
    }

    /// Consumes the current (operator) character and, if it is directly
    /// followed by `=`, that character too, producing `BinOpEq(op)`;
    /// otherwise produces `BinOp(op)`.
    fn binop(&mut self, op: token::BinOpToken) -> TokenKind {
        self.bump();
        if !self.ch_is('=') {
            return token::BinOp(op);
        }
        self.bump();
        token::BinOpEq(op)
    }

    /// Returns the next token from the string, advances the input past that
    /// token, and updates the interner
    fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
        let c = self.ch;

        if ident_start(c) {
            let (is_ident_start, is_raw_ident) =
                match (c.unwrap(), self.nextch(), self.nextnextch()) {
                    // r# followed by an identifier starter is a raw identifier.
                    // This is an exception to the r# case below.
                    ('r', Some('#'), x) if ident_start(x) => (true, true),
                    // r as in r" or r#" is part of a raw string literal.
                    // b as in b' is part of a byte literal.
                    // They are not identifiers, and are handled further down.
                    ('r', Some('"'), _) |
                    ('r', Some('#'), _) |
                    ('b', Some('"'), _) |
                    ('b', Some('\''), _) |
                    ('b', Some('r'), Some('"')) |
                    ('b', Some('r'), Some('#')) => (false, false),
                    _ => (true, false),
                };

            if is_ident_start {
                let raw_start = self.pos;
                if is_raw_ident {
                    // Consume the 'r#' characters.
                    self.bump();
                    self.bump();
                }

                let start = self.pos;
                self.bump();

                while ident_continue(self.ch) {
                    self.bump();
                }

                // FIXME: perform NFKC normalization here. (Issue #2253)
                let name = self.symbol_from(start);
                if is_raw_ident {
                    let span = self.mk_sp(raw_start, self.pos);
                    if !name.can_be_raw() {
                        self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
                    }
                    self.sess.raw_identifier_spans.borrow_mut().push(span);
                }

                return Ok(token::Ident(name, is_raw_ident));
            }
        }

        if is_dec_digit(c) {
            let (kind, symbol) = self.scan_number(c.unwrap());
            let suffix = self.scan_optional_raw_name();
            debug!("next_token_inner: scanned number {:?}, {:?}, {:?}", kind, symbol, suffix);
            return Ok(TokenKind::lit(kind, symbol, suffix));
        }

        match c.expect("next_token_inner called at EOF") {
            // One-byte tokens.
            ';' => {
                self.bump();
                Ok(token::Semi)
            }
            ',' => {
                self.bump();
                Ok(token::Comma)
            }
            '.' => {
                self.bump();
                if self.ch_is('.') {
                    self.bump();
                    if self.ch_is('.') {
                        self.bump();
                        Ok(token::DotDotDot)
                    } else if self.ch_is('=') {
                        self.bump();
                        Ok(token::DotDotEq)
                    } else {
                        Ok(token::DotDot)
                    }
                } else {
                    Ok(token::Dot)
                }
            }
            '(' => {
                self.bump();
                Ok(token::OpenDelim(token::Paren))
            }
            ')' => {
                self.bump();
                Ok(token::CloseDelim(token::Paren))
            }
            '{' => {
                self.bump();
                Ok(token::OpenDelim(token::Brace))
            }
            '}' => {
                self.bump();
                Ok(token::CloseDelim(token::Brace))
            }
            '[' => {
                self.bump();
                Ok(token::OpenDelim(token::Bracket))
            }
            ']' => {
                self.bump();
                Ok(token::CloseDelim(token::Bracket))
            }
            '@' => {
                self.bump();
                Ok(token::At)
            }
            '#' => {
                self.bump();
                Ok(token::Pound)
            }
            '~' => {
                self.bump();
                Ok(token::Tilde)
            }
            '?' => {
                self.bump();
                Ok(token::Question)
            }
            ':' => {
                self.bump();
                if self.ch_is(':') {
                    self.bump();
                    Ok(token::ModSep)
                } else {
                    Ok(token::Colon)
                }
            }

            '$' => {
                self.bump();
                Ok(token::Dollar)
            }

            // Multi-byte tokens.
            '=' => {
                self.bump();
                if self.ch_is('=') {
                    self.bump();
                    Ok(token::EqEq)
                } else if self.ch_is('>') {
                    self.bump();
                    Ok(token::FatArrow)
                } else {
                    Ok(token::Eq)
                }
            }
            '!' => {
                self.bump();
                if self.ch_is('=') {
                    self.bump();
                    Ok(token::Ne)
                } else {
                    Ok(token::Not)
                }
            }
            '<' => {
                self.bump();
                match self.ch.unwrap_or('\x00') {
                    '=' => {
                        self.bump();
                        Ok(token::Le)
                    }
                    '<' => {
                        Ok(self.binop(token::Shl))
                    }
                    '-' => {
                        self.bump();
                        Ok(token::LArrow)
                    }
                    _ => {
                        Ok(token::Lt)
                    }
                }
            }
            '>' => {
                self.bump();
                match self.ch.unwrap_or('\x00') {
                    '=' => {
                        self.bump();
                        Ok(token::Ge)
                    }
                    '>' => {
                        Ok(self.binop(token::Shr))
                    }
                    _ => {
                        Ok(token::Gt)
                    }
                }
            }
            '\'' => {
                // Either a character constant 'a' OR a lifetime name 'abc
                let start_with_quote = self.pos;
                self.bump();
                let start = self.pos;

                // If the character is an ident start not followed by another single
                // quote, then this is a lifetime name:
                let starts_with_number = self.ch.unwrap_or('\x00').is_numeric();
                if (ident_start(self.ch) || starts_with_number) && !self.nextch_is('\'') {
                    self.bump();
                    while ident_continue(self.ch) {
                        self.bump();
                    }
                    // lifetimes shouldn't end with a single quote
                    // if we find one, then this is an invalid character literal
                    if self.ch_is('\'') {
                        let symbol = self.symbol_from(start);
                        self.bump();
                        self.validate_char_escape(start_with_quote);
                        return Ok(TokenKind::lit(token::Char, symbol, None));
                    }

                    if starts_with_number {
                        // this is a recovered lifetime written `'1`, error but accept it
                        self.err_span_(
                            start_with_quote,
                            self.pos,
                            "lifetimes cannot start with a number",
                        );
                    }

                    // Include the leading `'` in the real identifier, for macro
                    // expansion purposes. See #12512 for the gory details of why
                    // this is necessary.
                    return Ok(token::Lifetime(self.symbol_from(start_with_quote)));
                }
                let msg = "unterminated character literal";
                let symbol = self.scan_single_quoted_string(start_with_quote, msg);
                self.validate_char_escape(start_with_quote);
                let suffix = self.scan_optional_raw_name();
                Ok(TokenKind::lit(token::Char, symbol, suffix))
            }
            'b' => {
                self.bump();
                let (kind, symbol) = match self.ch {
                    Some('\'') => {
                        let start_with_quote = self.pos;
                        self.bump();
                        let msg = "unterminated byte constant";
                        let symbol = self.scan_single_quoted_string(start_with_quote, msg);
                        self.validate_byte_escape(start_with_quote);
                        (token::Byte, symbol)
                    },
                    Some('"') => {
                        let start_with_quote = self.pos;
                        let msg = "unterminated double quote byte string";
                        let symbol = self.scan_double_quoted_string(msg);
                        self.validate_byte_str_escape(start_with_quote);
                        (token::ByteStr, symbol)
                    },
                    Some('r') => {
                        let (start, end, hash_count) = self.scan_raw_string();
                        let symbol = self.symbol_from_to(start, end);
                        self.validate_raw_byte_str_escape(start, end);

                        (token::ByteStrRaw(hash_count), symbol)
                    }
                    _ => unreachable!(),  // Should have been a token::Ident above.
                };
                let suffix = self.scan_optional_raw_name();

                Ok(TokenKind::lit(kind, symbol, suffix))
            }
            '"' => {
                let start_with_quote = self.pos;
                let msg = "unterminated double quote string";
                let symbol = self.scan_double_quoted_string(msg);
                self.validate_str_escape(start_with_quote);
                let suffix = self.scan_optional_raw_name();
                Ok(TokenKind::lit(token::Str, symbol, suffix))
            }
            'r' => {
                let (start, end, hash_count) = self.scan_raw_string();
                let symbol = self.symbol_from_to(start, end);
                self.validate_raw_str_escape(start, end);
                let suffix = self.scan_optional_raw_name();

                Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
            }
            '-' => {
                if self.nextch_is('>') {
                    self.bump();
                    self.bump();
                    Ok(token::RArrow)
                } else {
                    Ok(self.binop(token::Minus))
                }
            }
            '&' => {
                if self.nextch_is('&') {
                    self.bump();
                    self.bump();
                    Ok(token::AndAnd)
                } else {
                    Ok(self.binop(token::And))
                }
            }
            '|' => {
                match self.nextch() {
                    Some('|') => {
                        self.bump();
                        self.bump();
                        Ok(token::OrOr)
                    }
                    _ => {
                        Ok(self.binop(token::Or))
                    }
                }
            }
            '+' => {
                Ok(self.binop(token::Plus))
            }
            '*' => {
                Ok(self.binop(token::Star))
            }
            '/' => {
                Ok(self.binop(token::Slash))
            }
            '^' => {
                Ok(self.binop(token::Caret))
            }
            '%' => {
                Ok(self.binop(token::Percent))
            }
            c => {
                let last_bpos = self.pos;
                let bpos = self.next_pos;
                let mut err = self.struct_fatal_span_char(last_bpos,
                                                          bpos,
                                                          "unknown start of token",
                                                          c);
                unicode_chars::check_for_substitution(self, c, &mut err);
                self.fatal_errs.push(err);

                Err(())
            }
        }
    }

    /// Collects the characters from the current position up to, but not
    /// including, the next `\n` (or the end of input) and returns them.
    /// A terminating `\n`, if present, is bumped past but not returned.
    fn read_to_eol(&mut self) -> String {
        let mut line = String::new();
        loop {
            if self.ch_is('\n') || self.is_eof() {
                break;
            }
            line.push(self.ch.unwrap());
            self.bump();
        }

        // Step over the line terminator itself, if that is what stopped us.
        if self.ch_is('\n') {
            self.bump();
        }

        line
    }

    /// Reads the rest of the current line with `read_to_eol` and returns it,
    /// asserting that the text begins with `//` or `#!`.
    fn read_one_line_comment(&mut self) -> String {
        let val = self.read_to_eol();
        // `starts_with` also handles text shorter than two bytes, which raw
        // indexing would turn into an out-of-bounds panic instead of a
        // failure of this assertion.
        assert!(val.starts_with("//") || val.starts_with("#!"));
        val
    }

    /// Bumps past pattern-whitespace characters, stopping at the first `\n`,
    /// the first non-whitespace character, or the end of input.
    fn consume_non_eol_whitespace(&mut self) {
        loop {
            let skippable = is_pattern_whitespace(self.ch)
                && !self.ch_is('\n')
                && !self.is_eof();
            if !skippable {
                break;
            }
            self.bump();
        }
    }

    /// Returns `true` when the current and following characters form `//` or
    /// `/*`, or form a `#!` that is not immediately followed by `[`.
    fn peeking_at_comment(&self) -> bool {
        let slash_comment = self.ch_is('/') && (self.nextch_is('/') || self.nextch_is('*'));
        if slash_comment {
            return true;
        }
        // consider shebangs comments, but not inner attributes
        self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')
    }

    /// Scans the remainder of a single-quoted literal and returns its contents
    /// as a symbol. Assumes the opening `'` has already been bumped past.
    ///
    /// `start_with_quote` is the start of the error span and `unterminated_msg`
    /// the message used when no closing quote is found; that error is raised
    /// as fatal.
    fn scan_single_quoted_string(&mut self,
                                 start_with_quote: BytePos,
                                 unterminated_msg: &str) -> Symbol {
        let content_start = self.pos;
        if self.ch_is('\'') && self.nextch_is('\'') {
            // lex `'''` as a single char, for recovery
            self.bump();
        } else {
            let mut at_first_char = true;
            while !self.ch_is('\'') {
                let escaped_pair =
                    self.ch_is('\\') && (self.nextch_is('\'') || self.nextch_is('\\'));
                if escaped_pair {
                    // Step over `\'` or `\\` as a unit so the escaped character is
                    // not taken for the closing quote or the start of another escape.
                    self.bump();
                    self.bump();
                } else {
                    // Only attempt to infer single line string literals. If we encounter
                    // a slash, bail out in order to avoid nonsensical suggestion when
                    // involving comments.
                    let unterminated = self.is_eof()
                        || (self.ch_is('/') && !at_first_char)
                        || (self.ch_is('\n') && !self.nextch_is('\''));
                    if unterminated {
                        self.fatal_span_(start_with_quote, self.pos, unterminated_msg.into())
                            .raise()
                    }
                    self.bump();
                }
                at_first_char = false;
            }
        }

        let symbol = self.symbol_from(content_start);
        // Bump past the closing quote.
        self.bump();
        symbol
    }

    /// Scans a double-quoted literal whose opening `"` is the current character
    /// and returns the text between the quotes as a symbol.
    ///
    /// If the end of input is reached before a closing `"`, a fatal error with
    /// `unterminated_msg` is raised, spanning from the opening quote.
    fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> Symbol {
        debug_assert!(self.ch_is('\"'));
        let opening_quote = self.pos;
        self.bump();
        let content_start = self.pos;
        loop {
            if self.ch_is('"') {
                break;
            }
            if self.is_eof() {
                let eof_pos = self.pos;
                self.fatal_span_(opening_quote, eof_pos, unterminated_msg).raise();
            }
            let escaped_pair = self.ch_is('\\') && (self.nextch_is('\\') || self.nextch_is('"'));
            if escaped_pair {
                // Skip the backslash here; the escaped character is bumped below,
                // so an escaped `"` does not end the literal.
                self.bump();
            }
            self.bump();
        }
        let symbol = self.symbol_from(content_start);
        // Bump past the closing quote.
        self.bump();
        symbol
    }

    /// Scans a raw (byte) string, starting at the `r` of
    /// `(b)r##..."<literal>"##...`.
    ///
    /// Returns the byte position range of `<literal>` (the text between the
    /// delimiting quotes, with the quotes themselves excluded) along with the
    /// number of `#` characters in the delimiter.
    fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) {
        let start_bpos = self.pos;
        // Skip the `r`.
        self.bump();
        let mut hash_count: u16 = 0;
        while self.ch_is('#') {
            // The count is stored in a `u16`; refuse another `#` once it is full.
            if hash_count == u16::max_value() {
                let bpos = self.next_pos;
                self.fatal_span_(start_bpos,
                                 bpos,
                                 "too many `#` symbols: raw strings may be \
                                 delimited by up to 65535 `#` symbols").raise();
            }
            self.bump();
            hash_count += 1;
        }

        if self.is_eof() {
            self.fail_unterminated_raw_string(start_bpos, hash_count);
        } else if !self.ch_is('"') {
            let last_bpos = self.pos;
            let curr_char = self.ch.unwrap();
            self.fatal_span_char(start_bpos,
                                 last_bpos,
                                 "found invalid character; only `#` is allowed \
                                 in raw string delimitation",
                                 curr_char).raise();
        }
        // Skip the opening quote; the content starts right after it.
        self.bump();
        let content_start_bpos = self.pos;
        let mut content_end_bpos;
        'outer: loop {
            match self.ch {
                None => {
                    self.fail_unterminated_raw_string(start_bpos, hash_count);
                }
                Some('"') => {
                    // Tentatively treat this quote as the closing one; it only
                    // counts if it is followed by `hash_count` `#` characters.
                    content_end_bpos = self.pos;
                    for _ in 0..hash_count {
                        self.bump();
                        if !self.ch_is('#') {
                            // Not the terminator. Re-examine the current character
                            // without bumping, since it may itself be a `"`.
                            continue 'outer;
                        }
                    }
                    break;
                }
                _ => (),
            }
            self.bump();
        }

        // Bump past the last character of the terminator.
        self.bump();

        (content_start_bpos, content_end_bpos, hash_count)
    }

    /// Runs `unescape::unescape_char` over the text between `start_with_quote + 1`
    /// and `self.pos - 1` and emits an unescape error if it reports one.
    fn validate_char_escape(&self, start_with_quote: BytePos) {
        let content_lo = start_with_quote + BytePos(1);
        let content_hi = self.pos - BytePos(1);
        let lit = self.str_from_to(content_lo, content_hi);
        match unescape::unescape_char(lit) {
            Ok(_) => {}
            Err((off, err)) => emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(start_with_quote, self.pos),
                unescape::Mode::Char,
                0..off,
                err,
            ),
        }
    }

    /// Runs `unescape::unescape_byte` over the text between `start_with_quote + 1`
    /// and `self.pos - 1` and emits an unescape error if it reports one.
    fn validate_byte_escape(&self, start_with_quote: BytePos) {
        let content_lo = start_with_quote + BytePos(1);
        let content_hi = self.pos - BytePos(1);
        let lit = self.str_from_to(content_lo, content_hi);
        match unescape::unescape_byte(lit) {
            Ok(_) => {}
            Err((off, err)) => emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(start_with_quote, self.pos),
                unescape::Mode::Byte,
                0..off,
                err,
            ),
        }
    }

    /// Runs `unescape::unescape_str` over the text between `start_with_quote + 1`
    /// and `self.pos - 1`, emitting an unescape error for every `Err` passed to
    /// the callback.
    fn validate_str_escape(&self, start_with_quote: BytePos) {
        let end = self.pos;
        let lit = self.str_from_to(start_with_quote + BytePos(1), end - BytePos(1));
        unescape::unescape_str(lit, &mut |range, result| match result {
            Ok(_) => {}
            Err(err) => emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(start_with_quote, end),
                unescape::Mode::Str,
                range,
                err,
            ),
        })
    }

    /// Runs `unescape::unescape_raw_str` over the text in
    /// `content_start..content_end`, emitting an unescape error for every `Err`
    /// passed to the callback. The reported span is one byte wider than the
    /// content on each side.
    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_str(lit, &mut |range, result| match result {
            Ok(_) => {}
            Err(err) => {
                let span = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    span,
                    unescape::Mode::Str,
                    range,
                    err,
                )
            }
        })
    }

    /// Runs `unescape::unescape_raw_byte_str` over the text in
    /// `content_start..content_end`, emitting an unescape error for every `Err`
    /// passed to the callback. The reported span is one byte wider than the
    /// content on each side.
    fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
        let lit = self.str_from_to(content_start, content_end);
        unescape::unescape_raw_byte_str(lit, &mut |range, result| match result {
            Ok(_) => {}
            Err(err) => {
                let span = self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
                emit_unescape_error(
                    &self.sess.span_diagnostic,
                    lit,
                    span,
                    unescape::Mode::ByteStr,
                    range,
                    err,
                )
            }
        })
    }

    /// Runs `unescape::unescape_byte_str` over the text between
    /// `start_with_quote + 1` and `self.pos - 1`, emitting an unescape error for
    /// every `Err` passed to the callback.
    fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
        let end = self.pos;
        let lit = self.str_from_to(start_with_quote + BytePos(1), end - BytePos(1));
        unescape::unescape_byte_str(lit, &mut |range, result| match result {
            Ok(_) => {}
            Err(err) => emit_unescape_error(
                &self.sess.span_diagnostic,
                lit,
                self.mk_sp(start_with_quote, end),
                unescape::Mode::ByteStr,
                range,
                err,
            ),
        })
    }
}

// Checks `c` against the Unicode property 'PATTERN_WHITE_SPACE', which is
// guaranteed to be forward compatible (http://unicode.org/reports/tr31/#R3).
// A missing character (`None`) is never whitespace.
#[inline]
crate fn is_pattern_whitespace(c: Option<char>) -> bool {
    match c {
        Some(ch) => Pattern_White_Space(ch),
        None => false,
    }
}

/// Returns `true` if `c` holds a character within the inclusive range
/// `lo..=hi`; `None` is never in range.
#[inline]
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    match c {
        Some(ch) => ch >= lo && ch <= hi,
        None => false,
    }
}

/// Returns `true` if `c` holds an ASCII decimal digit (`'0'` through `'9'`).
#[inline]
fn is_dec_digit(c: Option<char>) -> bool {
    let (first_digit, last_digit) = ('0', '9');
    in_range(c, first_digit, last_digit)
}

/// Reports whether `s` starts like a line doc comment: either `///` that is
/// not immediately followed by a fourth `/`, or `//!`.
fn is_doc_comment(s: &str) -> bool {
    let outer = s.starts_with("///") && s.as_bytes().get(3) != Some(&b'/');
    let res = outer || s.starts_with("//!");
    debug!("is {:?} a doc comment? {}", s, res);
    res
}

/// Reports whether `s` starts like a block doc comment: either `/**` that is
/// not immediately followed by another `*`, or `/*!`, and in both cases is at
/// least five bytes long.
fn is_block_doc_comment(s: &str) -> bool {
    let outer = s.starts_with("/**") && s.as_bytes().get(3) != Some(&b'*');
    let doc_opener = outer || s.starts_with("/*!");
    // Prevent `/**/` from being parsed as a doc comment
    let res = doc_opener && s.len() >= 5;
    debug!("is {:?} a doc comment? {}", s, res);
    res
}

/// Determine whether `c` is a valid start for an ident: an ASCII letter, `_`,
/// or a non-ASCII character for which `is_xid_start` holds. `None` is not.
fn ident_start(c: Option<char>) -> bool {
    match c {
        Some('a'..='z') | Some('A'..='Z') | Some('_') => true,
        Some(ch) if ch > '\x7f' => ch.is_xid_start(),
        _ => false,
    }
}

/// Determine whether `c` may continue an ident: an ASCII letter or digit, `_`,
/// or a non-ASCII character for which `is_xid_continue` holds. `None` may not.
fn ident_continue(c: Option<char>) -> bool {
    match c {
        Some('a'..='z') | Some('A'..='Z') | Some('0'..='9') | Some('_') => true,
        Some(ch) if ch > '\x7f' => ch.is_xid_continue(),
        _ => false,
    }
}

/// Returns the character of `s` that starts at byte offset `byte`.
///
/// Panics if `byte` is not on a character boundary of `s`, or if no character
/// follows it.
#[inline]
fn char_at(s: &str, byte: usize) -> char {
    let tail = &s[byte..];
    let mut chars = tail.chars();
    chars.next().unwrap()
}

// Unit tests for the lexer: each test lexes a small in-memory source string
// and checks the tokens (and, in some cases, spans/positions) produced.
#[cfg(test)]
mod tests {
    use super::*;

    use crate::ast::CrateConfig;
    use crate::symbol::Symbol;
    use crate::source_map::{SourceMap, FilePathMapping};
    use crate::feature_gate::UnstableFeatures;
    use crate::parse::token;
    use crate::diagnostics::plugin::ErrorMap;
    use crate::with_default_globals;
    use std::io;
    use std::path::PathBuf;
    use syntax_pos::{BytePos, Span, NO_EXPANSION, edition::Edition};
    use rustc_data_structures::fx::{FxHashSet, FxHashMap};
    use rustc_data_structures::sync::Lock;

    // build a `ParseSess` over the given source map whose diagnostics are
    // written to `io::sink()`, with every other field set to an empty/default
    // value
    fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
                                                          Some(sm.clone()),
                                                          false,
                                                          false,
                                                          false);
        ParseSess {
            span_diagnostic: errors::Handler::with_emitter(true, None, Box::new(emitter)),
            unstable_features: UnstableFeatures::from_environment(),
            config: CrateConfig::default(),
            included_mod_stack: Lock::new(Vec::new()),
            source_map: sm,
            missing_fragment_specifiers: Lock::new(FxHashSet::default()),
            raw_identifier_spans: Lock::new(Vec::new()),
            registered_diagnostics: Lock::new(ErrorMap::new()),
            buffered_lints: Lock::new(vec![]),
            edition: Edition::from_session(),
            ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
            param_attr_spans: Lock::new(Vec::new()),
            let_chains_spans: Lock::new(Vec::new()),
            async_closure_spans: Lock::new(Vec::new()),
        }
    }

    // open a string reader for the given string
    // (the string is registered in the source map as a new source file, and the
    // string itself doubles as that file's name)
    fn setup<'a>(sm: &SourceMap,
                 sess: &'a ParseSess,
                 teststr: String)
                 -> StringReader<'a> {
        let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
        StringReader::new(sess, sf, None)
    }

    // lex the start of a small program and check token kinds, spans and the
    // reader position after `main` and after the following `(`
    #[test]
    fn t1() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            let mut string_reader = setup(&sm,
                                        &sh,
                                        "/* my source file */ fn main() { println!(\"zebra\"); }\n"
                                            .to_string());
            assert_eq!(string_reader.next_token(), token::Comment);
            assert_eq!(string_reader.next_token(), token::Whitespace);
            let tok1 = string_reader.next_token();
            // `fn` occupies bytes 21..23 of the input
            let tok2 = Token::new(
                mk_ident("fn"),
                Span::new(BytePos(21), BytePos(23), NO_EXPANSION),
            );
            assert_eq!(tok1.kind, tok2.kind);
            assert_eq!(tok1.span, tok2.span);
            assert_eq!(string_reader.next_token(), token::Whitespace);
            // read another token:
            let tok3 = string_reader.next_token();
            assert_eq!(string_reader.pos.clone(), BytePos(28));
            // `main` occupies bytes 24..28 of the input
            let tok4 = Token::new(
                mk_ident("main"),
                Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
            );
            assert_eq!(tok3.kind, tok4.kind);
            assert_eq!(tok3.span, tok4.span);

            assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
            assert_eq!(string_reader.pos.clone(), BytePos(29))
        })
    }

    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
        for expected_tok in &expected {
            assert_eq!(&string_reader.next_token(), expected_tok);
        }
    }

    // make the identifier by looking up the string in the interner
    fn mk_ident(id: &str) -> TokenKind {
        token::Ident(Symbol::intern(id), false)
    }

    // make a literal token of the given kind, interning its symbol and
    // (optional) suffix
    fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
        TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
    }

    // NOTE: despite the name, this input contains no `::`; it checks two
    // identifiers separated by whitespace
    #[test]
    fn doublecolonparsing() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            check_tokenization(setup(&sm, &sh, "a b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
        })
    }

    // `::` directly between two identifiers
    #[test]
    fn dcparsing_2() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            check_tokenization(setup(&sm, &sh, "a::b".to_string()),
                            vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
        })
    }

    // whitespace before `::`
    #[test]
    fn dcparsing_3() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
        })
    }

    // whitespace after `::`
    #[test]
    fn dcparsing_4() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
                            vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
        })
    }

    // a plain character literal
    #[test]
    fn character_a() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(),
                       mk_lit(token::Char, "a", None));
        })
    }

    // a character literal holding a space
    #[test]
    fn character_space() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(),
                       mk_lit(token::Char, " ", None));
        })
    }

    // the literal's symbol keeps the escape sequence as written (`\n` stays a
    // backslash followed by `n`)
    #[test]
    fn character_escaped() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            assert_eq!(setup(&sm, &sh, "'\\n'".to_string()).next_token(),
                       mk_lit(token::Char, "\\n", None));
        })
    }

    // the lifetime's symbol includes the leading `'`
    #[test]
    fn lifetime_name() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            assert_eq!(setup(&sm, &sh, "'abc".to_string()).next_token(),
                       token::Lifetime(Symbol::intern("'abc")));
        })
    }

    // a raw string delimited by three `#`s; quotes, a shorter `"#` run, a
    // backslash and a NUL inside the delimiters all end up in the symbol
    #[test]
    fn raw_string() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
                       mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None));
        })
    }

    // a suffix directly following a literal becomes part of the literal token;
    // one separated by whitespace does not
    #[test]
    fn literal_suffixes() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            macro_rules! test {
                ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                    assert_eq!(setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
                               mk_lit(token::$tok_type, $tok_contents, Some("suffix")));
                    // with a whitespace separator:
                    assert_eq!(setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
                               mk_lit(token::$tok_type, $tok_contents, None));
                }}
            }

            test!("'a'", Char, "a");
            test!("b'a'", Byte, "a");
            test!("\"a\"", Str, "a");
            test!("b\"a\"", ByteStr, "a");
            test!("1234", Integer, "1234");
            test!("0b101", Integer, "0b101");
            test!("0xABC", Integer, "0xABC");
            test!("1.0", Float, "1.0");
            test!("1.0e10", Float, "1.0e10");

            assert_eq!(setup(&sm, &sh, "2us".to_string()).next_token(),
                       mk_lit(token::Integer, "2", Some("us")));
            assert_eq!(setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
                       mk_lit(token::StrRaw(3), "raw", Some("suffix")));
            assert_eq!(setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
                       mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")));
        })
    }

    // `///` starts a doc comment, `////` does not
    #[test]
    fn line_doc_comments() {
        assert!(is_doc_comment("///"));
        assert!(is_doc_comment("/// blah"));
        assert!(!is_doc_comment("////"));
    }

    // a nested block comment is lexed as one comment token, and lexing
    // continues normally right after it
    #[test]
    fn nested_block_comments() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
            assert_eq!(lexer.next_token(), token::Comment);
            assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
        })
    }

    // with CRLF line endings, the `\r` is part of neither the first comment's
    // span (bytes 0..7 cover only `// test`) nor the doc comment's symbol
    #[test]
    fn crlf_comments() {
        with_default_globals(|| {
            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
            let sh = mk_sess(sm.clone());
            let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
            let comment = lexer.next_token();
            assert_eq!(comment.kind, token::Comment);
            assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
            assert_eq!(lexer.next_token(), token::Whitespace);
            assert_eq!(lexer.next_token(), token::DocComment(Symbol::intern("/// test")));
        })
    }
}
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								use crate::parse::ParseSess;
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								use crate::parse::token::{self, Token, TokenKind};
-												Pre-intern "0", "1", ..., "9", and use where appropriate.

											
										
										
											2019-05-22 19:25:39 +10:00
+								use crate::symbol::{sym, Symbol};
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								use crate::parse::unescape;
 								use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								use errors::{FatalError, Diagnostic, DiagnosticBuilder};
-												cleanup shebang handling in the lexer

											
										
										
											2019-04-04 10:55:30 +03:00
+								use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
-												Deprecate the std_unicode crate

											
										
										
											2018-04-05 17:20:08 +02:00
+								use core::unicode::property::Pattern_White_Space;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												syntax: remove uses of `.into_cow()`

											
										
										
											2015-04-15 22:15:50 -07:00
+								use std::borrow::Cow;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								use std::char;
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								use std::iter;
-												Replace Rc with Lrc for shared data

											
										
										
											2018-02-27 17:11:14 +01:00
+								use rustc_data_structures::sync::Lrc;
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
+								use log::debug;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
 								pub mod comments;
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								mod tokentrees;
-												Detect confusing unicode characters and show the alternative

											
										
										
											2015-11-15 02:37:49 +05:30
+								mod unicode_chars;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												Deduplicate mismatched delimiter errors

Delay unmatched delimiter errors until after the parser has run to
deduplicate them when parsing and attempt recovering intelligently.

											
										
										
											2019-01-27 21:04:50 -08:00
+								#[derive(Clone, Debug)]
 								pub struct UnmatchedBrace {
 								    pub expected_delim: token::DelimToken,
 								    pub found_delim: token::DelimToken,
 								    pub found_span: Span,
 								    pub unclosed_span: Option<Span>,
 								    pub candidate_span: Option<Span>,
 								}
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								pub struct StringReader<'a> {
-												make StringReader fields private

											
										
										
											2019-04-03 18:20:50 +03:00
+								    crate sess: &'a ParseSess,
-												mv codemap source_map

											
										
										
											2018-08-18 12:14:14 +02:00
+								    /// The absolute offset within the source_map of the next character to read
-												make StringReader fields private

											
										
										
											2019-04-03 18:20:50 +03:00
+								    crate next_pos: BytePos,
-												mv codemap source_map

											
										
										
											2018-08-18 12:14:14 +02:00
+								    /// The absolute offset within the source_map of the current character
-												make StringReader fields private

											
										
										
											2019-04-03 18:20:50 +03:00
+								    crate pos: BytePos,
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								    /// The current character (which has been read from self.pos)
-												make StringReader fields private

											
										
										
											2019-04-03 18:20:50 +03:00
+								    crate ch: Option<char>,
 								    crate source_file: Lrc<syntax_pos::SourceFile>,
-												Remove `StringReader::terminator`.

It's silly for a hot function like `bump()` to have such an expensive
bounds check. This patch replaces terminator with `end_src_index`.

Note that the `self.terminator` check in `is_eof()` wasn't necessary
because of the way `StringReader` is initialized.

											
										
										
											2018-05-09 12:55:13 +10:00
+								    /// Stop reading src at this index.
-												make StringReader fields private

											
										
										
											2019-04-03 18:20:50 +03:00
+								    crate end_src_index: usize,
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fatal_errs: Vec<DiagnosticBuilder<'a>>,
-												Encode codemap and span information in crate metadata.

This allows to create proper debuginfo line information for items inlined from other crates (e.g. instantiations of generics).
Only the codemap's 'metadata' is stored in a crate's metadata. That is, just filename, line-beginnings, etc. but not the actual source code itself. We are thus missing the opportunity of making Rust the first "open-source-only" programming language out there. Pity.

											
										
										
											2015-02-11 18:29:49 +01:00
+								    // cache a direct reference to the source text, so that we don't have to
-												mv filemap source_file

											
										
										
											2018-08-18 12:13:56 +02:00
+								    // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time.
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								    src: Lrc<String>,
-												Move token tree related lexer state to a separate struct

We only used a bunch of fields when tokenizing into a token tree,
so let's move them out of the base lexer

											
										
										
											2019-05-12 19:55:16 +03:00
+								    override_span: Option<Span>,
-												Refactor how spans are combined in the parser.

											
										
										
											2017-03-15 00:22:48 +00:00
+								}
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								impl<'a> StringReader<'a> {
-												move constructors to top

											
										
										
											2019-07-03 13:31:52 +03:00
+								    pub fn new(sess: &'a ParseSess,
 								               source_file: Lrc<syntax_pos::SourceFile>,
 								               override_span: Option<Span>) -> Self {
 								        let mut sr = StringReader::new_internal(sess, source_file, override_span);
 								        sr.bump();
 								        sr
 								    }
 								    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
 								        let begin = sess.source_map().lookup_byte_offset(span.lo());
 								        let end = sess.source_map().lookup_byte_offset(span.hi());
 								        // Make the range zero-length if the span is invalid.
 								        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
 								            span = span.shrink_to_lo();
 								        }
 								        let mut sr = StringReader::new_internal(sess, begin.sf, None);
 								        // Seek the lexer to the right byte range.
 								        sr.next_pos = span.lo();
 								        sr.end_src_index = sr.src_index(span.hi());
 								        sr.bump();
 								        sr
-												rustc: Fix joint-ness of stringified token-streams

This commit fixes `StringReader`'s parsing of tokens which have been stringified
through procedural macros. Whether or not a token tree is joint is defined by
span information, but when working with procedural macros these spans are often
dummy and/or overridden which means that they end up considering all operators
joint if they can!

The fix here is to track the raw source span as opposed to the overridden span.
With this information we can more accurately classify `Punct` structs as either
joint or not.

Closes #50700

											
										
										
											2018-05-17 09:30:43 -07:00
+								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												move constructors to top

											
										
										
											2019-07-03 13:31:52 +03:00
+								    fn new_internal(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>,
 								        override_span: Option<Span>) -> Self
 								    {
 								        if source_file.src.is_none() {
 								            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
 								                                              source_file.name));
 								        }
 								        let src = (*source_file.src.as_ref().unwrap()).clone();
 								        StringReader {
 								            sess,
 								            next_pos: source_file.start_pos,
 								            pos: source_file.start_pos,
 								            ch: Some('\n'),
 								            source_file,
 								            end_src_index: src.len(),
 								            src,
 								            fatal_errs: Vec::new(),
 								            override_span,
 								        }
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
-												remove unused mk_sp_and_raw

											
										
										
											2019-07-03 15:09:06 +03:00
+								        self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								        match res {
 								            Ok(tok) => tok,
 								            Err(_) => {
 								                self.emit_fatal_errors();
-												Do not capture stderr in the compiler. Instead just panic silently for fatal errors

											
										
										
											2018-01-21 12:47:58 +01:00
+								                FatalError.raise();
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								            }
 								        }
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												slightly comment lexer API

											
										
										
											2019-07-03 14:06:10 +03:00
+								    /// Returns the next token, including trivia like whitespace or comments.
 								    ///
 								    /// `Err(())` means that some errors were encountered, which can be
 								    /// retrieved using `buffer_fatal_errors`.
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								    pub fn try_next_token(&mut self) -> Result<Token, ()> {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								        assert!(self.fatal_errs.is_empty());
-												remove peek_span_src_raw from StringReader

											
										
										
											2019-07-03 12:52:22 +03:00
+								        match self.scan_whitespace_or_comment() {
-												don't rely on spans when checking tokens for jointness

											
										
										
											2019-07-03 15:07:41 +03:00
+								            Some(comment) => Ok(comment),
-												remove peek_span_src_raw from StringReader

											
										
										
											2019-07-03 12:52:22 +03:00
+								            None => {
 								                let (kind, start_pos, end_pos) = if self.is_eof() {
 								                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
 								                } else {
 								                    let start_pos = self.pos;
 								                    (self.next_token_inner()?, start_pos, self.pos)
 								                };
-												remove unused mk_sp_and_raw

											
										
										
											2019-07-03 15:09:06 +03:00
+								                let span = self.mk_sp(start_pos, end_pos);
 								                Ok(Token::new(kind, span))
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								            }
 								        }
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												don't rely on spans when checking tokens for jointness

											
										
										
											2019-07-03 15:07:41 +03:00
+								    /// Returns the next token, including trivia like whitespace or comments.
 								    ///
 								    /// Aborts in case of an error.
 								    pub fn next_token(&mut self) -> Token {
 								        let res = self.try_next_token();
-												Introduce `string_reader.parse_all_token_trees()`.

											
										
										
											2017-01-12 23:32:00 +00:00
+								        self.unwrap_or_abort(res)
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
 								    #[inline]
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    fn is_eof(&self) -> bool {
-												Remove `StringReader::terminator`.

It's silly for a hot function like `bump()` to have such an expensive
bounds check. This patch replaces terminator with `end_src_index`.

Note that the `self.terminator` check in `is_eof()` wasn't necessary
because of the way `StringReader` is initialized.

											
										
										
											2018-05-09 12:55:13 +10:00
+								        self.ch.is_none()
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    }
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
-												Clean up minor bits

											
										
										
											2019-05-13 11:41:24 +02:00
+								    fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) -> ! {
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								        let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string");
 								        err.span_label(self.mk_sp(pos, pos), "unterminated raw string");
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								        if hash_count > 0 {
 								            err.note(&format!("this raw string should be terminated with `\"{}`",
-												Change the hashcounts in raw `Lit` variants from usize to u16.

This reduces the size of `Token` from 32 bytes to 24 bytes on 64-bit
platforms.

											
										
										
											2018-04-12 19:50:53 +10:00
+								                              "#".repeat(hash_count as usize)));
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								        err.emit();
 								        FatalError.raise();
 								    }
-												make StringReader methods private

											
										
										
											2019-04-03 18:33:54 +03:00
+								    crate fn emit_fatal_errors(&mut self) {
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								        for err in &mut self.fatal_errs {
 								            err.emit();
 								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								        self.fatal_errs.clear();
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												buffer errors from initial tokenization when parsing

											
										
										
											2018-11-01 11:57:29 -05:00
+								    pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
 								        let mut buffer = Vec::new();
 								        for err in self.fatal_errs.drain(..) {
 								            err.buffer(&mut buffer);
 								        }
 								        buffer
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    #[inline]
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn ch_is(&self, c: char) -> bool {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        self.ch == Some(c)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												Don't require mutable StringReader to emit lexer errors

Teach StringReader how to emit errors for arbitrary spans, so we don't
need to modify peek_span. This allows for emitting errors without having
a &mut borrow of the StringReader.

											
										
										
											2014-05-24 01:12:22 -07:00
+								    /// Report a fatal lexical error with a given span.
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								        self.sess.span_diagnostic.span_fatal(sp, m)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												Don't require mutable StringReader to emit lexer errors

Teach StringReader how to emit errors for arbitrary spans, so we don't
need to modify peek_span. This allows for emitting errors without having
a &mut borrow of the StringReader.

											
										
										
											2014-05-24 01:12:22 -07:00
+								    /// Report a lexical error with a given span.
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn err_span(&self, sp: Span, m: &str) {
-												Continue evaluating after parsing incorrect binary literal

											
										
										
											2019-01-11 19:56:41 -08:00
+								        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-												Don't require mutable StringReader to emit lexer errors

Teach StringReader how to emit errors for arbitrary spans, so we don't
need to modify peek_span. This allows for emitting errors without having
a &mut borrow of the StringReader.

											
										
										
											2014-05-24 01:12:22 -07:00
+								    }
-												Improve incomplete unicode escape reporting

This improves diagnostic messages when \u escape is used incorrectly and { is
missing. Instead of saying “unknown character escape: u”, it will now report
that unicode escape sequence is incomplete and suggest what the correct syntax
is.

											
										
										
											2015-07-10 21:37:21 +03:00
-												Don't require mutable StringReader to emit lexer errors

Teach StringReader how to emit errors for arbitrary spans, so we don't
need to modify peek_span. This allows for emitting errors without having
a &mut borrow of the StringReader.

											
										
										
											2014-05-24 01:12:22 -07:00
+								    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-												Start pushing panics outward in lexer.

											
										
										
											2015-10-23 19:20:03 -07:00
+								    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-												Don't require mutable StringReader to emit lexer errors

Teach StringReader how to emit errors for arbitrary spans, so we don't
need to modify peek_span. This allows for emitting errors without having
a &mut borrow of the StringReader.

											
										
										
											2014-05-24 01:12:22 -07:00
+								    }
 								    /// Report a lexical error spanning [`from_pos`, `to_pos`).
 								    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								        self.err_span(self.mk_sp(from_pos, to_pos), m)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
 								    /// Report a fatal lexical error spanning [`from_pos`, `to_pos`), appending an
 								    /// escaped character to the error message.
-												Start pushing panics outward in lexer.

											
										
										
											2015-10-23 19:20:03 -07:00
+								    fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        let mut m = m.to_string();
 								        m.push_str(": ");
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        push_escaped_char(&mut m, c);
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Start pushing panics outward in lexer.

											
										
										
											2015-10-23 19:20:03 -07:00
+								        self.fatal_span_(from_pos, to_pos, &m[..])
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str)
 								        -> DiagnosticBuilder<'a>
 								    {
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m)
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char)
 								        -> DiagnosticBuilder<'a>
 								    {
-												use structured errors

											
										
										
											2015-12-21 10:00:43 +13:00
+								        let mut m = m.to_string();
 								        m.push_str(": ");
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        push_escaped_char(&mut m, c);
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								        self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
-												use structured errors

											
										
										
											2015-12-21 10:00:43 +13:00
+								    }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								    #[inline]
 								    fn src_index(&self, pos: BytePos) -> usize {
-												mv filemap source_file

											
										
										
											2018-08-18 12:13:56 +02:00
+								        (pos - self.source_file.start_pos).to_usize()
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								    /// Slice of the source text from `start` up to but excluding `self.pos`,
 								    /// meaning the slice does not include the character `self.ch`.
 								    fn str_from(&self, start: BytePos) -> &str
-												libsyntax: use unboxed closures

											
										
										
											2014-12-08 13:28:32 -05:00
+								    {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        self.str_from_to(start, self.pos)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								    /// Creates a Symbol from a given offset to the current offset.
 								    fn symbol_from(&self, start: BytePos) -> Symbol {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        debug!("taking an ident from {:?} to {:?}", start, self.pos);
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        Symbol::intern(self.str_from(start))
-												lexer: add ident_from and ident_from_to methods

											
										
										
											2014-06-24 17:44:50 -07:00
+								    }
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								    /// As symbol_from, with an explicit endpoint.
 								    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can faithfully be represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								        debug!("taking an ident from {:?} to {:?}", start, end);
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        Symbol::intern(self.str_from_to(start, end))
-												lexer: add ident_from and ident_from_to methods

											
										
										
											2014-06-24 17:44:50 -07:00
+								    }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								    /// Slice of the source text spanning from `start` up to but excluding `end`.
 								    fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
-												libsyntax: use unboxed closures

											
										
										
											2014-12-08 13:28:32 -05:00
+								    {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        &self.src[self.src_index(start)..self.src_index(end)]
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								    /// Converts CRLF to LF in the given string, raising an error on bare CR.
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								        let mut chars = s.char_indices().peekable();
 								        while let Some((i, ch)) = chars.next() {
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								            if ch == '\r' {
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								                if let Some((lf_idx, '\n')) = chars.peek() {
 								                    return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                }
 								                let pos = start + BytePos(i as u32);
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								                let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                self.err_span_(pos, end_pos, errmsg);
 								            }
 								        }
-												syntax: remove uses of `.into_cow()`

											
										
										
											2015-04-15 22:15:50 -07:00
+								        return s.into();
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
+								        fn translate_crlf_(rdr: &StringReader<'_>,
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                           start: BytePos,
 								                           s: &str,
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								                           mut j: usize,
 								                           mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
 								                           errmsg: &str)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                           -> String {
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								            let mut buf = String::with_capacity(s.len());
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								            // Skip first CR
 								            buf.push_str(&s[.. j - 1]);
 								            while let Some((i, ch)) = chars.next() {
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                if ch == '\r' {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    if j < i {
 								                        buf.push_str(&s[j..i]);
 								                    }
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								                    let next = i + ch.len_utf8();
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                    j = next;
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								                    if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                        let pos = start + BytePos(i as u32);
 								                        let end_pos = start + BytePos(next as u32);
 								                        rdr.err_span_(pos, end_pos, errmsg);
 								                    }
 								                }
 								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            if j < s.len() {
 								                buf.push_str(&s[j..]);
 								            }
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								            buf
 								        }
 								    }
-												Update outdated code comments in StringReader

											
										
										
											2018-11-26 21:21:17 +01:00
+								    /// Advance the StringReader by one character.
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    crate fn bump(&mut self) {
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								        let next_src_index = self.src_index(self.next_pos);
-												Remove `StringReader::terminator`.

It's silly for a hot function like `bump()` to have such an expensive
bounds check. This patch replaces terminator with `end_src_index`.

Note that the `self.terminator` check in `is_eof()` wasn't necessary
because of the way `StringReader` is initialized.

											
										
										
											2018-05-09 12:55:13 +10:00
+								        if next_src_index < self.end_src_index {
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								            let next_ch = char_at(&self.src, next_src_index);
-												Tweak naming and ordering in `StringReader::bump()`.

This patch removes the "old"/"new" names in favour of "foo"/"next_foo",
which matches the field names.

It also moves the setting of `self.{ch,pos,next_pos}` in the common case
to the end, so that the meaning of "foo"/"next_foo" is consistent until
the end.

											
										
										
											2018-05-04 06:38:15 +10:00
+								            let next_ch_len = next_ch.len_utf8();
 								            self.ch = Some(next_ch);
 								            self.pos = self.next_pos;
 								            self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        } else {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            self.ch = None;
-												Tweak naming and ordering in `StringReader::bump()`.

This patch removes the "old"/"new" names in favour of "foo"/"next_foo",
which matches the field names.

It also moves the setting of `self.{ch,pos,next_pos}` in the common case
to the end, so that the meaning of "foo"/"next_foo" is consistent until
the end.

											
										
										
											2018-05-04 06:38:15 +10:00
+								            self.pos = self.next_pos;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								    }
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn nextch(&self) -> Option<char> {
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								        let next_src_index = self.src_index(self.next_pos);
-												Remove `StringReader::terminator`.

It's silly for a hot function like `bump()` to have such an expensive
bounds check. This patch replaces terminator with `end_src_index`.

Note that the `self.terminator` check in `is_eof()` wasn't necessary
because of the way `StringReader` is initialized.

											
										
										
											2018-05-09 12:55:13 +10:00
+								        if next_src_index < self.end_src_index {
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								            Some(char_at(&self.src, next_src_index))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        } else {
 								            None
 								        }
 								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    #[inline]
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn nextch_is(&self, c: char) -> bool {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        self.nextch() == Some(c)
 								    }
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn nextnextch(&self) -> Option<char> {
-												Rename some stuff in `StringReader`.

- `source_text` becomes `src`, matching `FileMap::src`.

- `byte_offset()` becomes `src_index()`, which makes it clearer that
  it's an index into `src`. (Likewise for variables containing
  `byte_offset` in their name.) This function also now returns a `usize`
  instead of a `BytePos`, because every callsite immediately converted
  the `BytePos` to a `usize`.

											
										
										
											2018-05-09 12:49:39 +10:00
+								        let next_src_index = self.src_index(self.next_pos);
-												Make `nextnextch()` more closely resemble `nextch()`.

											
										
										
											2018-05-09 13:20:55 +10:00
+								        if next_src_index < self.end_src_index {
 								            let next_next_src_index =
 								                next_src_index + char_at(&self.src, next_src_index).len_utf8();
 								            if next_next_src_index < self.end_src_index {
 								                return Some(char_at(&self.src, next_next_src_index));
 								            }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
-												Make `nextnextch()` more closely resemble `nextch()`.

											
										
										
											2018-05-09 13:20:55 +10:00
+								        None
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    #[inline]
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								    fn nextnextch_is(&self, c: char) -> bool {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        self.nextnextch() == Some(c)
 								    }
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								    /// Eats <XID_start><XID_continue>*, if possible.
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								    fn scan_optional_raw_name(&mut self) -> Option<Symbol> {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if !ident_start(self.ch) {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            return None;
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start = self.pos;
-												Tweak identifier lexing.

By calling `bump()` after getting the first char, to avoid a redundant
`ident_continue()` test on it.

											
										
										
											2018-05-31 10:52:29 +10:00
+								        self.bump();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        while ident_continue(self.ch) {
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								            self.bump();
 								        }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        match self.str_from(start) {
 								            "_" => {
-												Add warning cycle #42326.

											
										
										
											2017-05-31 16:43:47 +09:00
+								                self.sess.span_diagnostic
-												Address review comments.

											
										
										
											2017-06-05 01:41:33 +00:00
+								                    .struct_span_warn(self.mk_sp(start, self.pos),
-												Add warning cycle #42326.

											
										
										
											2017-05-31 16:43:47 +09:00
+								                                      "underscore literal suffix is not allowed")
 								                    .warn("this was previously accepted by the compiler but is \
 								                          being phased out; it will become a hard error in \
 								                          a future release!")
 								                    .note("for more information, see issue #42326 \
 								                          <https://github.com/rust-lang/rust/issues/42326>")
 								                    .emit();
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								                None
 								            }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								            name => Some(Symbol::intern(name))
 								        }
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								    }
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								    /// PRECONDITION: self.ch is not whitespace
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    /// Eats any kind of comment.
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								    fn scan_comment(&mut self) -> Option<Token> {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if let Some(c) = self.ch {
-												prefer `if let` to match with `None => {}` arm in some places

This is a spiritual successor to #34268/8531d581, in which we replaced a
number of matches of None to the unit value with `if let` conditionals
where it was judged that this made for clearer/simpler code (as would be
recommended by Manishearth/rust-clippy's `single_match` lint). The same
rationale applies to matches of None to the empty block.

											
										
										
											2016-07-03 14:38:37 -07:00
+								            if c.is_whitespace() {
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								                let msg = "called consume_any_line_comment, but there was whitespace";
-												Simplify `hygiene::Mark` application, and
remove variant `Token::SubstNt` in favor of `quoted::TokenTree::MetaVar`.

											
										
										
											2017-03-28 05:32:43 +00:00
+								                self.sess.span_diagnostic.span_err(self.mk_sp(self.pos, self.pos), msg);
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if self.ch_is('/') {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            match self.nextch() {
 								                Some('/') => {
 								                    self.bump();
 								                    self.bump();
-												Fix CRLF line-ending parsing for comments.
											
										
										
											2015-05-08 20:33:58 +01:00
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                    // line comments starting with "///" or "//!" are doc-comments
-												Allow bare CR in ////-style comment.

											
										
										
											2017-05-08 22:29:24 +09:00
+								                    let doc_comment = (self.ch_is('/') && !self.nextch_is('/')) || self.ch_is('!');
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                    let start_bpos = self.pos - BytePos(2);
-												Fix CRLF line-ending parsing for comments.
											
										
										
											2015-05-08 20:33:58 +01:00
 								                    while !self.is_eof() {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                        match self.ch.unwrap() {
-												Fix CRLF line-ending parsing for comments.
											
										
										
											2015-05-08 20:33:58 +01:00
+								                            '\n' => break,
 								                            '\r' => {
 								                                if self.nextch_is('\n') {
 								                                    // CRLF
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                                    break;
-												Fix byte offset and error message inconsistencies
											
										
										
											2015-05-13 22:05:01 +01:00
+								                                } else if doc_comment {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                                    self.err_span_(self.pos,
-												Rename StringReader::pos as next_pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:41:01 +11:00
+								                                                   self.next_pos,
-												Fix byte offset and error message inconsistencies
											
										
										
											2015-05-13 22:05:01 +01:00
+								                                                   "bare CR not allowed in doc-comment");
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								                                }
 								                            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                            _ => (),
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                        }
-												Fix CRLF line-ending parsing for comments.
											
										
										
											2015-05-08 20:33:58 +01:00
+								                        self.bump();
 								                    }
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								                    let kind = if doc_comment {
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                        token::DocComment(self.symbol_from(start_bpos))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                    } else {
-												Simplify doc comment lexing

is_doc_comment function checks the first four chars, but this is
redundant, `doc_comment` local var has the same info.

											
										
										
											2019-04-04 19:51:13 +03:00
+								                        token::Comment
 								                    };
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								                    Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                }
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								                Some('*') => {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
 								                    self.bump();
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								                    self.scan_block_comment()
 								                }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                _ => None,
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        } else if self.ch_is('#') {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            if self.nextch_is('!') {
 								                // Parse an inner attribute.
 								                if self.nextnextch_is('[') {
 								                    return None;
 								                }
-												cleanup shebang handling in the lexer

											
										
										
											2019-04-04 10:55:30 +03:00
+								                let is_beginning_of_file = self.pos == self.source_file.start_pos;
 								                if is_beginning_of_file {
-												normalize use of backticks in compiler messages for libsyntax/parse

https://github.com/rust-lang/rust/issues/60532

											
										
										
											2019-07-15 23:23:39 +02:00
+								                    debug!("skipping a shebang");
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                    let start = self.pos;
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                    while !self.ch_is('\n') && !self.is_eof() {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                        self.bump();
 								                    }
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								                    return Some(Token::new(
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                        token::Shebang(self.symbol_from(start)),
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								                        self.mk_sp(start, self.pos),
 								                    ));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                }
 								            }
 								            None
 								        } else {
 								            None
 								        }
 								    }
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
-												rustc: doc comments

											
										
										
											2019-02-08 14:53:55 +01:00
+								    /// return `None`.
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								    fn scan_whitespace_or_comment(&mut self) -> Option<Token> {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        match self.ch.unwrap_or('\0') {
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								            // # to handle shebang at start of file -- this is the entry point
 								            // for skipping over all "junk"
 								            '/' | '#' => {
 								                let c = self.scan_comment();
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can be faithfully represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								                debug!("scanning a comment {:?}", c);
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								                c
-												libsyntax: accept only whitespace with the PATTERN_WHITE_SPACE property

This aligns with unicode recommendations and should be stable for all future
unicode releases. See http://unicode.org/reports/tr31/#R3.

This renames `libsyntax::lexer::is_whitespace` to `is_pattern_whitespace`
so potentially breaks users of libsyntax.

											
										
										
											2015-11-12 02:43:43 +00:00
+								            },
 								            c if is_pattern_whitespace(Some(c)) => {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let start_bpos = self.pos;
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                while is_pattern_whitespace(self.ch) {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
 								                }
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								                let c = Some(Token::new(token::Whitespace, self.mk_sp(start_bpos, self.pos)));
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can be faithfully represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								                debug!("scanning whitespace: {:?}", c);
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								                c
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            _ => None,
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								        }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												syntax: doc comments all the things

											
										
										
											2014-06-09 13:12:30 -07:00
+								    /// Might return a sugared-doc-attr
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								    fn scan_block_comment(&mut self) -> Option<Token> {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        // block comments starting with "/**" or "/*!" are doc-comments
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        let is_doc_comment = self.ch_is('*') || self.ch_is('!');
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start_bpos = self.pos - BytePos(2);
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												libsyntax: int types -> isize

											
										
										
											2015-01-18 00:18:19 +00:00
+								        let mut level: isize = 1;
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								        let mut has_cr = false;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        while level > 0 {
 								            if self.is_eof() {
 								                let msg = if is_doc_comment {
 								                    "unterminated block doc-comment"
 								                } else {
 								                    "unterminated block comment"
 								                };
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let last_bpos = self.pos;
-												Do not capture stderr in the compiler. Instead just panic silently for fatal errors

											
										
										
											2018-01-21 12:47:58 +01:00
+								                self.fatal_span_(start_bpos, last_bpos, msg).raise();
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            let n = self.ch.unwrap();
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								            match n {
 								                '/' if self.nextch_is('*') => {
 								                    level += 1;
 								                    self.bump();
 								                }
 								                '*' if self.nextch_is('/') => {
 								                    level -= 1;
 								                    self.bump();
 								                }
 								                '\r' => {
 								                    has_cr = true;
 								                }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                _ => (),
-												Handle CRLF properly in the lexer

The lexer already ignores CRLF in between tokens, but it doesn't
properly handle carriage returns inside strings and doc comments. Teach
it to treat CRLF as LF inside these tokens, and to disallow carriage
returns that are not followed by linefeeds. This includes handling an
escaped CRLF inside a regular string token the same way it handles an
escaped LF.

This is technically a breaking change, as bare carriage returns are no
longer allowed, and CRLF sequences are now treated as LF inside strings
and doc comments, but it's very unlikely to actually affect any
real-world code.

This change is necessary to have Rust code compile on Windows the same
way it does on Unix. The mozilla/rust repository explicitly sets eol=lf
for Rust source files, but other Rust repositories don't. Notably,
rust-http cannot be compiled on Windows without converting the CRLF line
endings back to LF.

[breaking-change]

											
										
										
											2014-05-24 01:13:59 -07:00
+								            }
 								            self.bump();
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        let string = self.str_from(start_bpos);
 								        // but comments with only "*"s between two "/"s are not
 								        let kind = if is_block_doc_comment(string) {
 								            let string = if has_cr {
 								                self.translate_crlf(start_bpos,
 								                                    string,
 								                                    "bare CR not allowed in block doc-comment")
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								            } else {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                string.into()
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								            };
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								            token::DocComment(Symbol::intern(&string[..]))
 								        } else {
 								            token::Comment
 								        };
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								    /// Scan through any digits (base `scan_radix`) or underscores,
 								    /// and return how many digits there were.
 								    ///
 								    /// `real_radix` represents the true radix of the number we're
 								    /// interested in, and errors will be emitted for any digits
 								    /// between `real_radix` and `scan_radix`.
 								    fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
 								        assert!(real_radix <= scan_radix);
-												Omit integer suffix when unnecessary

See PR #21378 for context

											
										
										
											2015-01-28 01:01:48 +00:00
+								        let mut len = 0;
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        loop {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            let c = self.ch;
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            if c == Some('_') {
 								                debug!("skipping a _");
 								                self.bump();
 								                continue;
 								            }
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								            match c.and_then(|cc| cc.to_digit(scan_radix)) {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                Some(_) => {
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can be faithfully represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								                    debug!("{:?} in scan_digits", c);
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								                    // check that the hypothetical digit is actually
 								                    // in range for the true radix
 								                    if c.unwrap().to_digit(real_radix).is_none() {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                        self.err_span_(self.pos,
-												Rename StringReader::pos as next_pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:41:01 +11:00
+								                                       self.next_pos,
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                                       &format!("invalid digit for a base {} literal", real_radix));
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								                    }
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                    len += 1;
 								                    self.bump();
 								                }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                _ => return len,
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								        }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								    /// Lex a LIT_INTEGER or a LIT_FLOAT
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								    fn scan_number(&mut self, c: char) -> (token::LitKind, Symbol) {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        let mut base = 10;
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start_bpos = self.pos;
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        self.bump();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								        let num_digits = if c == '0' {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            match self.ch.unwrap_or('\0') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                'b' => {
 								                    self.bump();
 								                    base = 2;
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								                    self.scan_digits(2, 10)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
 								                'o' => {
 								                    self.bump();
 								                    base = 8;
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								                    self.scan_digits(8, 10)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
 								                'x' => {
 								                    self.bump();
 								                    base = 16;
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								                    self.scan_digits(16, 16)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
-												migrate codebase to `..=` inclusive range patterns

These were stabilized in March 2018's #47813, and are the Preferred Way
to Do It going forward (q.v. #51043).

											
										
										
											2018-05-28 19:42:11 -07:00
+								                '0'..='9' | '_' | '.' | 'e' | 'E' => {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								                    self.scan_digits(10, 10) + 1
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                }
 								                _ => {
 								                    // just a 0
-												Pre-intern "0", "1", ..., "9", and use where appropriate.

											
										
										
											2019-05-22 19:25:39 +10:00
+								                    return (token::Integer, sym::integer(0));
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												Fix various deprecation warnings from char changes

											
										
										
											2014-10-13 13:03:42 -07:00
+								        } else if c.is_digit(10) {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								            self.scan_digits(10, 10) + 1
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        } else {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
 								        };
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
 								        if num_digits == 0 {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								            self.err_span_(start_bpos, self.pos, "no valid digits found for number");
-												syntax: Pre-intern names of all built-in macros

They always end up interned anyway

											
										
										
											2019-06-23 14:59:42 +03:00
+								            return (token::Integer, sym::integer(0));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        // might be a float, but don't be greedy if this is actually an
 								        // integer literal followed by field/method access or a range pattern
 								        // (`0..2` and `12.foo()`)
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if self.ch_is('.') && !self.nextch_is('.') &&
-												Disallow ._ in float literal.

											
										
										
											2017-05-12 22:00:06 +09:00
+								           !ident_start(self.nextch()) {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								            // might have stuff after the ., and if it does, it needs to start
 								            // with a number
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            if self.ch.unwrap_or('\0').is_digit(10) {
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								                self.scan_digits(10, 10);
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                self.scan_float_exponent();
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								            let pos = self.pos;
 								            self.check_float_base(start_bpos, pos, base);
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								            (token::Float, self.symbol_from(start_bpos))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        } else {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								            // it might be a float if it has an exponent
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            if self.ch_is('e') || self.ch_is('E') {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                self.scan_float_exponent();
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let pos = self.pos;
 								                self.check_float_base(start_bpos, pos, base);
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                return (token::Float, self.symbol_from(start_bpos));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								            // but we certainly have an integer!
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								            (token::Integer, self.symbol_from(start_bpos))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								    }
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								    /// Scan over a float exponent.
 								    fn scan_float_exponent(&mut self) {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if self.ch_is('e') || self.ch_is('E') {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								            self.bump();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            if self.ch_is('-') || self.ch_is('+') {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								                self.bump();
 								            }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Lex binary and octal literals more eagerly.

Previously 0b12 was considered two tokens, 0b1 and 2, as 2 isn't a valid
base 2 digit. This patch changes that to collapse them into one (and
makes `0b12` etc. an error: 2 isn't a valid base 2 digit).

This may break some macro invocations of macros with `tt` (or syntax
extensions) that rely on adjacent digits being separate tokens and hence
is a

[breaking-change]

The fix is to separate the tokens, e.g. `0b12` -> `0b1 2`.

cc https://github.com/rust-lang/rfcs/pull/879

											
										
										
											2015-03-31 00:27:13 +11:00
+								            if self.scan_digits(10, 10) == 0 {
-												in which we check for confusable Unicodepoints in float literal exponent

The `FatalError.raise()` might seem unmotivated (in most places in
the compiler, `err.emit()` suffices), but it's actually used to
maintain behavior (viz., stop lexing, don't emit potentially spurious
errors looking for the next token after the bad Unicodepoint in the
exponent): the previous revision's `self.err_span_` ultimately calls
`Handler::emit`, which aborts if the `Handler`'s continue_after_error
flag is set, which seems to typically be true during lexing (see
`phase_1_parse_input` and how `CompileController::basic` has
`continue_parse_after_error: false` in librustc_driver).

Also, let's avoid apostrophes in error messages (the present author
would argue that users expect a reassuringly detached, formal,
above-it-all tone from a Serious tool like a compiler), and use an
RLS-friendly structured suggestion.

Resolves #49746.

											
										
										
											2018-04-15 14:30:23 -07:00
+								                let mut err = self.struct_span_fatal(
 								                    self.pos, self.next_pos,
 								                    "expected at least one digit in exponent"
 								                );
 								                if let Some(ch) = self.ch {
-												Various minor/cosmetic improvements to code

											
										
										
											2018-11-27 02:59:49 +00:00
+								                    // check for e.g., Unicode minus '−' (Issue #49746)
-												Fix test

											
										
										
											2018-05-25 21:57:02 -07:00
+								                    if unicode_chars::check_for_substitution(self, ch, &mut err) {
 								                        self.bump();
 								                        self.scan_digits(10, 10);
 								                    }
-												in which we check for confusable Unicodepoints in float literal exponent

The `FatalError.raise()` might seem unmotivated (in most places in
the compiler, `err.emit()` suffices), but it's actually used to
maintain behavior (viz., stop lexing, don't emit potentially spurious
errors looking for the next token after the bad Unicodepoint in the
exponent): the previous revision's `self.err_span_` ultimately calls
`Handler::emit`, which aborts if the `Handler`'s continue_after_error
flag is set, which seems to typically be true during lexing (see
`phase_1_parse_input` and how `CompileController::basic` has
`continue_parse_after_error: false` in librustc_driver).

Also, let's avoid apostrophes in error messages (the present author
would argue that users expect a reassuringly detached, formal,
above-it-all tone from a Serious tool like a compiler), and use an
RLS-friendly structured suggestion.

Resolves #49746.

											
										
										
											2018-04-15 14:30:23 -07:00
+								                }
 								                err.emit();
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								            }
 								        }
 								    }
-												rustc: doc comments

											
										
										
											2019-02-08 14:53:55 +01:00
+								    /// Checks that a base is valid for a floating literal, emitting a nice
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								    /// error if it isn't.
-												libsyntax: uint types to usize

											
										
										
											2015-01-17 23:33:05 +00:00
+								    fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        match base {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            16 => {
 								                self.err_span_(start_bpos,
 								                               last_bpos,
 								                               "hexadecimal float literal is not supported")
 								            }
 								            8 => {
 								                self.err_span_(start_bpos,
 								                               last_bpos,
 								                               "octal float literal is not supported")
 								            }
 								            2 => {
 								                self.err_span_(start_bpos,
 								                               last_bpos,
 								                               "binary float literal is not supported")
 								            }
 								            _ => (),
-												syntax: don't parse numeric literals in the lexer

This removes a bunch of token types. Tokens now store the original, unaltered
numeric literal (that is still checked for correctness), which is parsed into
an actual number later, as needed, when creating the AST.

This can change how syntax extensions work, but otherwise poses no visible
changes.

[breaking-change]

											
										
										
											2014-06-18 10:44:20 -07:00
+								        }
 								    }
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								    fn binop(&mut self, op: token::BinOpToken) -> TokenKind {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if self.ch_is('=') {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								            token::BinOpEq(op)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								            token::BinOp(op)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								    }
-												rustc: doc comments

											
										
										
											2019-02-08 14:53:55 +01:00
+								    /// Returns the next token from the string, advances the input past that
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    /// token, and updates the interner
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								    fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        let c = self.ch;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												Initial implementation of RFC 2151, Raw Identifiers

											
										
										
											2018-03-09 23:56:40 -06:00
+								        if ident_start(c) {
 								            let (is_ident_start, is_raw_ident) =
 								                match (c.unwrap(), self.nextch(), self.nextnextch()) {
 								                    // r# followed by an identifier starter is a raw identifier.
 								                    // This is an exception to the r# case below.
 								                    ('r', Some('#'), x) if ident_start(x) => (true, true),
 								                    // r as in r" or r#" is part of a raw string literal.
 								                    // b as in b' is part of a byte literal.
 								                    // They are not identifiers, and are handled further down.
 								                    ('r', Some('"'), _) |
 								                    ('r', Some('#'), _) |
 								                    ('b', Some('"'), _) |
 								                    ('b', Some('\''), _) |
 								                    ('b', Some('r'), Some('"')) |
 								                    ('b', Some('r'), Some('#')) => (false, false),
 								                    _ => (true, false),
 								                };
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Initial implementation of RFC 2151, Raw Identifiers

											
										
										
											2018-03-09 23:56:40 -06:00
+								            if is_ident_start {
 								                let raw_start = self.pos;
 								                if is_raw_ident {
 								                    // Consume the 'r#' characters.
 								                    self.bump();
 								                    self.bump();
 								                }
 								                let start = self.pos;
-												Tweak identifier lexing.

By calling `bump()` after getting the first char, to avoid a redundant
`ident_continue()` test on it.

											
										
										
											2018-05-31 10:52:29 +10:00
+								                self.bump();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Initial implementation of RFC 2151, Raw Identifiers

											
										
										
											2018-03-09 23:56:40 -06:00
+								                while ident_continue(self.ch) {
 								                    self.bump();
 								                }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                // FIXME: perform NFKC normalization here. (Issue #2253)
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                let name = self.symbol_from(start);
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                if is_raw_ident {
 								                    let span = self.mk_sp(raw_start, self.pos);
 								                    if !name.can_be_raw() {
 								                        self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
-												Move raw_identifiers check to the lexer.

											
										
										
											2018-03-18 11:21:38 -05:00
+								                    }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                    self.sess.raw_identifier_spans.borrow_mut().push(span);
 								                }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                return Ok(token::Ident(name, is_raw_ident));
-												Initial implementation of RFC 2151, Raw Identifiers

											
										
										
											2018-03-09 23:56:40 -06:00
+								            }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								        if is_dec_digit(c) {
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								            let (kind, symbol) = self.scan_number(c.unwrap());
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in a expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								            let suffix = self.scan_optional_raw_name();
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								            debug!("next_token_inner: scanned number {:?}, {:?}, {:?}", kind, symbol, suffix);
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								            return Ok(TokenKind::lit(kind, symbol, suffix));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								        match c.expect("next_token_inner called at EOF") {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            // One-byte tokens.
 								            ';' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Semi)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            ',' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Comma)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '.' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                if self.ch_is('.') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                    if self.ch_is('.') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                        self.bump();
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								                        Ok(token::DotDotDot)
-												Add support for `..=` syntax

Add ..= to the parser

Add ..= to libproc_macro

Add ..= to ICH

Highlight ..= in rustdoc

Update impl Debug for RangeInclusive to ..=

Replace `...` to `..=` in range docs

Make the dotdoteq warning point to the ...

Add warning for ... in expressions

Updated more tests to the ..= syntax

Updated even more tests to the ..= syntax

Updated the inclusive_range entry in unstable book

											
										
										
											2017-09-19 05:40:04 +00:00
+								                    } else if self.ch_is('=') {
 								                        self.bump();
 								                        Ok(token::DotDotEq)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    } else {
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								                        Ok(token::DotDot)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                } else {
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								                    Ok(token::Dot)
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '(' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::OpenDelim(token::Paren))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            ')' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::CloseDelim(token::Paren))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '{' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::OpenDelim(token::Brace))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '}' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::CloseDelim(token::Brace))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '[' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::OpenDelim(token::Bracket))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            ']' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::CloseDelim(token::Bracket))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '@' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::At)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '#' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Pound)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '~' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Tilde)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '?' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Question)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            ':' => {
 								                self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                if self.ch_is(':') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::ModSep)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::Colon)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            '$' => {
 								                self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(token::Dollar)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            // Multi-byte tokens.
 								            '=' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                if self.ch_is('=') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::EqEq)
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                } else if self.ch_is('>') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::FatArrow)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::Eq)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
 								            }
 								            '!' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                if self.ch_is('=') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::Ne)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::Not)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            '<' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                match self.ch.unwrap_or('\x00') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    '=' => {
 								                        self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(token::Le)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    '<' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(self.binop(token::Shl))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    '-' => {
 								                        self.bump();
-												Avoid an unnecessary `match` when lexing "<-".

											
										
										
											2018-05-31 13:37:44 +10:00
+								                        Ok(token::LArrow)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    _ => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(token::Lt)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                }
 								            }
 								            '>' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                match self.ch.unwrap_or('\x00') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    '=' => {
 								                        self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(token::Ge)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    '>' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(self.binop(token::Shr))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    _ => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(token::Gt)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                }
 								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            '\'' => {
 								                // Either a character constant 'a' OR a lifetime name 'abc
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let start_with_quote = self.pos;
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                self.bump();
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let start = self.pos;
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                // If the character is an ident start not followed by another single
 								                // quote, then this is a lifetime name:
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                let starts_with_number = self.ch.unwrap_or('\x00').is_numeric();
 								                if (ident_start(self.ch) || starts_with_number) && !self.nextch_is('\'') {
 								                    self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                    while ident_continue(self.ch) {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                        self.bump();
 								                    }
-												Display better snippet for invalid char literal

Given this code:

    fn main() {
        let _ = 'abcd';
    }

The compiler would give a message like:

    error: character literal may only contain one codepoint: ';
    let _ = 'abcd';
                 ^~

With this change, the message now displays:

    error: character literal may only contain one codepoint: 'abcd'
    let _ = 'abcd'
            ^~~~~~

Fixes #30033

											
										
										
											2016-01-07 16:12:28 +00:00
+								                    // lifetimes shouldn't end with a single quote
 								                    // if we find one, then this is an invalid character literal
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                    if self.ch_is('\'') {
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                        let symbol = self.symbol_from(start);
-												[WIP] Improve error behavior

											
										
										
											2019-01-20 04:37:29 +09:00
+								                        self.bump();
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                        self.validate_char_escape(start_with_quote);
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								                        return Ok(TokenKind::lit(token::Char, symbol, None));
-												Display better snippet for invalid char literal

Given this code:

    fn main() {
        let _ = 'abcd';
    }

The compiler would give a message like:

    error: character literal may only contain one codepoint: ';
    let _ = 'abcd';
                 ^~

With this change, the message now displays:

    error: character literal may only contain one codepoint: 'abcd'
    let _ = 'abcd'
            ^~~~~~

Fixes #30033

											
										
										
											2016-01-07 16:12:28 +00:00
+								                    }
-												librustc: Fix the issue with labels shadowing variable names by making
the leading quote part of the identifier for the purposes of hygiene.

This adopts @jbclements' solution to #14539.

I'm not sure if this is a breaking change or not.

Closes #12512.

[breaking-change]

											
										
										
											2014-06-10 13:54:13 -07:00
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    if starts_with_number {
-												Parse lifetimes that start with a number and give specific error

											
										
										
											2019-03-02 10:38:20 -08:00
+								                        // this is a recovered lifetime written `'1`, error but accept it
 								                        self.err_span_(
 								                            start_with_quote,
 								                            self.pos,
-												review comments

											
										
										
											2019-03-08 13:29:27 -08:00
+								                            "lifetimes cannot start with a number",
-												Parse lifetimes that start with a number and give specific error

											
										
										
											2019-03-02 10:38:20 -08:00
+								                        );
 								                    }
-												syntax: Remove duplicate span from `token::Lifetime`

											
										
										
											2019-06-05 11:00:22 +03:00
+								                    // Include the leading `'` in the real identifier, for macro
 								                    // expansion purposes. See #12512 for the gory details of why
 								                    // this is necessary.
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                    return Ok(token::Lifetime(self.symbol_from(start_with_quote)));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                }
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                let msg = "unterminated character literal";
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                let symbol = self.scan_single_quoted_string(start_with_quote, msg);
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                self.validate_char_escape(start_with_quote);
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                let suffix = self.scan_optional_raw_name();
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								                Ok(TokenKind::lit(token::Char, symbol, suffix))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            'b' => {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								                self.bump();
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                let (kind, symbol) = match self.ch {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    Some('\'') => {
 								                        let start_with_quote = self.pos;
 								                        self.bump();
 								                        let msg = "unterminated byte constant";
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                        let symbol = self.scan_single_quoted_string(start_with_quote, msg);
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                        self.validate_byte_escape(start_with_quote);
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                        (token::Byte, symbol)
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    },
 								                    Some('"') => {
 								                        let start_with_quote = self.pos;
 								                        let msg = "unterminated double quote byte string";
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                        let symbol = self.scan_double_quoted_string(msg);
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                        self.validate_byte_str_escape(start_with_quote);
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                        (token::ByteStr, symbol)
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    },
-												Prohibit bare CRs in raw byte strings

											
										
										
											2019-05-13 20:21:44 +02:00
+								                    Some('r') => {
 								                        let (start, end, hash_count) = self.scan_raw_string();
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                        let symbol = self.symbol_from_to(start, end);
-												Prohibit bare CRs in raw byte strings

											
										
										
											2019-05-13 20:21:44 +02:00
+								                        self.validate_raw_byte_str_escape(start, end);
 								                        (token::ByteStrRaw(hash_count), symbol)
 								                    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    _ => unreachable!(),  // Should have been a token::Ident above.
 								                };
 								                let suffix = self.scan_optional_raw_name();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								                Ok(TokenKind::lit(kind, symbol, suffix))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            '"' => {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                let start_with_quote = self.pos;
 								                let msg = "unterminated double quote string";
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                let symbol = self.scan_double_quoted_string(msg);
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                self.validate_str_escape(start_with_quote);
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                let suffix = self.scan_optional_raw_name();
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								                Ok(TokenKind::lit(token::Str, symbol, suffix))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            'r' => {
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								                let (start, end, hash_count) = self.scan_raw_string();
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                let symbol = self.symbol_from_to(start, end);
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								                self.validate_raw_str_escape(start, end);
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                let suffix = self.scan_optional_raw_name();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								                Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '-' => {
 								                if self.nextch_is('>') {
 								                    self.bump();
 								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::RArrow)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(self.binop(token::Minus))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
 								            }
 								            '&' => {
 								                if self.nextch_is('&') {
 								                    self.bump();
 								                    self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(token::AndAnd)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                } else {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                    Ok(self.binop(token::And))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
 								            }
 								            '|' => {
 								                match self.nextch() {
 								                    Some('|') => {
 								                        self.bump();
 								                        self.bump();
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(token::OrOr)
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                    _ => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                        Ok(self.binop(token::Or))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                    }
 								                }
 								            }
 								            '+' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(self.binop(token::Plus))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '*' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(self.binop(token::Star))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '/' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(self.binop(token::Slash))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '^' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(self.binop(token::Caret))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            '%' => {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                Ok(self.binop(token::Percent))
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            }
 								            c => {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let last_bpos = self.pos;
-												Rename StringReader::pos as next_pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:41:01 +11:00
+								                let bpos = self.next_pos;
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                let mut err = self.struct_fatal_span_char(last_bpos,
 								                                                          bpos,
 								                                                          "unknown start of token",
 								                                                          c);
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								                unicode_chars::check_for_substitution(self, c, &mut err);
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								                self.fatal_errs.push(err);
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Make some fatal lexer errors recoverable

											
										
										
											2016-04-25 17:20:32 +02:00
+								                Err(())
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            }
 								        }
 								    }
 								    fn read_to_eol(&mut self) -> String {
 								        let mut val = String::new();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        while !self.ch_is('\n') && !self.is_eof() {
 								            val.push(self.ch.unwrap());
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            self.bump();
 								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        if self.ch_is('\n') {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								            self.bump();
 								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								        val
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
 								    fn read_one_line_comment(&mut self) -> String {
 								        let val = self.read_to_eol();
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								        assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
 								                (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								        val
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
 								    fn consume_non_eol_whitespace(&mut self) {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        while is_pattern_whitespace(self.ch) && !self.ch_is('\n') && !self.is_eof() {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								            self.bump();
 								        }
 								    }
 								    fn peeking_at_comment(&self) -> bool {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        (self.ch_is('/') && self.nextch_is('/')) || (self.ch_is('/') && self.nextch_is('*')) ||
-												re-instate comment that was mysteriously disappeared

											
										
										
											2016-01-12 20:52:22 +02:00
+								        // consider shebangs comments, but not inner attributes
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        (self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								    fn scan_single_quoted_string(&mut self,
 								                                 start_with_quote: BytePos,
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								                                 unterminated_msg: &str) -> Symbol {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        // assumes that first `'` is consumed
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start = self.pos;
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        // lex `'''` as a single char, for recovery
 								        if self.ch_is('\'') && self.nextch_is('\'') {
 								            self.bump();
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								        } else {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								            let mut first = true;
 								            loop {
 								                if self.ch_is('\'') {
 								                    break;
 								                }
 								                if self.ch_is('\\') && (self.nextch_is('\'') || self.nextch_is('\\')) {
 								                    self.bump();
 								                    self.bump();
 								                } else {
 								                    // Only attempt to infer single line string literals. If we encounter
 								                    // a slash, bail out in order to avoid nonsensical suggestion when
 								                    // involving comments.
 								                    if self.is_eof()
 								                        || (self.ch_is('/') && !first)
 								                        || (self.ch_is('\n') && !self.nextch_is('\'')) {
 								                        self.fatal_span_(start_with_quote, self.pos, unterminated_msg.into())
 								                            .raise()
 								                    }
 								                    self.bump();
 								                }
 								                first = false;
 								            }
 								        }
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								        let id = self.symbol_from(start);
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        self.bump();
 								        id
-												syntax: support ES6-style unicode escapes

First half of bootstrapping https://github.com/rust-lang/rfcs/pull/446

											
										
										
											2014-12-02 16:48:48 -08:00
+								    }
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								    fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> Symbol {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        debug_assert!(self.ch_is('\"'));
 								        let start_with_quote = self.pos;
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        self.bump();
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start = self.pos;
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        while !self.ch_is('"') {
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								            if self.is_eof() {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                let pos = self.pos;
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                self.fatal_span_(start_with_quote, pos, unterminated_msg).raise();
 								            }
 								            if self.ch_is('\\') && (self.nextch_is('\\') || self.nextch_is('"')) {
 								                self.bump();
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								            }
 								            self.bump();
 								        }
-												cleanup: rename name_from to symbol_from

Lexer uses Symbols for a lot of stuff, not only for identifiers, so
the "name" terminology is just confusing.

											
										
										
											2019-06-25 21:37:25 +03:00
+								        let id = self.symbol_from(start);
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        self.bump();
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								        id
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								    }
-												Add a doc comment for scan_raw_string

											
										
										
											2019-06-09 14:20:29 +02:00
+								    /// Scans a raw (byte) string, returning byte position range for `"<literal>"`
 								    /// (including quotes) along with `#` character count in `(b)r##..."<literal>"##...`;
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								    fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let start_bpos = self.pos;
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        self.bump();
-												Separate a `scan_raw_string` (similar `raw_byte` variant)

											
										
										
											2019-05-13 11:42:12 +02:00
+								        let mut hash_count: u16 = 0;
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        while self.ch_is('#') {
-												provide error message when using more than 65535 hash symbols for raw strings

											
										
										
											2018-05-28 20:19:44 -07:00
+								            if hash_count == 65535 {
 								                let bpos = self.next_pos;
 								                self.fatal_span_(start_bpos,
 								                                 bpos,
-												Separate a `scan_raw_string` (similar `raw_byte` variant)

											
										
										
											2019-05-13 11:42:12 +02:00
+								                                 "too many `#` symbols: raw strings may be \
-												provide error message when using more than 65535 hash symbols for raw strings

											
										
										
											2018-05-28 20:19:44 -07:00
+								                                 delimited by up to 65535 `#` symbols").raise();
 								            }
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								            self.bump();
 								            hash_count += 1;
 								        }
 								        if self.is_eof() {
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								            self.fail_unterminated_raw_string(start_bpos, hash_count);
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								        } else if !self.ch_is('"') {
-												Separate a `scan_raw_string` (similar `raw_byte` variant)

											
										
										
											2019-05-13 11:42:12 +02:00
+								            let last_bpos = self.pos;
 								            let curr_char = self.ch.unwrap();
-												Do not capture stderr in the compiler. Instead just panic silently for fatal errors

											
										
										
											2018-01-21 12:47:58 +01:00
+								            self.fatal_span_char(start_bpos,
-												Separate a `scan_raw_string` (similar `raw_byte` variant)

											
										
										
											2019-05-13 11:42:12 +02:00
+								                                 last_bpos,
 								                                 "found invalid character; only `#` is allowed \
 								                                 in raw string delimitation",
 								                                 curr_char).raise();
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        }
 								        self.bump();
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								        let content_start_bpos = self.pos;
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        let mut content_end_bpos;
 								        'outer: loop {
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								            match self.ch {
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								                None => {
-												Add note for unterminated raw string error

											
										
										
											2018-02-26 15:04:40 +01:00
+								                    self.fail_unterminated_raw_string(start_bpos, hash_count);
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								                Some('"') => {
-												Rename StringReader::last_pos as pos.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:46:54 +11:00
+								                    content_end_bpos = self.pos;
-												`for x in range(a, b)` -> `for x in a..b`

sed -i 's/in range(\([^,]*\), *\([^()]*\))/in \1\.\.\2/g' **/*.rs

											
										
										
											2015-01-26 15:46:12 -05:00
+								                    for _ in 0..hash_count {
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								                        self.bump();
-												Rename StringReader::curr as ch.

Likewise, rename StringReader::curr_is as ch_is.

This is a [breaking-change] for libsyntax.

											
										
										
											2016-10-04 11:55:58 +11:00
+								                        if !self.ch_is('#') {
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								                            continue 'outer;
 								                        }
 								                    }
 								                    break;
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                }
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								                _ => (),
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								            }
 								            self.bump();
 								        }
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								        self.bump();
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								        (content_start_bpos, content_end_bpos, hash_count)
-												lexer: shuffle around some functions

											
										
										
											2014-07-02 09:39:48 -07:00
+								    }
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
 								    fn validate_char_escape(&self, start_with_quote: BytePos) {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
 								        if let Err((off, err)) = unescape::unescape_char(lit) {
 								            emit_unescape_error(
 								                &self.sess.span_diagnostic,
 								                lit,
 								                self.mk_sp(start_with_quote, self.pos),
 								                unescape::Mode::Char,
 ..off,
 								                err,
 								            )
 								        }
 								    }
 								    /// Runs `unescape::unescape_byte` over a scanned byte literal and reports a
 								    /// failure through `emit_unescape_error`.
 								    ///
 								    /// `start_with_quote` is the position of the opening quote. The text checked
 								    /// runs from one byte after it to one byte before the current position, which
 								    /// trims the quotes assuming the reader sits just past the closing one.
 								    fn validate_byte_escape(&self, start_with_quote: BytePos) {
 								        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
 								        if let Err((off, err)) = unescape::unescape_byte(lit) {
 								            emit_unescape_error(
 								                &self.sess.span_diagnostic,
 								                lit,
 								                self.mk_sp(start_with_quote, self.pos),
 								                unescape::Mode::Byte,
 								                // Range from the start of the literal to the offset returned with the error.
 								                0..off,
 								                err,
 								            )
 								        }
 								    }
 								    fn validate_str_escape(&self, start_with_quote: BytePos) {
 								        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
 								        unescape::unescape_str(lit, &mut |range, c| {
 								            if let Err(err) = c {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                emit_unescape_error(
 								                    &self.sess.span_diagnostic,
 								                    lit,
 								                    self.mk_sp(start_with_quote, self.pos),
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                    unescape::Mode::Str,
 								                    range,
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    err,
 								                )
 								            }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        })
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								    }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								    fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
 								        let lit = self.str_from_to(content_start, content_end);
 								        unescape::unescape_raw_str(lit, &mut |range, c| {
 								            if let Err(err) = c {
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                emit_unescape_error(
 								                    &self.sess.span_diagnostic,
 								                    lit,
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 								                    unescape::Mode::Str,
 								                    range,
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								                    err,
 								                )
 								            }
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        })
-												Validate and transcribe raw strings via unescape module

											
										
										
											2019-05-13 19:52:55 +02:00
+								    }
-												Prohibit bare CRs in raw byte strings

											
										
										
											2019-05-13 20:21:44 +02:00
+								    fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        let lit = self.str_from_to(content_start, content_end);
 								        unescape::unescape_raw_byte_str(lit, &mut |range, c| {
 								            if let Err(err) = c {
 								                emit_unescape_error(
 								                    &self.sess.span_diagnostic,
 								                    lit,
 								                    self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
 								                    unescape::Mode::ByteStr,
 								                    range,
 								                    err,
 								                )
 								            }
 								        })
-												Prohibit bare CRs in raw byte strings

											
										
										
											2019-05-13 20:21:44 +02:00
+								    }
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								    fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
-												refactor lexer to use idiomatic borrowing

											
										
										
											2019-06-25 21:02:19 +03:00
+								        let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
 								        unescape::unescape_byte_str(lit, &mut |range, c| {
 								            if let Err(err) = c {
 								                emit_unescape_error(
 								                    &self.sess.span_diagnostic,
 								                    lit,
 								                    self.mk_sp(start_with_quote, self.pos),
 								                    unescape::Mode::ByteStr,
 								                    range,
 								                    err,
 								                )
 								            }
 								        })
-												introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` mode as a
single source of truth for character escaping rules

											
										
										
											2019-04-25 11:48:25 +03:00
+								    }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												libsyntax: accept only whitespace with the PATTERN_WHITE_SPACE property

This aligns with unicode recommendations and should be stable for all future
unicode releases. See http://unicode.org/reports/tr31/#R3.

This renames `libsyntax::lexer::is_whitespace` to `is_pattern_whitespace`
so potentially breaks users of libsyntax.

											
										
										
											2015-11-12 02:43:43 +00:00
+								// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which
 								// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								#[inline]
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								crate fn is_pattern_whitespace(c: Option<char>) -> bool {
-												libsyntax: accept only whitespace with the PATTERN_WHITE_SPACE property

This aligns with unicode recommendations and should be stable for all future
unicode releases. See http://unicode.org/reports/tr31/#R3.

This renames `libsyntax::lexer::is_whitespace` to `is_pattern_whitespace`
so potentially breaks users of libsyntax.

											
										
										
											2015-11-12 02:43:43 +00:00
+								    c.map_or(false, Pattern_White_Space)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								#[inline]
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								    c.map_or(false, |c| lo <= c && c <= hi)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												A few cleanups and minor improvements for the lexer

											
										
										
											2018-08-12 15:43:51 +02:00
+								#[inline]
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								fn is_dec_digit(c: Option<char>) -> bool {
-												Fix some clippy warnings in libsyntax

This is mostly removing stray ampersands, needless returns and lifetimes.

											
										
										
											2017-05-12 20:05:39 +02:00
+								    in_range(c, '0', '9')
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								}
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								fn is_doc_comment(s: &str) -> bool {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
 								              s.starts_with("//!");
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can faithfully be represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								    debug!("is {:?} a doc comment? {}", s, res);
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								    res
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												Crate-ify and delete unused code in syntax::parse

											
										
										
											2018-05-31 16:53:30 -06:00
+								fn is_block_doc_comment(s: &str) -> bool {
-												fix "make tidy" failure

											
										
										
											2016-01-03 11:20:06 +02:00
+								    // Prevent `/**/` from being parsed as a doc comment
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
-												fix "make tidy" failure

											
										
										
											2016-01-03 11:20:06 +02:00
+								               s.starts_with("/*!")) && s.len() >= 5;
-												core: split into fmt::Show and fmt::String

fmt::Show is for debugging, and can and should be implemented for
all public types. This trait is used with `{:?}` syntax. There still
exists #[derive(Show)].

fmt::String is for types that can faithfully be represented as a String.
Because of this, there is no way to derive fmt::String, all
implementations must be purposeful. It is used by the default format
syntax, `{}`.

This will break most instances of `{}`, since that now requires the type
to impl fmt::String. In most cases, replacing `{}` with `{:?}` is the
correct fix. Types that were being printed specifically for users should
receive a fmt::String implementation to fix this.

Part of #20013

[breaking-change]

											
										
										
											2014-12-20 00:09:35 -08:00
+								    debug!("is {:?} a doc comment? {}", s, res);
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								    res
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												Parse lifetimes that start with a number and give specific error

											
										
										
											2019-03-02 10:38:20 -08:00
+								/// Determine whether `c` is a valid start for an ident.
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								fn ident_start(c: Option<char>) -> bool {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    let c = match c {
 								        Some(c) => c,
 								        None => return false,
 								    };
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
 								fn ident_continue(c: Option<char>) -> bool {
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    let c = match c {
 								        Some(c) => c,
 								        None => return false,
 								    };
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
 								    (c > '\x7f' && c.is_xid_continue())
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}
-												Use iterator and pattern APIs instead of `char_at`

											
										
										
											2018-10-27 21:41:26 +09:00
+								#[inline]
 								/// Returns the `char` that starts at byte offset `byte` within `s`.
 								///
 								/// Panics if `byte` is out of range or not on a char boundary (the slice
 								/// operation fails), or if no character follows it (`byte == s.len()`).
 								fn char_at(s: &str, byte: usize) -> char {
 								    // Only the first char of the tail starting at `byte` is needed.
 								    let mut tail = s[byte..].chars();
 								    tail.next().unwrap()
 								}
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								#[cfg(test)]
-												Change name of unit test sub-module to "tests".

Changes the style guidelines regarding unit tests to recommend using a
sub-module named "tests" instead of "test" for unit tests as "test"
might clash with imports of libtest.

											
										
										
											2015-04-24 17:30:41 +02:00
+								mod tests {
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    use super::*;
-												syntax: Move most of the `TokenKind` methods to `Token`

											
										
										
											2019-06-08 19:45:12 +03:00
+								    use crate::ast::CrateConfig;
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
+								    use crate::symbol::Symbol;
-												cleanup shebang handling in the lexer

											
										
										
											2019-04-04 10:55:30 +03:00
+								    use crate::source_map::{SourceMap, FilePathMapping};
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
+								    use crate::feature_gate::UnstableFeatures;
 								    use crate::parse::token;
 								    use crate::diagnostics::plugin::ErrorMap;
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								    use crate::with_default_globals;
-												Fallout of std::old_io deprecation

											
										
										
											2015-03-11 15:24:14 -07:00
+								    use std::io;
-												Use PathBuf instead of String where applicable

											
										
										
											2017-12-14 08:09:19 +01:00
+								    use std::path::PathBuf;
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								    use syntax_pos::{BytePos, Span, NO_EXPANSION, edition::Edition};
-												Identify when a stmt could have been parsed as an expr

There are some expressions that can be parsed as a statement without
a trailing semicolon depending on the context, which can lead to
confusing errors due to the same looking code being accepted in some
places and not others. Identify these cases and suggest enclosing in
parenthesis making the parse non-ambiguous without changing the
accepted grammar.

											
										
										
											2019-04-22 19:37:23 -07:00
+								    use rustc_data_structures::fx::{FxHashSet, FxHashMap};
-												Move REGISTERED_DIAGNOSTICS to a ParseSess field

											
										
										
											2018-03-07 02:43:33 +01:00
+								    use rustc_data_structures::sync::Lock;
-												libsyntax => 2018

											
										
										
											2019-02-07 02:33:01 +09:00
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								    fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
-												Add short message-format

											
										
										
											2017-09-16 19:24:08 +02:00
+								        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								                                                          Some(sm.clone()),
-												Toggle span highlighting on `-Zteach`

											
										
										
											2018-01-28 18:37:55 -08:00
+								                                                          false,
-												Update more unit test to new API

											
										
										
											2019-03-13 14:06:25 +01:00
+								                                                          false,
-												Add short message-format

											
										
										
											2017-09-16 19:24:08 +02:00
+								                                                          false);
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								        ParseSess {
-												Fix with_emitter callers

											
										
										
											2019-03-07 11:15:47 -08:00
+								            span_diagnostic: errors::Handler::with_emitter(true, None, Box::new(emitter)),
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								            unstable_features: UnstableFeatures::from_environment(),
-												Use FxHash{Map,Set} instead of the default Hash{Map,Set} everywhere in rustc.

											
										
										
											2018-08-18 13:55:43 +03:00
+								            config: CrateConfig::default(),
-												Make ParseSess thread-safe

											
										
										
											2018-02-15 10:52:26 +01:00
+								            included_mod_stack: Lock::new(Vec::new()),
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            source_map: sm,
-												Use FxHash{Map,Set} instead of the default Hash{Map,Set} everywhere in rustc.

											
										
										
											2018-08-18 13:55:43 +03:00
+								            missing_fragment_specifiers: Lock::new(FxHashSet::default()),
-												Make ParseSess thread-safe

											
										
										
											2018-02-15 10:52:26 +01:00
+								            raw_identifier_spans: Lock::new(Vec::new()),
-												Move REGISTERED_DIAGNOSTICS to a ParseSess field

											
										
										
											2018-03-07 02:43:33 +01:00
+								            registered_diagnostics: Lock::new(ErrorMap::new()),
-												Allow by default, fix tests

											
										
										
											2018-07-13 23:48:15 -05:00
+								            buffered_lints: Lock::new(vec![]),
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								            edition: Edition::from_session(),
-												review comments: fix typo and add comments

											
										
										
											2019-05-06 16:00:21 -07:00
+								            ambiguous_block_expr_parse: Lock::new(FxHashMap::default()),
-												Allow attributes in formal function parameters

											
										
										
											2019-06-09 07:58:40 -03:00
+								            param_attr_spans: Lock::new(Vec::new()),
-												let_chains: Inline visit_expr_with_let_maybe_allowed.

											
										
										
											2019-06-17 01:18:22 +02:00
+								            let_chains_spans: Lock::new(Vec::new()),
-												Add separate 'async_closure' feature gate.

											
										
										
											2019-07-02 04:10:19 +02:00
+								            async_closure_spans: Lock::new(Vec::new()),
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								        }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
 								    // open a string reader for the given string
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								    fn setup<'a>(sm: &SourceMap,
-												Give the `StringReader` a `sess: &ParseSess`.

											
										
										
											2017-01-17 01:14:53 +00:00
+								                 sess: &'a ParseSess,
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								                 teststr: String)
 								                 -> StringReader<'a> {
-												adds DocTest filename variant, refactors doctest_offset out of source_map, fixes remaining test failures

											
										
										
											2018-12-04 15:18:03 -05:00
+								        let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-												cleanup lexer constructors

											
										
										
											2019-07-03 13:30:12 +03:00
+								        StringReader::new(sess, sf, None)
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn t1() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            let mut string_reader = setup(&sm,
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                                        &sh,
 								                                        "/* my source file */ fn main() { println!(\"zebra\"); }\n"
 								                                            .to_string());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(string_reader.next_token(), token::Comment);
 								            assert_eq!(string_reader.next_token(), token::Whitespace);
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            let tok1 = string_reader.next_token();
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								            let tok2 = Token::new(
-												syntax: Move most of the `TokenKind` methods to `Token`

											
										
										
											2019-06-08 19:45:12 +03:00
+								                mk_ident("fn"),
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								                Span::new(BytePos(21), BytePos(23), NO_EXPANSION),
 								            );
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								            assert_eq!(tok1.kind, tok2.kind);
 								            assert_eq!(tok1.span, tok2.span);
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(string_reader.next_token(), token::Whitespace);
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            // read another token:
 								            let tok3 = string_reader.next_token();
-												remove peek_token from StringReader

											
										
										
											2019-07-02 17:08:11 +03:00
+								            assert_eq!(string_reader.pos.clone(), BytePos(28));
-												syntax: Add some helper methods to `Token`

											
										
										
											2019-06-05 09:39:34 +03:00
+								            let tok4 = Token::new(
 								                mk_ident("main"),
 								                Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
 								            );
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								            assert_eq!(tok3.kind, tok4.kind);
 								            assert_eq!(tok3.span, tok4.span);
-												remove peek_token from StringReader

											
										
										
											2019-07-02 17:08:11 +03:00
 								            assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            assert_eq!(string_reader.pos.clone(), BytePos(29))
 								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
 								    // check that the given reader produces the desired stream
 								    // of tokens (stop checking after exhausting the expected vec)
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								    fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
-												`for x in xs.iter()` -> `for x in &xs`

											
										
										
											2015-01-31 12:20:46 -05:00
+								        for expected_tok in &expected {
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(&string_reader.next_token(), expected_tok);
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								        }
 								    }
-												Use an enum rather than a bool in token::Ident

											
										
										
											2014-10-28 02:01:44 +11:00
+								    // make the identifier by looking up the string in the interner
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								    fn mk_ident(id: &str) -> TokenKind {
-												syntax: Move most of the `TokenKind` methods to `Token`

											
										
										
											2019-06-08 19:45:12 +03:00
+								        token::Ident(Symbol::intern(id), false)
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								    }
-												syntax: Rename `Token` into `TokenKind`

											
										
										
											2019-06-04 17:55:23 +03:00
+								    fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
 								        TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn doublecolonparsing() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            check_tokenization(setup(&sm, &sh, "a b".to_string()),
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                            vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
 								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn dcparsing_2() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            check_tokenization(setup(&sm, &sh, "a::b".to_string()),
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                            vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
 								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn dcparsing_3() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                            vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
 								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn dcparsing_4() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                            vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
 								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn character_a() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::Char, "a", None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn character_space() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::Char, " ", None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn character_escaped() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "'\\n'".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::Char, "\\n", None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn lifetime_name() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "'abc".to_string()).next_token(),
-												syntax: Remove duplicate span from `token::Lifetime`

											
										
										
											2019-06-05 11:00:22 +03:00
+								                       token::Lifetime(Symbol::intern("'abc")));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn raw_string() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in a expression (or other literal
that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and doesn't
outlaw illegal suffixes for them yet.

											
										
										
											2014-11-19 15:48:38 +11:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn literal_suffixes() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            macro_rules! test {
 								                ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								                    assert_eq!(setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                               mk_lit(token::$tok_type, $tok_contents, Some("suffix")));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                    // with a whitespace separator:
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								                    assert_eq!(setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                               mk_lit(token::$tok_type, $tok_contents, None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								                }}
 								            }
 								            test!("'a'", Char, "a");
 								            test!("b'a'", Byte, "a");
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								            test!("\"a\"", Str, "a");
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            test!("b\"a\"", ByteStr, "a");
 								            test!("1234", Integer, "1234");
 								            test!("0b101", Integer, "0b101");
 								            test!("0xABC", Integer, "0xABC");
 								            test!("1.0", Float, "1.0");
 								            test!("1.0e10", Float, "1.0e10");
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "2us".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::Integer, "2", Some("us")));
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::StrRaw(3), "raw", Some("suffix")));
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
-												syntax: Turn `token::Lit` into a struct

											
										
										
											2019-05-19 01:04:26 +03:00
+								                       mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn line_doc_comments() {
-												lexer: lex WS/COMMENT/SHEBANG rather than skipping

Now, the lexer will categorize every byte in its input according to the
grammar. The parser skips over these while parsing, thus avoiding their
presence in the input to syntax extensions.

											
										
										
											2014-07-04 22:30:39 -07:00
+								        assert!(is_doc_comment("///"));
 								        assert!(is_doc_comment("/// blah"));
 								        assert!(!is_doc_comment("////"));
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn nested_block_comments() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(lexer.next_token(), token::Comment);
 								            assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								    }
-												run rustfmt on syntax::parse::lexer

											
										
										
											2016-01-03 11:14:09 +02:00
+								    #[test]
 								    fn crlf_comments() {
-												Move `edition` outside the hygiene lock and avoid accessing it

											
										
										
											2019-04-06 00:15:49 +02:00
+								        with_default_globals(|| {
-												Rename other occs of (Code/File)Map to Source(Map/File) #51574

											
										
										
											2018-10-29 21:26:13 +01:00
+								            let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
 								            let sh = mk_sess(sm.clone());
 								            let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								            let comment = lexer.next_token();
-												syntax: Rename `TokenAndSpan` into `Token`

											
										
										
											2019-06-04 18:48:40 +03:00
+								            assert_eq!(comment.kind, token::Comment);
 								            assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
-												syntax: Use `Token` in `TokenTree::Token`

											
										
										
											2019-06-04 20:42:43 +03:00
+								            assert_eq!(lexer.next_token(), token::Whitespace);
 								            assert_eq!(lexer.next_token(), token::DocComment(Symbol::intern("/// test")));
-												Remove syntax and syntax_pos thread locals

											
										
										
											2018-03-07 02:44:10 +01:00
+								        })
-												Added test to check that newlines are stripped from comments
											
										
										
											2015-05-13 22:06:26 +01:00
+								    }
-												syntax: methodify the lexer

											
										
										
											2014-05-21 16:57:31 -07:00
+								}