Move literal parsing code into a separate file

Remove some dead code
2019-05-11 02:31:34 +03:00 · 2019-05-11 02:31:34 +03:00 · 3f064cae3d
commit 3f064cae3d
parent 8739668438
13 changed files with 521 additions and 537 deletions
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@ -1,11 +1,10 @@
 //! The main parser interface.

-use crate::ast::{self, CrateConfig, LitKind, NodeId};
+use crate::ast::{self, CrateConfig, NodeId};
 use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
 use crate::source_map::{SourceMap, FilePathMapping};
 use crate::feature_gate::UnstableFeatures;
 use crate::parse::parser::Parser;
-use crate::symbol::{keywords, Symbol};
 use crate::syntax::parse::parser::emit_unclosed_delims;
 use crate::tokenstream::{TokenStream, TokenTree};
 use crate::diagnostics::plugin::ErrorMap;
@ -14,7 +13,6 @@ use crate::print::pprust::token_to_string;
 use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
 use rustc_data_structures::sync::{Lrc, Lock};
 use syntax_pos::{Span, SourceFile, FileName, MultiSpan};
-use log::debug;

 use rustc_data_structures::fx::{FxHashSet, FxHashMap};
 use std::borrow::Cow;
@ -25,18 +23,15 @@ pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a>>;

 #[macro_use]
 pub mod parser;
-
+pub mod attr;
 pub mod lexer;
 pub mod token;
-pub mod attr;
-pub mod diagnostics;

-pub mod classify;
-
-pub(crate) mod unescape;
-use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte};
-
-pub(crate) mod unescape_error_reporting;
+crate mod classify;
+crate mod diagnostics;
+crate mod literal;
+crate mod unescape;
+crate mod unescape_error_reporting;

 /// Info about a parsing session.
 pub struct ParseSess {
@ -334,339 +329,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> {
    Parser::new(sess, stream, None, true, false)
 }

-/// Parses a string representing a raw string literal into its final form. The
-/// only operation this does is convert embedded CRLF into a single LF.
-fn raw_str_lit(lit: &str) -> String {
-    debug!("raw_str_lit: given {}", lit.escape_default());
-    let mut res = String::with_capacity(lit.len());
-
-    let mut chars = lit.chars().peekable();
-    while let Some(c) = chars.next() {
-        if c == '\r' {
-            if *chars.peek().unwrap() != '\n' {
-                panic!("lexer accepted bare CR");
-            }
-            chars.next();
-            res.push('\n');
-        } else {
-            res.push(c);
-        }
-    }
-
-    res.shrink_to_fit();
-    res
-}
-
-// check if `s` looks like i32 or u1234 etc.
-fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
-    s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
-}
-
-macro_rules! err {
-    ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => {
-        match $opt_diag {
-            Some(($span, $diag)) => { $($body)* }
-            None => return None,
-        }
-    }
-}
-
-crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option<ast::Name>) {
-    match suffix {
-        None => {/* everything ok */}
-        Some(suf) => {
-            let text = suf.as_str();
-            if text.is_empty() {
-                diag.span_bug(sp, "found empty literal suffix in Some")
-            }
-            let mut err = if kind == "a tuple index" &&
-                ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str())
-            {
-                // #59553: warn instead of reject out of hand to allow the fix to percolate
-                // through the ecosystem when people fix their macros
-                let mut err = diag.struct_span_warn(
-                    sp,
-                    &format!("suffixes on {} are invalid", kind),
-                );
-                err.note(&format!(
-                    "`{}` is *temporarily* accepted on tuple index fields as it was \
-                        incorrectly accepted on stable for a few releases",
-                    text,
-                ));
-                err.help(
-                    "on proc macros, you'll want to use `syn::Index::from` or \
-                        `proc_macro::Literal::*_unsuffixed` for code that will desugar \
-                        to tuple field access",
-                );
-                err.note(
-                    "for more context, see https://github.com/rust-lang/rust/issues/60210",
-                );
-                err
-            } else {
-                diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind))
-            };
-            err.span_label(sp, format!("invalid suffix `{}`", text));
-            err.emit();
-        }
-    }
-}
-
-impl LitKind {
-    /// Converts literal token with a suffix into a semantic literal.
-    /// Works speculatively and may return `None` if diagnostic handler is not passed.
-    /// If diagnostic handler is passed, always returns `Some`,
-    /// possibly after reporting non-fatal errors and recovery.
-    crate fn from_lit_token(
-        lit: token::Lit,
-        suf: Option<Symbol>,
-        diag: Option<(Span, &Handler)>
-    ) -> Option<LitKind> {
-        if suf.is_some() && !lit.may_have_suffix() {
-            err!(diag, |span, diag| {
-                expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf)
-            });
-        }
-
-        Some(match lit {
-            token::Bool(i) => {
-                assert!(i == keywords::True.name() || i == keywords::False.name());
-                LitKind::Bool(i == keywords::True.name())
-            }
-            token::Byte(i) => {
-                match unescape_byte(&i.as_str()) {
-                    Ok(c) => LitKind::Byte(c),
-                    Err(_) => LitKind::Err(i),
-                }
-            },
-            token::Char(i) => {
-                match unescape_char(&i.as_str()) {
-                    Ok(c) => LitKind::Char(c),
-                    Err(_) => LitKind::Err(i),
-                }
-            },
-            token::Err(i) => LitKind::Err(i),
-
-            // There are some valid suffixes for integer and float literals,
-            // so all the handling is done internally.
-            token::Integer(s) => return integer_lit(&s.as_str(), suf, diag),
-            token::Float(s) => return float_lit(&s.as_str(), suf, diag),
-
-            token::Str_(mut sym) => {
-                // If there are no characters requiring special treatment we can
-                // reuse the symbol from the Token. Otherwise, we must generate a
-                // new symbol because the string in the LitKind is different to the
-                // string in the Token.
-                let mut has_error = false;
-                let s = &sym.as_str();
-                if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
-                    let mut buf = String::with_capacity(s.len());
-                    unescape_str(s, &mut |_, unescaped_char| {
-                        match unescaped_char {
-                            Ok(c) => buf.push(c),
-                            Err(_) => has_error = true,
-                        }
-                    });
-                    if has_error {
-                        return Some(LitKind::Err(sym));
-                    }
-                    sym = Symbol::intern(&buf)
-                }
-
-                LitKind::Str(sym, ast::StrStyle::Cooked)
-            }
-            token::StrRaw(mut sym, n) => {
-                // Ditto.
-                let s = &sym.as_str();
-                if s.contains('\r') {
-                    sym = Symbol::intern(&raw_str_lit(s));
-                }
-                LitKind::Str(sym, ast::StrStyle::Raw(n))
-            }
-            token::ByteStr(i) => {
-                let s = &i.as_str();
-                let mut buf = Vec::with_capacity(s.len());
-                let mut has_error = false;
-                unescape_byte_str(s, &mut |_, unescaped_byte| {
-                    match unescaped_byte {
-                        Ok(c) => buf.push(c),
-                        Err(_) => has_error = true,
-                    }
-                });
-                if has_error {
-                    return Some(LitKind::Err(i));
-                }
-                buf.shrink_to_fit();
-                LitKind::ByteStr(Lrc::new(buf))
-            }
-            token::ByteStrRaw(i, _) => {
-                LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))
-            }
-        })
-    }
-}
-
-fn filtered_float_lit(data: Symbol, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
-                      -> Option<LitKind> {
-    debug!("filtered_float_lit: {}, {:?}", data, suffix);
-    let suffix = match suffix {
-        Some(suffix) => suffix,
-        None => return Some(LitKind::FloatUnsuffixed(data)),
-    };
-
-    Some(match &*suffix.as_str() {
-        "f32" => LitKind::Float(data, ast::FloatTy::F32),
-        "f64" => LitKind::Float(data, ast::FloatTy::F64),
-        suf => {
-            err!(diag, |span, diag| {
-                if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
-                    // if it looks like a width, let's try to be helpful.
-                    let msg = format!("invalid width `{}` for float literal", &suf[1..]);
-                    diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit()
-                } else {
-                    let msg = format!("invalid suffix `{}` for float literal", suf);
-                    diag.struct_span_err(span, &msg)
-                        .span_label(span, format!("invalid suffix `{}`", suf))
-                        .help("valid suffixes are `f32` and `f64`")
-                        .emit();
-                }
-            });
-
-            LitKind::FloatUnsuffixed(data)
-        }
-    })
-}
-fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
-                 -> Option<LitKind> {
-    debug!("float_lit: {:?}, {:?}", s, suffix);
-    // FIXME #2252: bounds checking float literals is deferred until trans
-
-    // Strip underscores without allocating a new String unless necessary.
-    let s2;
-    let s = if s.chars().any(|c| c == '_') {
-        s2 = s.chars().filter(|&c| c != '_').collect::<String>();
-        &s2
-    } else {
-        s
-    };
-
-    filtered_float_lit(Symbol::intern(s), suffix, diag)
-}
-
-fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
-                   -> Option<LitKind> {
-    // s can only be ascii, byte indexing is fine
-
-    // Strip underscores without allocating a new String unless necessary.
-    let s2;
-    let mut s = if s.chars().any(|c| c == '_') {
-        s2 = s.chars().filter(|&c| c != '_').collect::<String>();
-        &s2
-    } else {
-        s
-    };
-
-    debug!("integer_lit: {}, {:?}", s, suffix);
-
-    let mut base = 10;
-    let orig = s;
-    let mut ty = ast::LitIntType::Unsuffixed;
-
-    if s.starts_with('0') && s.len() > 1 {
-        match s.as_bytes()[1] {
-            b'x' => base = 16,
-            b'o' => base = 8,
-            b'b' => base = 2,
-            _ => { }
-        }
-    }
-
-    // 1f64 and 2f32 etc. are valid float literals.
-    if let Some(suf) = suffix {
-        if looks_like_width_suffix(&['f'], &suf.as_str()) {
-            let err = match base {
-                16 => Some("hexadecimal float literal is not supported"),
-                8 => Some("octal float literal is not supported"),
-                2 => Some("binary float literal is not supported"),
-                _ => None,
-            };
-            if let Some(err) = err {
-                err!(diag, |span, diag| {
-                    diag.struct_span_err(span, err)
-                        .span_label(span, "not supported")
-                        .emit();
-                });
-            }
-            return filtered_float_lit(Symbol::intern(s), Some(suf), diag)
-        }
-    }
-
-    if base != 10 {
-        s = &s[2..];
-    }
-
-    if let Some(suf) = suffix {
-        if suf.as_str().is_empty() {
-            err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some"));
-        }
-        ty = match &*suf.as_str() {
-            "isize" => ast::LitIntType::Signed(ast::IntTy::Isize),
-            "i8"  => ast::LitIntType::Signed(ast::IntTy::I8),
-            "i16" => ast::LitIntType::Signed(ast::IntTy::I16),
-            "i32" => ast::LitIntType::Signed(ast::IntTy::I32),
-            "i64" => ast::LitIntType::Signed(ast::IntTy::I64),
-            "i128" => ast::LitIntType::Signed(ast::IntTy::I128),
-            "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize),
-            "u8"  => ast::LitIntType::Unsigned(ast::UintTy::U8),
-            "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16),
-            "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32),
-            "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64),
-            "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128),
-            suf => {
-                // i<digits> and u<digits> look like widths, so let's
-                // give an error message along those lines
-                err!(diag, |span, diag| {
-                    if looks_like_width_suffix(&['i', 'u'], suf) {
-                        let msg = format!("invalid width `{}` for integer literal", &suf[1..]);
-                        diag.struct_span_err(span, &msg)
-                            .help("valid widths are 8, 16, 32, 64 and 128")
-                            .emit();
-                    } else {
-                        let msg = format!("invalid suffix `{}` for numeric literal", suf);
-                        diag.struct_span_err(span, &msg)
-                            .span_label(span, format!("invalid suffix `{}`", suf))
-                            .help("the suffix must be one of the integral types \
-                                   (`u32`, `isize`, etc)")
-                            .emit();
-                    }
-                });
-
-                ty
-            }
-        }
-    }
-
-    debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \
-           string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix);
-
-    Some(match u128::from_str_radix(s, base) {
-        Ok(r) => LitKind::Int(r, ty),
-        Err(_) => {
-            // small bases are lexed as if they were base 10, e.g, the string
-            // might be `0b10201`. This will cause the conversion above to fail,
-            // but these cases have errors in the lexer: we don't want to emit
-            // two errors, and we especially don't want to emit this error since
-            // it isn't necessarily true.
-            let already_errored = base < 10 &&
-                s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
-
-            if !already_errored {
-                err!(diag, |span, diag| diag.span_err(span, "int literal is too large"));
-            }
-            LitKind::Int(0, ty)
-        }
-    })
-}
-
 /// A sequence separator.
 pub struct SeqSep {
    /// The separator token.