Move literal parsing code into a separate file
Remove some dead code
This commit is contained in:
parent
8739668438
commit
3f064cae3d
13 changed files with 521 additions and 537 deletions
|
@ -1,11 +1,10 @@
|
|||
//! The main parser interface.
|
||||
|
||||
use crate::ast::{self, CrateConfig, LitKind, NodeId};
|
||||
use crate::ast::{self, CrateConfig, NodeId};
|
||||
use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
|
||||
use crate::source_map::{SourceMap, FilePathMapping};
|
||||
use crate::feature_gate::UnstableFeatures;
|
||||
use crate::parse::parser::Parser;
|
||||
use crate::symbol::{keywords, Symbol};
|
||||
use crate::syntax::parse::parser::emit_unclosed_delims;
|
||||
use crate::tokenstream::{TokenStream, TokenTree};
|
||||
use crate::diagnostics::plugin::ErrorMap;
|
||||
|
@ -14,7 +13,6 @@ use crate::print::pprust::token_to_string;
|
|||
use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
|
||||
use rustc_data_structures::sync::{Lrc, Lock};
|
||||
use syntax_pos::{Span, SourceFile, FileName, MultiSpan};
|
||||
use log::debug;
|
||||
|
||||
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
|
||||
use std::borrow::Cow;
|
||||
|
@ -25,18 +23,15 @@ pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a>>;
|
|||
|
||||
#[macro_use]
|
||||
pub mod parser;
|
||||
|
||||
pub mod attr;
|
||||
pub mod lexer;
|
||||
pub mod token;
|
||||
pub mod attr;
|
||||
pub mod diagnostics;
|
||||
|
||||
pub mod classify;
|
||||
|
||||
pub(crate) mod unescape;
|
||||
use unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte};
|
||||
|
||||
pub(crate) mod unescape_error_reporting;
|
||||
crate mod classify;
|
||||
crate mod diagnostics;
|
||||
crate mod literal;
|
||||
crate mod unescape;
|
||||
crate mod unescape_error_reporting;
|
||||
|
||||
/// Info about a parsing session.
|
||||
pub struct ParseSess {
|
||||
|
@ -334,339 +329,6 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> {
|
|||
Parser::new(sess, stream, None, true, false)
|
||||
}
|
||||
|
||||
/// Parses a string representing a raw string literal into its final form. The
|
||||
/// only operation this does is convert embedded CRLF into a single LF.
|
||||
fn raw_str_lit(lit: &str) -> String {
|
||||
debug!("raw_str_lit: given {}", lit.escape_default());
|
||||
let mut res = String::with_capacity(lit.len());
|
||||
|
||||
let mut chars = lit.chars().peekable();
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\r' {
|
||||
if *chars.peek().unwrap() != '\n' {
|
||||
panic!("lexer accepted bare CR");
|
||||
}
|
||||
chars.next();
|
||||
res.push('\n');
|
||||
} else {
|
||||
res.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
res.shrink_to_fit();
|
||||
res
|
||||
}
|
||||
|
||||
/// Checks whether `s` looks like a width suffix such as `i32` or `u1234`:
/// one of `first_chars` followed by one or more ASCII digits.
///
/// Returns `false` for the empty string and for a lone prefix character with no
/// digits after it (e.g. `"i"`), since there is no width to talk about in that case.
/// The prefix is consumed as a whole `char`, so a multi-byte prefix character does
/// not cause a slicing panic.
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first_chars.contains(&first) => {
            // Everything after the prefix character must be a non-empty run of digits.
            let width = chars.as_str();
            !width.is_empty() && width.chars().all(|c| c.is_ascii_digit())
        }
        _ => false,
    }
}
|
||||
|
||||
// Runs `$body` with `$span` and `$diag` bound to the contents of `$opt_diag`
// when it is `Some`; otherwise returns `None` from the enclosing function.
macro_rules! err {
    ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => {
        if let Some(($span, $diag)) = $opt_diag {
            $($body)*
        } else {
            return None
        }
    }
}
|
||||
|
||||
/// Emits a diagnostic at `sp` when a literal described by `kind` carries a suffix.
///
/// Does nothing if `suffix` is `None`. An empty suffix inside `Some` is reported
/// through `span_bug`. The suffixes `i32`, `u32`, `isize` and `usize` on
/// "a tuple index" only produce a warning (see #59553); every other case is an error.
crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option<ast::Name>) {
    let suf = match suffix {
        Some(suf) => suf,
        // No suffix at all: everything is ok.
        None => return,
    };

    let text = suf.as_str();
    if text.is_empty() {
        diag.span_bug(sp, "found empty literal suffix in Some")
    }

    let headline = format!("suffixes on {} are invalid", kind);
    let temporarily_accepted = kind == "a tuple index" &&
        ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str());

    let mut err = if temporarily_accepted {
        // #59553: warn instead of reject out of hand to allow the fix to percolate
        // through the ecosystem when people fix their macros
        let mut warning = diag.struct_span_warn(sp, &headline);
        warning.note(&format!(
            "`{}` is *temporarily* accepted on tuple index fields as it was \
             incorrectly accepted on stable for a few releases",
            text,
        ));
        warning.help(
            "on proc macros, you'll want to use `syn::Index::from` or \
             `proc_macro::Literal::*_unsuffixed` for code that will desugar \
             to tuple field access",
        );
        warning.note(
            "for more context, see https://github.com/rust-lang/rust/issues/60210",
        );
        warning
    } else {
        diag.struct_span_err(sp, &headline)
    };

    err.span_label(sp, format!("invalid suffix `{}`", text));
    err.emit();
}
|
||||
|
||||
impl LitKind {
|
||||
/// Converts literal token with a suffix into a semantic literal.
|
||||
/// Works speculatively and may return `None` is diagnostic handler is not passed.
|
||||
/// If diagnostic handler is passed, always returns `Some`,
|
||||
/// possibly after reporting non-fatal errors and recovery.
|
||||
crate fn from_lit_token(
|
||||
lit: token::Lit,
|
||||
suf: Option<Symbol>,
|
||||
diag: Option<(Span, &Handler)>
|
||||
) -> Option<LitKind> {
|
||||
if suf.is_some() && !lit.may_have_suffix() {
|
||||
err!(diag, |span, diag| {
|
||||
expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf)
|
||||
});
|
||||
}
|
||||
|
||||
Some(match lit {
|
||||
token::Bool(i) => {
|
||||
assert!(i == keywords::True.name() || i == keywords::False.name());
|
||||
LitKind::Bool(i == keywords::True.name())
|
||||
}
|
||||
token::Byte(i) => {
|
||||
match unescape_byte(&i.as_str()) {
|
||||
Ok(c) => LitKind::Byte(c),
|
||||
Err(_) => LitKind::Err(i),
|
||||
}
|
||||
},
|
||||
token::Char(i) => {
|
||||
match unescape_char(&i.as_str()) {
|
||||
Ok(c) => LitKind::Char(c),
|
||||
Err(_) => LitKind::Err(i),
|
||||
}
|
||||
},
|
||||
token::Err(i) => LitKind::Err(i),
|
||||
|
||||
// There are some valid suffixes for integer and float literals,
|
||||
// so all the handling is done internally.
|
||||
token::Integer(s) => return integer_lit(&s.as_str(), suf, diag),
|
||||
token::Float(s) => return float_lit(&s.as_str(), suf, diag),
|
||||
|
||||
token::Str_(mut sym) => {
|
||||
// If there are no characters requiring special treatment we can
|
||||
// reuse the symbol from the Token. Otherwise, we must generate a
|
||||
// new symbol because the string in the LitKind is different to the
|
||||
// string in the Token.
|
||||
let mut has_error = false;
|
||||
let s = &sym.as_str();
|
||||
if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') {
|
||||
let mut buf = String::with_capacity(s.len());
|
||||
unescape_str(s, &mut |_, unescaped_char| {
|
||||
match unescaped_char {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => has_error = true,
|
||||
}
|
||||
});
|
||||
if has_error {
|
||||
return Some(LitKind::Err(sym));
|
||||
}
|
||||
sym = Symbol::intern(&buf)
|
||||
}
|
||||
|
||||
LitKind::Str(sym, ast::StrStyle::Cooked)
|
||||
}
|
||||
token::StrRaw(mut sym, n) => {
|
||||
// Ditto.
|
||||
let s = &sym.as_str();
|
||||
if s.contains('\r') {
|
||||
sym = Symbol::intern(&raw_str_lit(s));
|
||||
}
|
||||
LitKind::Str(sym, ast::StrStyle::Raw(n))
|
||||
}
|
||||
token::ByteStr(i) => {
|
||||
let s = &i.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut has_error = false;
|
||||
unescape_byte_str(s, &mut |_, unescaped_byte| {
|
||||
match unescaped_byte {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => has_error = true,
|
||||
}
|
||||
});
|
||||
if has_error {
|
||||
return Some(LitKind::Err(i));
|
||||
}
|
||||
buf.shrink_to_fit();
|
||||
LitKind::ByteStr(Lrc::new(buf))
|
||||
}
|
||||
token::ByteStrRaw(i, _) => {
|
||||
LitKind::ByteStr(Lrc::new(i.to_string().into_bytes()))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn filtered_float_lit(data: Symbol, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||
-> Option<LitKind> {
|
||||
debug!("filtered_float_lit: {}, {:?}", data, suffix);
|
||||
let suffix = match suffix {
|
||||
Some(suffix) => suffix,
|
||||
None => return Some(LitKind::FloatUnsuffixed(data)),
|
||||
};
|
||||
|
||||
Some(match &*suffix.as_str() {
|
||||
"f32" => LitKind::Float(data, ast::FloatTy::F32),
|
||||
"f64" => LitKind::Float(data, ast::FloatTy::F64),
|
||||
suf => {
|
||||
err!(diag, |span, diag| {
|
||||
if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
|
||||
// if it looks like a width, lets try to be helpful.
|
||||
let msg = format!("invalid width `{}` for float literal", &suf[1..]);
|
||||
diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit()
|
||||
} else {
|
||||
let msg = format!("invalid suffix `{}` for float literal", suf);
|
||||
diag.struct_span_err(span, &msg)
|
||||
.span_label(span, format!("invalid suffix `{}`", suf))
|
||||
.help("valid suffixes are `f32` and `f64`")
|
||||
.emit();
|
||||
}
|
||||
});
|
||||
|
||||
LitKind::FloatUnsuffixed(data)
|
||||
}
|
||||
})
|
||||
}
|
||||
fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||
-> Option<LitKind> {
|
||||
debug!("float_lit: {:?}, {:?}", s, suffix);
|
||||
// FIXME #2252: bounds checking float literals is deferred until trans
|
||||
|
||||
// Strip underscores without allocating a new String unless necessary.
|
||||
let s2;
|
||||
let s = if s.chars().any(|c| c == '_') {
|
||||
s2 = s.chars().filter(|&c| c != '_').collect::<String>();
|
||||
&s2
|
||||
} else {
|
||||
s
|
||||
};
|
||||
|
||||
filtered_float_lit(Symbol::intern(s), suffix, diag)
|
||||
}
|
||||
|
||||
fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>)
|
||||
-> Option<LitKind> {
|
||||
// s can only be ascii, byte indexing is fine
|
||||
|
||||
// Strip underscores without allocating a new String unless necessary.
|
||||
let s2;
|
||||
let mut s = if s.chars().any(|c| c == '_') {
|
||||
s2 = s.chars().filter(|&c| c != '_').collect::<String>();
|
||||
&s2
|
||||
} else {
|
||||
s
|
||||
};
|
||||
|
||||
debug!("integer_lit: {}, {:?}", s, suffix);
|
||||
|
||||
let mut base = 10;
|
||||
let orig = s;
|
||||
let mut ty = ast::LitIntType::Unsuffixed;
|
||||
|
||||
if s.starts_with('0') && s.len() > 1 {
|
||||
match s.as_bytes()[1] {
|
||||
b'x' => base = 16,
|
||||
b'o' => base = 8,
|
||||
b'b' => base = 2,
|
||||
_ => { }
|
||||
}
|
||||
}
|
||||
|
||||
// 1f64 and 2f32 etc. are valid float literals.
|
||||
if let Some(suf) = suffix {
|
||||
if looks_like_width_suffix(&['f'], &suf.as_str()) {
|
||||
let err = match base {
|
||||
16 => Some("hexadecimal float literal is not supported"),
|
||||
8 => Some("octal float literal is not supported"),
|
||||
2 => Some("binary float literal is not supported"),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(err) = err {
|
||||
err!(diag, |span, diag| {
|
||||
diag.struct_span_err(span, err)
|
||||
.span_label(span, "not supported")
|
||||
.emit();
|
||||
});
|
||||
}
|
||||
return filtered_float_lit(Symbol::intern(s), Some(suf), diag)
|
||||
}
|
||||
}
|
||||
|
||||
if base != 10 {
|
||||
s = &s[2..];
|
||||
}
|
||||
|
||||
if let Some(suf) = suffix {
|
||||
if suf.as_str().is_empty() {
|
||||
err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some"));
|
||||
}
|
||||
ty = match &*suf.as_str() {
|
||||
"isize" => ast::LitIntType::Signed(ast::IntTy::Isize),
|
||||
"i8" => ast::LitIntType::Signed(ast::IntTy::I8),
|
||||
"i16" => ast::LitIntType::Signed(ast::IntTy::I16),
|
||||
"i32" => ast::LitIntType::Signed(ast::IntTy::I32),
|
||||
"i64" => ast::LitIntType::Signed(ast::IntTy::I64),
|
||||
"i128" => ast::LitIntType::Signed(ast::IntTy::I128),
|
||||
"usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize),
|
||||
"u8" => ast::LitIntType::Unsigned(ast::UintTy::U8),
|
||||
"u16" => ast::LitIntType::Unsigned(ast::UintTy::U16),
|
||||
"u32" => ast::LitIntType::Unsigned(ast::UintTy::U32),
|
||||
"u64" => ast::LitIntType::Unsigned(ast::UintTy::U64),
|
||||
"u128" => ast::LitIntType::Unsigned(ast::UintTy::U128),
|
||||
suf => {
|
||||
// i<digits> and u<digits> look like widths, so lets
|
||||
// give an error message along those lines
|
||||
err!(diag, |span, diag| {
|
||||
if looks_like_width_suffix(&['i', 'u'], suf) {
|
||||
let msg = format!("invalid width `{}` for integer literal", &suf[1..]);
|
||||
diag.struct_span_err(span, &msg)
|
||||
.help("valid widths are 8, 16, 32, 64 and 128")
|
||||
.emit();
|
||||
} else {
|
||||
let msg = format!("invalid suffix `{}` for numeric literal", suf);
|
||||
diag.struct_span_err(span, &msg)
|
||||
.span_label(span, format!("invalid suffix `{}`", suf))
|
||||
.help("the suffix must be one of the integral types \
|
||||
(`u32`, `isize`, etc)")
|
||||
.emit();
|
||||
}
|
||||
});
|
||||
|
||||
ty
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \
|
||||
string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix);
|
||||
|
||||
Some(match u128::from_str_radix(s, base) {
|
||||
Ok(r) => LitKind::Int(r, ty),
|
||||
Err(_) => {
|
||||
// small bases are lexed as if they were base 10, e.g, the string
|
||||
// might be `0b10201`. This will cause the conversion above to fail,
|
||||
// but these cases have errors in the lexer: we don't want to emit
|
||||
// two errors, and we especially don't want to emit this error since
|
||||
// it isn't necessarily true.
|
||||
let already_errored = base < 10 &&
|
||||
s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
|
||||
|
||||
if !already_errored {
|
||||
err!(diag, |span, diag| diag.span_err(span, "int literal is too large"));
|
||||
}
|
||||
LitKind::Int(0, ty)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// A sequence separator.
|
||||
pub struct SeqSep {
|
||||
/// The separator token.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue