
mv compiler to compiler/

mark 2020-08-27 22:58:48 -05:00 committed by Vadim Petrochenkov
parent db534b3ac2
commit 9e5f7d5631
1686 changed files with 941 additions and 1051 deletions


@@ -0,0 +1,25 @@
//! Routines the parser uses to classify AST nodes
// Predicates on exprs and stmts that the pretty-printer and parser use
use crate::ast;
/// Does this expression require a semicolon to be treated
/// as a statement? The negation of this: 'can this expression
/// be used as a statement without a semicolon' -- is used
/// as an early-bail-out in the parser so that, for instance,
/// if true {...} else {...}
/// |x| 5
/// isn't parsed as (if true {...} else {...} | x) | 5
pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool {
match e.kind {
ast::ExprKind::If(..)
| ast::ExprKind::Match(..)
| ast::ExprKind::Block(..)
| ast::ExprKind::While(..)
| ast::ExprKind::Loop(..)
| ast::ExprKind::ForLoop(..)
| ast::ExprKind::TryBlock(..) => false,
_ => true,
}
}
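An aside, not part of the diff: the ambiguity that this predicate guards against is easiest to see with a concrete, standalone snippet. The names and values below are illustrative only.

// Illustration only: block-like expressions can stand as statements without a
// trailing semicolon, so the parser bails out early instead of treating the
// following closure as the right-hand side of a bit-or expression.
fn main() {
    if true { () } else { () } // a statement on its own; no semicolon required
    let f = |x: i32| x + 5; // starts a fresh statement, not `(if ... | x) | 5`
    assert_eq!(f(1), 6);
}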


@@ -0,0 +1,222 @@
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
#[cfg(test)]
mod tests;
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum CommentStyle {
/// No code on either side of each line of the comment
Isolated,
/// Code exists to the left of the comment
Trailing,
/// Code before /* foo */ and after the comment
Mixed,
/// Just a manual blank line "\n\n", for layout
BlankLine,
}
#[derive(Clone)]
pub struct Comment {
pub style: CommentStyle,
pub lines: Vec<String>,
pub pos: BytePos,
}
/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol) -> String {
/// remove whitespace-only lines from the start/end of lines
fn vertical_trim(lines: Vec<String>) -> Vec<String> {
let mut i = 0;
let mut j = lines.len();
// first line of all-stars should be omitted
if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
i += 1;
}
while i < j && lines[i].trim().is_empty() {
i += 1;
}
// like the first, a last line of all stars should be omitted
if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
j -= 1;
}
while j > i && lines[j - 1].trim().is_empty() {
j -= 1;
}
lines[i..j].to_vec()
}
/// remove a "[ \t]*\*" block from each line, if possible
fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
let mut i = usize::MAX;
let mut can_trim = true;
let mut first = true;
for line in &lines {
for (j, c) in line.chars().enumerate() {
if j > i || !"* \t".contains(c) {
can_trim = false;
break;
}
if c == '*' {
if first {
i = j;
first = false;
} else if i != j {
can_trim = false;
}
break;
}
}
if i >= line.len() {
can_trim = false;
}
if !can_trim {
break;
}
}
if can_trim {
lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
} else {
lines
}
}
let data = data.as_str();
if data.contains('\n') {
let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
let lines = vertical_trim(lines);
let lines = horizontal_trim(lines);
lines.join("\n")
} else {
data.to_string()
}
}
/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
/// Otherwise returns `Some(k)` where `k` is the first char offset after that leading
/// whitespace. Note that `k` may be outside bounds of `s`.
fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
let mut idx = 0;
for (i, ch) in s.char_indices().take(col.to_usize()) {
if !ch.is_whitespace() {
return None;
}
idx = i + ch.len_utf8();
}
Some(idx)
}
fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
let len = s.len();
match all_whitespace(&s, col) {
Some(col) => {
if col < len {
&s[col..]
} else {
""
}
}
None => s,
}
}
fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
let mut res: Vec<String> = vec![];
let mut lines = text.lines();
// just push the first line
res.extend(lines.next().map(|it| it.to_string()));
// for other lines, strip common whitespace prefix
for line in lines {
res.push(trim_whitespace_prefix(line, col).to_string())
}
res
}
// it appears this function is called only from pprust... that's
// probably not a good thing.
pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
let sm = SourceMap::new(sm.path_mapping().clone());
let source_file = sm.new_source_file(path, src);
let text = (*source_file.src.as_ref().unwrap()).clone();
let text: &str = text.as_str();
let start_bpos = source_file.start_pos;
let mut pos = 0;
let mut comments: Vec<Comment> = Vec::new();
let mut code_to_the_left = false;
if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
comments.push(Comment {
style: CommentStyle::Isolated,
lines: vec![text[..shebang_len].to_string()],
pos: start_bpos,
});
pos += shebang_len;
}
for token in rustc_lexer::tokenize(&text[pos..]) {
let token_text = &text[pos..pos + token.len];
match token.kind {
rustc_lexer::TokenKind::Whitespace => {
if let Some(mut idx) = token_text.find('\n') {
code_to_the_left = false;
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
idx = idx + 1 + next_newline;
comments.push(Comment {
style: CommentStyle::BlankLine,
lines: vec![],
pos: start_bpos + BytePos((pos + idx) as u32),
});
}
}
}
rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
if doc_style.is_none() {
let code_to_the_right = match text[pos + token.len..].chars().next() {
Some('\r' | '\n') => false,
_ => true,
};
let style = match (code_to_the_left, code_to_the_right) {
(_, true) => CommentStyle::Mixed,
(false, false) => CommentStyle::Isolated,
(true, false) => CommentStyle::Trailing,
};
// Count the number of chars since the start of the line by rescanning.
let pos_in_file = start_bpos + BytePos(pos as u32);
let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
let col = CharPos(text[line_begin_pos..pos].chars().count());
let lines = split_block_comment_into_lines(token_text, col);
comments.push(Comment { style, lines, pos: pos_in_file })
}
}
rustc_lexer::TokenKind::LineComment { doc_style } => {
if doc_style.is_none() {
comments.push(Comment {
style: if code_to_the_left {
CommentStyle::Trailing
} else {
CommentStyle::Isolated
},
lines: vec![token_text.to_string()],
pos: start_bpos + BytePos(pos as u32),
})
}
}
_ => {
code_to_the_left = true;
}
}
pos += token.len;
}
comments
}
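An aside, not part of the diff: the Trailing-versus-Isolated distinction that `gather_comments` tracks through `code_to_the_left` can be sketched with a dependency-free, line-based version. The real code works on lexer tokens, so this sketch (and its helper name) is illustrative only.

#[derive(Debug, PartialEq)]
enum Style { Isolated, Trailing }

// Classify a `//` comment by whether any code precedes it on the same line.
// (Ignores `//` inside string literals; the token-based version above does not.)
fn classify_line_comment(line: &str) -> Option<Style> {
    let idx = line.find("//")?;
    if line[..idx].trim().is_empty() { Some(Style::Isolated) } else { Some(Style::Trailing) }
}

fn main() {
    assert_eq!(classify_line_comment("// a lone comment"), Some(Style::Isolated));
    assert_eq!(classify_line_comment("let x = 1; // code to the left"), Some(Style::Trailing));
    assert_eq!(classify_line_comment("let x = 1;"), None);
}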


@@ -0,0 +1,43 @@
use super::*;
use rustc_span::with_default_session_globals;
#[test]
fn test_block_doc_comment_1() {
with_default_session_globals(|| {
let comment = "\n * Test \n ** Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test \n* Test\n Test");
})
}
#[test]
fn test_block_doc_comment_2() {
with_default_session_globals(|| {
let comment = "\n * Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test\n Test");
})
}
#[test]
fn test_block_doc_comment_3() {
with_default_session_globals(|| {
let comment = "\n let a: *i32;\n *a = 5;\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
})
}
#[test]
fn test_line_doc_comment() {
with_default_session_globals(|| {
let stripped = beautify_doc_string(Symbol::intern(" test"));
assert_eq!(stripped, " test");
let stripped = beautify_doc_string(Symbol::intern("! test"));
assert_eq!(stripped, "! test");
let stripped = beautify_doc_string(Symbol::intern("test"));
assert_eq!(stripped, "test");
let stripped = beautify_doc_string(Symbol::intern("!test"));
assert_eq!(stripped, "!test");
})
}


@@ -0,0 +1,108 @@
// FIXME(Centril): Move to rustc_span?
use rustc_span::symbol::Symbol;
use std::cmp;
#[cfg(test)]
mod tests;
/// Finds the Levenshtein distance between two strings
pub fn lev_distance(a: &str, b: &str) -> usize {
// cases which don't require further computation
if a.is_empty() {
return b.chars().count();
} else if b.is_empty() {
return a.chars().count();
}
let mut dcol: Vec<_> = (0..=b.len()).collect();
let mut t_last = 0;
for (i, sc) in a.chars().enumerate() {
let mut current = i;
dcol[0] = current + 1;
for (j, tc) in b.chars().enumerate() {
let next = dcol[j + 1];
if sc == tc {
dcol[j + 1] = current;
} else {
dcol[j + 1] = cmp::min(current, next);
dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
}
current = next;
t_last = j;
}
}
dcol[t_last + 1]
}
/// Finds the best match for a given word in the given iterator
///
/// As a loose rule to avoid the obviously incorrect suggestions, it takes
/// an optional limit for the maximum allowable edit distance, which defaults
/// to one-third of the given word.
///
/// Besides Levenshtein, we use a case-insensitive comparison to improve accuracy on the
/// edge case where the only mismatch is in the case of the letters.
pub fn find_best_match_for_name<'a, T>(
iter_names: T,
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol>
where
T: Iterator<Item = &'a Symbol>,
{
let lookup = &lookup.as_str();
let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d);
let name_vec: Vec<&Symbol> = iter_names.collect();
let (case_insensitive_match, levenshtein_match) = name_vec
.iter()
.filter_map(|&name| {
let dist = lev_distance(lookup, &name.as_str());
if dist <= max_dist { Some((name, dist)) } else { None }
})
// Here we are collecting the following structure:
// (case_insensitive_match, (levenshtein_match, levenshtein_distance))
.fold((None, None), |result, (candidate, dist)| {
(
if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
Some(candidate)
} else {
result.0
},
match result.1 {
None => Some((candidate, dist)),
Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
},
)
});
// Priority of matches:
// 1. Exact case insensitive match
// 2. Levenshtein distance match
// 3. Sorted word match
if let Some(candidate) = case_insensitive_match {
Some(*candidate)
} else if levenshtein_match.is_some() {
levenshtein_match.map(|(candidate, _)| *candidate)
} else {
find_match_by_sorted_words(name_vec, lookup)
}
}
fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
iter_names.iter().fold(None, |result, candidate| {
if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
Some(**candidate)
} else {
result
}
})
}
fn sort_by_words(name: &str) -> String {
let mut split_words: Vec<&str> = name.split('_').collect();
split_words.sort();
split_words.join("_")
}
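An aside, not part of the diff: the "sorted word match" fallback is easiest to see with concrete values. A minimal standalone sketch, duplicating `sort_by_words` purely for illustration:

fn sort_by_words(name: &str) -> String {
    let mut words: Vec<&str> = name.split('_').collect();
    words.sort();
    words.join("_")
}

fn main() {
    // Both identifiers normalize to "a_longer_name_variable", which is how
    // `find_match_by_sorted_words` detects reordered words in an identifier.
    assert_eq!(
        sort_by_words("a_variable_longer_name"),
        sort_by_words("a_longer_variable_name")
    );
}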


@@ -0,0 +1,59 @@
use super::*;
#[test]
fn test_lev_distance() {
use std::char::{from_u32, MAX};
// Test bytelength agnosticity
for c in (0..MAX as u32).filter_map(|i| from_u32(i)).map(|i| i.to_string()) {
assert_eq!(lev_distance(&c[..], &c[..]), 0);
}
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
assert_eq!(lev_distance(a, b), 1);
assert_eq!(lev_distance(b, a), 1);
assert_eq!(lev_distance(a, c), 2);
assert_eq!(lev_distance(c, a), 2);
assert_eq!(lev_distance(b, c), 1);
assert_eq!(lev_distance(c, b), 1);
}
#[test]
fn test_find_best_match_for_name() {
use rustc_span::with_default_session_globals;
with_default_session_globals(|| {
let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None),
Some(Symbol::intern("aaab"))
);
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("1111111111"), None),
None
);
let input = vec![Symbol::intern("aAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("AAAA"), None),
Some(Symbol::intern("aAAA"))
);
let input = vec![Symbol::intern("AAAA")];
// Returns None because the Levenshtein distance exceeds the default `max_dist` (`lookup.len() / 3`)
assert_eq!(find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None), None);
let input = vec![Symbol::intern("AAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), Some(4)),
Some(Symbol::intern("AAAA"))
);
let input = vec![Symbol::intern("a_longer_variable_name")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("a_variable_longer_name"), None),
Some(Symbol::intern("a_longer_variable_name"))
);
})
}


@@ -0,0 +1,320 @@
//! Code related to parsing literals.
use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};
use crate::tokenstream::TokenTree;
use rustc_data_structures::sync::Lrc;
use rustc_lexer::unescape::{unescape_byte, unescape_char};
use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ascii;
use tracing::debug;
pub enum LitError {
NotLiteral,
LexerError,
InvalidSuffix,
InvalidIntSuffix,
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge,
}
impl LitKind {
/// Converts a literal token into a semantic literal.
fn from_lit_token(lit: token::Lit) -> Result<LitKind, LitError> {
let token::Lit { kind, symbol, suffix } = lit;
if suffix.is_some() && !kind.may_have_suffix() {
return Err(LitError::InvalidSuffix);
}
Ok(match kind {
token::Bool => {
assert!(symbol.is_bool_lit());
LitKind::Bool(symbol == kw::True)
}
token::Byte => {
return unescape_byte(&symbol.as_str())
.map(LitKind::Byte)
.map_err(|_| LitError::LexerError);
}
token::Char => {
return unescape_char(&symbol.as_str())
.map(LitKind::Char)
.map_err(|_| LitError::LexerError);
}
// There are some valid suffixes for integer and float literals,
// so all the handling is done internally.
token::Integer => return integer_lit(symbol, suffix),
token::Float => return float_lit(symbol, suffix),
token::Str => {
// If there are no characters requiring special treatment we can
// reuse the symbol from the token. Otherwise, we must generate a
// new symbol because the string in the LitKind is different to the
// string in the token.
let s = symbol.as_str();
let symbol =
if s.contains(&['\\', '\r'][..]) {
let mut buf = String::with_capacity(s.len());
let mut error = Ok(());
unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
}
});
error?;
Symbol::intern(&buf)
} else {
symbol
};
LitKind::Str(symbol, ast::StrStyle::Cooked)
}
token::StrRaw(n) => {
// Ditto.
let s = symbol.as_str();
let symbol =
if s.contains('\r') {
let mut buf = String::with_capacity(s.len());
let mut error = Ok(());
unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
}
});
error?;
buf.shrink_to_fit();
Symbol::intern(&buf)
} else {
symbol
};
LitKind::Str(symbol, ast::StrStyle::Raw(n))
}
token::ByteStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
}
});
error?;
buf.shrink_to_fit();
LitKind::ByteStr(Lrc::new(buf))
}
token::ByteStrRaw(_) => {
let s = symbol.as_str();
let bytes = if s.contains('\r') {
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
}
});
error?;
buf.shrink_to_fit();
buf
} else {
symbol.to_string().into_bytes()
};
LitKind::ByteStr(Lrc::new(bytes))
}
token::Err => LitKind::Err(symbol),
})
}
/// Attempts to recover a token from a semantic literal.
/// This function is used when the original token doesn't exist (e.g. the literal is created
/// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
pub fn to_lit_token(&self) -> token::Lit {
let (kind, symbol, suffix) = match *self {
LitKind::Str(symbol, ast::StrStyle::Cooked) => {
// Don't re-intern unless the escaped string is different.
let s = symbol.as_str();
let escaped = s.escape_default().to_string();
let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) };
(token::Str, symbol, None)
}
LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None),
LitKind::ByteStr(ref bytes) => {
let string = bytes
.iter()
.cloned()
.flat_map(ascii::escape_default)
.map(Into::<char>::into)
.collect::<String>();
(token::ByteStr, Symbol::intern(&string), None)
}
LitKind::Byte(byte) => {
let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect();
(token::Byte, Symbol::intern(&string), None)
}
LitKind::Char(ch) => {
let string: String = ch.escape_default().map(Into::<char>::into).collect();
(token::Char, Symbol::intern(&string), None)
}
LitKind::Int(n, ty) => {
let suffix = match ty {
ast::LitIntType::Unsigned(ty) => Some(ty.name()),
ast::LitIntType::Signed(ty) => Some(ty.name()),
ast::LitIntType::Unsuffixed => None,
};
(token::Integer, sym::integer(n), suffix)
}
LitKind::Float(symbol, ty) => {
let suffix = match ty {
ast::LitFloatType::Suffixed(ty) => Some(ty.name()),
ast::LitFloatType::Unsuffixed => None,
};
(token::Float, symbol, suffix)
}
LitKind::Bool(value) => {
let symbol = if value { kw::True } else { kw::False };
(token::Bool, symbol, None)
}
LitKind::Err(symbol) => (token::Err, symbol, None),
};
token::Lit::new(kind, symbol, suffix)
}
}
impl Lit {
/// Converts a literal token into an AST literal.
pub fn from_lit_token(token: token::Lit, span: Span) -> Result<Lit, LitError> {
Ok(Lit { token, kind: LitKind::from_lit_token(token)?, span })
}
/// Converts an arbitrary token into an AST literal.
///
/// Keep this in sync with `Token::can_begin_literal_or_bool` excluding unary negation.
pub fn from_token(token: &Token) -> Result<Lit, LitError> {
let lit = match token.uninterpolate().kind {
token::Ident(name, false) if name.is_bool_lit() => {
token::Lit::new(token::Bool, name, None)
}
token::Literal(lit) => lit,
token::Interpolated(ref nt) => {
if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt {
if let ast::ExprKind::Lit(lit) = &expr.kind {
return Ok(lit.clone());
}
}
return Err(LitError::NotLiteral);
}
_ => return Err(LitError::NotLiteral),
};
Lit::from_lit_token(lit, token.span)
}
/// Attempts to recover an AST literal from a semantic literal.
/// This function is used when the original token doesn't exist (e.g. the literal is created
/// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
pub fn from_lit_kind(kind: LitKind, span: Span) -> Lit {
Lit { token: kind.to_lit_token(), kind, span }
}
/// Losslessly convert an AST literal into a token stream.
pub fn token_tree(&self) -> TokenTree {
let token = match self.token.kind {
token::Bool => token::Ident(self.token.symbol, false),
_ => token::Literal(self.token),
};
TokenTree::token(token, self.span)
}
}
fn strip_underscores(symbol: Symbol) -> Symbol {
// Do not allocate a new string unless necessary.
let s = symbol.as_str();
if s.contains('_') {
let mut s = s.to_string();
s.retain(|c| c != '_');
return Symbol::intern(&s);
}
symbol
}
fn filtered_float_lit(
symbol: Symbol,
suffix: Option<Symbol>,
base: u32,
) -> Result<LitKind, LitError> {
debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
if base != 10 {
return Err(LitError::NonDecimalFloat(base));
}
Ok(match suffix {
Some(suf) => LitKind::Float(
symbol,
ast::LitFloatType::Suffixed(match suf {
sym::f32 => ast::FloatTy::F32,
sym::f64 => ast::FloatTy::F64,
_ => return Err(LitError::InvalidFloatSuffix),
}),
),
None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
})
}
fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("float_lit: {:?}, {:?}", symbol, suffix);
filtered_float_lit(strip_underscores(symbol), suffix, 10)
}
fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("integer_lit: {:?}, {:?}", symbol, suffix);
let symbol = strip_underscores(symbol);
let s = symbol.as_str();
let base = match s.as_bytes() {
[b'0', b'x', ..] => 16,
[b'0', b'o', ..] => 8,
[b'0', b'b', ..] => 2,
_ => 10,
};
let ty = match suffix {
Some(suf) => match suf {
sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
// `1f64` and `2f32` etc. are valid float literals, and
// `fxxx` looks more like an invalid float literal than invalid integer literal.
_ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
_ => return Err(LitError::InvalidIntSuffix),
},
_ => ast::LitIntType::Unsuffixed,
};
let s = &s[if base != 10 { 2 } else { 0 }..];
u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
// Small bases are lexed as if they were base 10, e.g., the string
// might be `0b10201`. This will cause the conversion above to fail,
// but these kinds of errors are already reported by the lexer.
let from_lexer =
base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
if from_lexer { LitError::LexerError } else { LitError::IntTooLarge }
})
}
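An aside, not part of the diff: a dependency-free sketch of the underscore stripping and base-prefix detection that `integer_lit` performs above. The helper name and structure here are illustrative only; the real function also classifies suffixes and distinguishes lexer errors from overflow.

fn parse_int_literal(src: &str) -> Result<u128, std::num::ParseIntError> {
    // Strip `_` separators, mirroring `strip_underscores`.
    let s: String = src.chars().filter(|&c| c != '_').collect();
    // Detect the base from the `0x`/`0o`/`0b` prefix, mirroring `integer_lit`.
    let (base, digits) = match s.as_bytes() {
        [b'0', b'x', rest @ ..] => (16, rest),
        [b'0', b'o', rest @ ..] => (8, rest),
        [b'0', b'b', rest @ ..] => (2, rest),
        _ => (10, s.as_bytes()),
    };
    u128::from_str_radix(std::str::from_utf8(digits).unwrap(), base)
}

fn main() {
    assert_eq!(parse_int_literal("0xff_ff"), Ok(0xffff));
    assert_eq!(parse_int_literal("1_000_000"), Ok(1_000_000));
    assert_eq!(parse_int_literal("0b1010"), Ok(10));
    assert!(parse_int_literal("0b10201").is_err()); // in rustc this error comes from the lexer
}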


@@ -0,0 +1,403 @@
use crate::ast::{self, BinOpKind};
use crate::token::{self, BinOpToken, Token};
use rustc_span::symbol::kw;
/// Associative operator with precedence.
///
/// This is the enum which specifies operator precedence and fixity to the parser.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum AssocOp {
/// `+`
Add,
/// `-`
Subtract,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulus,
/// `&&`
LAnd,
/// `||`
LOr,
/// `^`
BitXor,
/// `&`
BitAnd,
/// `|`
BitOr,
/// `<<`
ShiftLeft,
/// `>>`
ShiftRight,
/// `==`
Equal,
/// `<`
Less,
/// `<=`
LessEqual,
/// `!=`
NotEqual,
/// `>`
Greater,
/// `>=`
GreaterEqual,
/// `=`
Assign,
/// `?=` where ? is one of the BinOpToken
AssignOp(BinOpToken),
/// `as`
As,
/// `..` range
DotDot,
/// `..=` range
DotDotEq,
/// `:`
Colon,
}
#[derive(PartialEq, Debug)]
pub enum Fixity {
/// The operator is left-associative
Left,
/// The operator is right-associative
Right,
/// The operator is not associative
None,
}
impl AssocOp {
/// Creates a new AssocOp from a token
pub fn from_token(t: &Token) -> Option<AssocOp> {
use AssocOp::*;
match t.kind {
token::BinOpEq(k) => Some(AssignOp(k)),
token::Eq => Some(Assign),
token::BinOp(BinOpToken::Star) => Some(Multiply),
token::BinOp(BinOpToken::Slash) => Some(Divide),
token::BinOp(BinOpToken::Percent) => Some(Modulus),
token::BinOp(BinOpToken::Plus) => Some(Add),
token::BinOp(BinOpToken::Minus) => Some(Subtract),
token::BinOp(BinOpToken::Shl) => Some(ShiftLeft),
token::BinOp(BinOpToken::Shr) => Some(ShiftRight),
token::BinOp(BinOpToken::And) => Some(BitAnd),
token::BinOp(BinOpToken::Caret) => Some(BitXor),
token::BinOp(BinOpToken::Or) => Some(BitOr),
token::Lt => Some(Less),
token::Le => Some(LessEqual),
token::Ge => Some(GreaterEqual),
token::Gt => Some(Greater),
token::EqEq => Some(Equal),
token::Ne => Some(NotEqual),
token::AndAnd => Some(LAnd),
token::OrOr => Some(LOr),
token::DotDot => Some(DotDot),
token::DotDotEq => Some(DotDotEq),
// DotDotDot is no longer supported, but we need some way to display the error
token::DotDotDot => Some(DotDotEq),
token::Colon => Some(Colon),
// `<-` should probably be `< -`
token::LArrow => Some(Less),
_ if t.is_keyword(kw::As) => Some(As),
_ => None,
}
}
/// Creates a new AssocOp from ast::BinOpKind.
pub fn from_ast_binop(op: BinOpKind) -> Self {
use AssocOp::*;
match op {
BinOpKind::Lt => Less,
BinOpKind::Gt => Greater,
BinOpKind::Le => LessEqual,
BinOpKind::Ge => GreaterEqual,
BinOpKind::Eq => Equal,
BinOpKind::Ne => NotEqual,
BinOpKind::Mul => Multiply,
BinOpKind::Div => Divide,
BinOpKind::Rem => Modulus,
BinOpKind::Add => Add,
BinOpKind::Sub => Subtract,
BinOpKind::Shl => ShiftLeft,
BinOpKind::Shr => ShiftRight,
BinOpKind::BitAnd => BitAnd,
BinOpKind::BitXor => BitXor,
BinOpKind::BitOr => BitOr,
BinOpKind::And => LAnd,
BinOpKind::Or => LOr,
}
}
/// Gets the precedence of this operator
pub fn precedence(&self) -> usize {
use AssocOp::*;
match *self {
As | Colon => 14,
Multiply | Divide | Modulus => 13,
Add | Subtract => 12,
ShiftLeft | ShiftRight => 11,
BitAnd => 10,
BitXor => 9,
BitOr => 8,
Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual => 7,
LAnd => 6,
LOr => 5,
DotDot | DotDotEq => 4,
Assign | AssignOp(_) => 2,
}
}
/// Gets the fixity of this operator
pub fn fixity(&self) -> Fixity {
use AssocOp::*;
// NOTE: it is a bug to have operators with the same precedence but different fixities!
match *self {
Assign | AssignOp(_) => Fixity::Right,
As | Multiply | Divide | Modulus | Add | Subtract | ShiftLeft | ShiftRight | BitAnd
| BitXor | BitOr | Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual
| LAnd | LOr | Colon => Fixity::Left,
DotDot | DotDotEq => Fixity::None,
}
}
pub fn is_comparison(&self) -> bool {
use AssocOp::*;
match *self {
Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual => true,
Assign | AssignOp(_) | As | Multiply | Divide | Modulus | Add | Subtract
| ShiftLeft | ShiftRight | BitAnd | BitXor | BitOr | LAnd | LOr | DotDot | DotDotEq
| Colon => false,
}
}
pub fn is_assign_like(&self) -> bool {
use AssocOp::*;
match *self {
Assign | AssignOp(_) => true,
Less | Greater | LessEqual | GreaterEqual | Equal | NotEqual | As | Multiply
| Divide | Modulus | Add | Subtract | ShiftLeft | ShiftRight | BitAnd | BitXor
| BitOr | LAnd | LOr | DotDot | DotDotEq | Colon => false,
}
}
pub fn to_ast_binop(&self) -> Option<BinOpKind> {
use AssocOp::*;
match *self {
Less => Some(BinOpKind::Lt),
Greater => Some(BinOpKind::Gt),
LessEqual => Some(BinOpKind::Le),
GreaterEqual => Some(BinOpKind::Ge),
Equal => Some(BinOpKind::Eq),
NotEqual => Some(BinOpKind::Ne),
Multiply => Some(BinOpKind::Mul),
Divide => Some(BinOpKind::Div),
Modulus => Some(BinOpKind::Rem),
Add => Some(BinOpKind::Add),
Subtract => Some(BinOpKind::Sub),
ShiftLeft => Some(BinOpKind::Shl),
ShiftRight => Some(BinOpKind::Shr),
BitAnd => Some(BinOpKind::BitAnd),
BitXor => Some(BinOpKind::BitXor),
BitOr => Some(BinOpKind::BitOr),
LAnd => Some(BinOpKind::And),
LOr => Some(BinOpKind::Or),
Assign | AssignOp(_) | As | DotDot | DotDotEq | Colon => None,
}
}
/// This operator could be used to follow a block unambiguously.
///
/// This is used for error recovery at the moment, providing a suggestion to wrap blocks with
/// parentheses while having a high degree of confidence on the correctness of the suggestion.
pub fn can_continue_expr_unambiguously(&self) -> bool {
use AssocOp::*;
match self {
BitXor | // `{ 42 } ^ 3`
Assign | // `{ 42 } = { 42 }`
Divide | // `{ 42 } / 42`
Modulus | // `{ 42 } % 2`
ShiftRight | // `{ 42 } >> 2`
LessEqual | // `{ 42 } <= 3`
Greater | // `{ 42 } > 3`
GreaterEqual | // `{ 42 } >= 3`
AssignOp(_) | // `{ 42 } +=`
As | // `{ 42 } as usize`
// Equal | // `{ 42 } == { 42 }` Accepting these here would regress incorrect
// NotEqual | // `{ 42 } != { 42 }` struct literals parser recovery.
Colon => true, // `{ 42 }: usize`
_ => false,
}
}
}
pub const PREC_RESET: i8 = -100;
pub const PREC_CLOSURE: i8 = -40;
pub const PREC_JUMP: i8 = -30;
pub const PREC_RANGE: i8 = -10;
// The range 2..=14 is reserved for AssocOp binary operator precedences.
pub const PREC_PREFIX: i8 = 50;
pub const PREC_POSTFIX: i8 = 60;
pub const PREC_PAREN: i8 = 99;
pub const PREC_FORCE_PAREN: i8 = 100;
#[derive(Debug, Clone, Copy)]
pub enum ExprPrecedence {
Closure,
Break,
Continue,
Ret,
Yield,
Range,
Binary(BinOpKind),
Cast,
Type,
Assign,
AssignOp,
Box,
AddrOf,
Let,
Unary,
Call,
MethodCall,
Field,
Index,
Try,
InlineAsm,
Mac,
Array,
Repeat,
Tup,
Lit,
Path,
Paren,
If,
While,
ForLoop,
Loop,
Match,
Block,
TryBlock,
Struct,
Async,
Await,
Err,
}
impl ExprPrecedence {
pub fn order(self) -> i8 {
match self {
ExprPrecedence::Closure => PREC_CLOSURE,
ExprPrecedence::Break |
ExprPrecedence::Continue |
ExprPrecedence::Ret |
ExprPrecedence::Yield => PREC_JUMP,
// `Range` claims to have higher precedence than `Assign`, but `x .. x = x` fails to
// parse, instead of parsing as `(x .. x) = x`. Giving `Range` a lower precedence
// ensures that `pprust` will add parentheses in the right places to get the desired
// parse.
ExprPrecedence::Range => PREC_RANGE,
// Binop-like expr kinds, handled by `AssocOp`.
ExprPrecedence::Binary(op) => AssocOp::from_ast_binop(op).precedence() as i8,
ExprPrecedence::Cast => AssocOp::As.precedence() as i8,
ExprPrecedence::Type => AssocOp::Colon.precedence() as i8,
ExprPrecedence::Assign |
ExprPrecedence::AssignOp => AssocOp::Assign.precedence() as i8,
// Unary, prefix
ExprPrecedence::Box |
ExprPrecedence::AddrOf |
// Here `let pats = expr` has `let pats =` as a "unary" prefix of `expr`.
// However, this is not exactly right. When `let _ = a` is the LHS of a binop we
// need parens sometimes. E.g. we can print `(let _ = a) && b` as `let _ = a && b`
// but we need to print `(let _ = a) < b` as-is with parens.
ExprPrecedence::Let |
ExprPrecedence::Unary => PREC_PREFIX,
// Unary, postfix
ExprPrecedence::Await |
ExprPrecedence::Call |
ExprPrecedence::MethodCall |
ExprPrecedence::Field |
ExprPrecedence::Index |
ExprPrecedence::Try |
ExprPrecedence::InlineAsm |
ExprPrecedence::Mac => PREC_POSTFIX,
// Never need parens
ExprPrecedence::Array |
ExprPrecedence::Repeat |
ExprPrecedence::Tup |
ExprPrecedence::Lit |
ExprPrecedence::Path |
ExprPrecedence::Paren |
ExprPrecedence::If |
ExprPrecedence::While |
ExprPrecedence::ForLoop |
ExprPrecedence::Loop |
ExprPrecedence::Match |
ExprPrecedence::Block |
ExprPrecedence::TryBlock |
ExprPrecedence::Async |
ExprPrecedence::Struct |
ExprPrecedence::Err => PREC_PAREN,
}
}
}
/// In `let p = e`, operators with precedence `<=` this one require parentheses in `e`.
pub fn prec_let_scrutinee_needs_par() -> usize {
AssocOp::LAnd.precedence()
}
/// Suppose we have `let _ = e` and the `order` of `e`.
/// Is the `order` such that `e` in `let _ = e` needs parentheses when it is on the RHS?
///
/// Conversely, suppose that we have `(let _ = a) OP b` and `order` is that of `OP`.
/// Can we print this as `let _ = a OP b`?
pub fn needs_par_as_let_scrutinee(order: i8) -> bool {
order <= prec_let_scrutinee_needs_par() as i8
}
/// Expressions that syntactically contain an "exterior" struct literal i.e., not surrounded by any
/// parens or other delimiters, e.g., `X { y: 1 }`, `X { y: 1 }.method()`, `foo == X { y: 1 }` and
/// `X { y: 1 } == foo` all do, but `(X { y: 1 }) == foo` does not.
pub fn contains_exterior_struct_lit(value: &ast::Expr) -> bool {
match value.kind {
ast::ExprKind::Struct(..) => true,
ast::ExprKind::Assign(ref lhs, ref rhs, _)
| ast::ExprKind::AssignOp(_, ref lhs, ref rhs)
| ast::ExprKind::Binary(_, ref lhs, ref rhs) => {
// X { y: 1 } + X { y: 2 }
contains_exterior_struct_lit(&lhs) || contains_exterior_struct_lit(&rhs)
}
ast::ExprKind::Await(ref x)
| ast::ExprKind::Unary(_, ref x)
| ast::ExprKind::Cast(ref x, _)
| ast::ExprKind::Type(ref x, _)
| ast::ExprKind::Field(ref x, _)
| ast::ExprKind::Index(ref x, _) => {
// &X { y: 1 }, X { y: 1 }.y
contains_exterior_struct_lit(&x)
}
ast::ExprKind::MethodCall(.., ref exprs, _) => {
// X { y: 1 }.bar(...)
contains_exterior_struct_lit(&exprs[0])
}
_ => false,
}
}
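A closing aside, not part of the diff: the precedence numbers above exist so the pretty-printer can decide where parentheses are needed. Below is a minimal standalone sketch of that decision, reusing the values `AssocOp::precedence` assigns to `+` (12) and `*` (13); the `Expr` enum and `print` helper are illustrative only.

enum Expr {
    Num(i64),
    // operator text, its precedence (the same numbers `AssocOp::precedence` uses), lhs, rhs
    Bin(&'static str, u8, Box<Expr>, Box<Expr>),
}

fn print(e: &Expr, parent_prec: u8) -> String {
    match e {
        Expr::Num(n) => n.to_string(),
        Expr::Bin(op, prec, lhs, rhs) => {
            let s = format!("{} {} {}", print(lhs, *prec), op, print(rhs, *prec));
            // Parenthesize when this node binds more loosely than its context;
            // `pprust` makes the same comparison using `ExprPrecedence::order`.
            if *prec < parent_prec { format!("({})", s) } else { s }
        }
    }
}

fn main() {
    // `+` has precedence 12 and `*` has 13 in the table above.
    let sum = Expr::Bin("+", 12, Box::new(Expr::Num(1)), Box::new(Expr::Num(2)));
    let prod = Expr::Bin("*", 13, Box::new(sum), Box::new(Expr::Num(3)));
    assert_eq!(print(&prod, 0), "(1 + 2) * 3"); // the `+` operand needs parentheses
}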