
mv compiler to compiler/

Authored by mark on 2020-08-27 22:58:48 -05:00; committed by Vadim Petrochenkov
parent db534b3ac2
commit 9e5f7d5631
1686 changed files with 941 additions and 1051 deletions

compiler/rustc_parse/src/lexer/mod.rs
@@ -0,0 +1,598 @@
use rustc_ast::ast::AttrStyle;
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_data_structures::sync::Lrc;
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError};
use rustc_lexer::Base;
use rustc_lexer::{unescape, RawStrError};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{BytePos, Pos, Span};
use std::char;
use tracing::debug;
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;
use rustc_lexer::{unescape::Mode, DocStyle};
use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
pub expected_delim: token::DelimToken,
pub found_delim: Option<token::DelimToken>,
pub found_span: Span,
pub unclosed_span: Option<Span>,
pub candidate_span: Option<Span>,
}
pub struct StringReader<'a> {
sess: &'a ParseSess,
/// Initial position, read-only.
start_pos: BytePos,
/// The absolute offset within the source_map of the current character.
pos: BytePos,
/// Stop reading src at this index.
end_src_index: usize,
/// Source text to tokenize.
src: Lrc<String>,
override_span: Option<Span>,
}
impl<'a> StringReader<'a> {
pub fn new(
sess: &'a ParseSess,
source_file: Lrc<rustc_span::SourceFile>,
override_span: Option<Span>,
) -> Self {
let src = source_file.src.clone().unwrap_or_else(|| {
sess.span_diagnostic
.bug(&format!("cannot lex `source_file` without source: {}", source_file.name));
});
StringReader {
sess,
start_pos: source_file.start_pos,
pos: source_file.start_pos,
end_src_index: src.len(),
src,
override_span,
}
}
fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
}
/// Returns the next token, including trivia like whitespace or comments.
pub fn next_token(&mut self) -> Token {
let start_src_index = self.src_index(self.pos);
let text: &str = &self.src[start_src_index..self.end_src_index];
if text.is_empty() {
let span = self.mk_sp(self.pos, self.pos);
return Token::new(token::Eof, span);
}
{
let is_beginning_of_file = self.pos == self.start_pos;
if is_beginning_of_file {
if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
let start = self.pos;
self.pos = self.pos + BytePos::from_usize(shebang_len);
let sym = self.symbol_from(start + BytePos::from_usize("#!".len()));
let kind = token::Shebang(sym);
let span = self.mk_sp(start, self.pos);
return Token::new(kind, span);
}
}
}
let token = rustc_lexer::first_token(text);
let start = self.pos;
self.pos = self.pos + BytePos::from_usize(token.len);
debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start));
let kind = self.cook_lexer_token(token.kind, start);
let span = self.mk_sp(start, self.pos);
Token::new(kind, span)
}
/// Report a fatal lexical error with a given span.
fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
self.sess.span_diagnostic.span_fatal(sp, m)
}
/// Report a lexical error with a given span.
fn err_span(&self, sp: Span, m: &str) {
self.sess.span_diagnostic.struct_span_err(sp, m).emit();
}
/// Report a fatal error spanning [`from_pos`, `to_pos`).
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError {
self.fatal_span(self.mk_sp(from_pos, to_pos), m)
}
/// Report a lexical error spanning [`from_pos`, `to_pos`).
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
self.err_span(self.mk_sp(from_pos, to_pos), m)
}
fn struct_fatal_span_char(
&self,
from_pos: BytePos,
to_pos: BytePos,
m: &str,
c: char,
) -> DiagnosticBuilder<'a> {
let mut m = m.to_string();
m.push_str(": ");
push_escaped_char(&mut m, c);
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
}
/// Turns a simple `rustc_lexer::TokenKind` enum into a rich
/// `rustc_ast::TokenKind`. This turns strings into interned
/// symbols and runs additional validation.
fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> TokenKind {
match token {
rustc_lexer::TokenKind::LineComment { doc_style } => {
match doc_style {
Some(doc_style) => {
// The opening delimiter (of length 3) is not included in the symbol.
let content_start = start + BytePos(3);
let content = self.str_from(content_start);
self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
}
None => token::Comment,
}
}
rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
if !terminated {
let msg = match doc_style {
Some(_) => "unterminated block doc-comment",
None => "unterminated block comment",
};
let last_bpos = self.pos;
self.sess
.span_diagnostic
.struct_span_fatal_with_code(
self.mk_sp(start, last_bpos),
msg,
error_code!(E0758),
)
.emit();
FatalError.raise();
}
match doc_style {
Some(doc_style) => {
// The opening delimiter (length 3) and the closing delimiter (length 2)
// are not included in the symbol.
let content_start = start + BytePos(3);
let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
let content = self.str_from_to(content_start, content_end);
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
}
None => token::Comment,
}
}
rustc_lexer::TokenKind::Whitespace => token::Whitespace,
rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
let mut ident_start = start;
if is_raw_ident {
ident_start = ident_start + BytePos(2);
}
let sym = nfc_normalize(self.str_from(ident_start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
if is_raw_ident {
if !sym.can_be_raw() {
self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
}
token::Ident(sym, is_raw_ident)
}
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start as u32);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
let suffix = if suffix_start < self.pos {
let string = self.str_from(suffix_start);
if string == "_" {
self.sess
.span_diagnostic
.struct_span_warn(
self.mk_sp(suffix_start, self.pos),
"underscore literal suffix is not allowed",
)
.warn(
"this was previously accepted by the compiler but is \
being phased out; it will become a hard error in \
a future release!",
)
.note(
"see issue #42326 \
<https://github.com/rust-lang/rust/issues/42326> \
for more information",
)
.emit();
None
} else {
Some(Symbol::intern(string))
}
} else {
None
};
token::Literal(token::Lit { kind, symbol, suffix })
}
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
if starts_with_number {
self.err_span_(start, self.pos, "lifetimes cannot start with a number");
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)
}
rustc_lexer::TokenKind::Semi => token::Semi,
rustc_lexer::TokenKind::Comma => token::Comma,
rustc_lexer::TokenKind::Dot => token::Dot,
rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace),
rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace),
rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket),
rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket),
rustc_lexer::TokenKind::At => token::At,
rustc_lexer::TokenKind::Pound => token::Pound,
rustc_lexer::TokenKind::Tilde => token::Tilde,
rustc_lexer::TokenKind::Question => token::Question,
rustc_lexer::TokenKind::Colon => token::Colon,
rustc_lexer::TokenKind::Dollar => token::Dollar,
rustc_lexer::TokenKind::Eq => token::Eq,
rustc_lexer::TokenKind::Bang => token::Not,
rustc_lexer::TokenKind::Lt => token::Lt,
rustc_lexer::TokenKind::Gt => token::Gt,
rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
rustc_lexer::TokenKind::And => token::BinOp(token::And),
rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
rustc_lexer::TokenKind::Unknown => {
let c = self.str_from(start).chars().next().unwrap();
let mut err =
self.struct_fatal_span_char(start, self.pos, "unknown start of token", c);
// FIXME: the lexer could be used to turn the ASCII version of unicode
// homoglyphs into the token, instead of keeping a table in
// `check_for_substitution`. Ideally, this should be inside `rustc_lexer`.
// However, we should first remove compound tokens like `<<` from
// `rustc_lexer`, and then add fancier error recovery to it, as there will
// be less overall work to do this way.
let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
.unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
err.emit();
token
}
}
}
fn cook_doc_comment(
&self,
content_start: BytePos,
content: &str,
comment_kind: CommentKind,
doc_style: DocStyle,
) -> TokenKind {
if content.contains('\r') {
for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
self.err_span_(
content_start + BytePos(idx as u32),
content_start + BytePos(idx as u32 + 1),
match comment_kind {
CommentKind::Line => "bare CR not allowed in doc-comment",
CommentKind::Block => "bare CR not allowed in block doc-comment",
},
);
}
}
let attr_style = match doc_style {
DocStyle::Outer => AttrStyle::Outer,
DocStyle::Inner => AttrStyle::Inner,
};
token::DocComment(comment_kind, attr_style, Symbol::intern(content))
}
fn cook_lexer_literal(
&self,
start: BytePos,
suffix_start: BytePos,
kind: rustc_lexer::LiteralKind,
) -> (token::LitKind, Symbol) {
// prefix means `"` or `br"` or `r###"`, ...
let (lit_kind, mode, prefix_len, postfix_len) = match kind {
rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated {
self.sess
.span_diagnostic
.struct_span_fatal_with_code(
self.mk_sp(start, suffix_start),
"unterminated character literal",
error_code!(E0762),
)
.emit();
FatalError.raise();
}
(token::Char, Mode::Char, 1, 1) // ' '
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
self.sess
.span_diagnostic
.struct_span_fatal_with_code(
self.mk_sp(start + BytePos(1), suffix_start),
"unterminated byte constant",
error_code!(E0763),
)
.emit();
FatalError.raise();
}
(token::Byte, Mode::Byte, 2, 1) // b' '
}
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
self.sess
.span_diagnostic
.struct_span_fatal_with_code(
self.mk_sp(start, suffix_start),
"unterminated double quote string",
error_code!(E0765),
)
.emit();
FatalError.raise();
}
(token::Str, Mode::Str, 1, 1) // " "
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
self.sess
.span_diagnostic
.struct_span_fatal_with_code(
self.mk_sp(start + BytePos(1), suffix_start),
"unterminated double quote byte string",
error_code!(E0766),
)
.emit();
FatalError.raise();
}
(token::ByteStr, Mode::ByteStr, 2, 1) // b" "
}
rustc_lexer::LiteralKind::RawStr { n_hashes, err } => {
self.report_raw_str_error(start, err);
let n = u32::from(n_hashes);
(token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
}
rustc_lexer::LiteralKind::RawByteStr { n_hashes, err } => {
self.report_raw_str_error(start, err);
let n = u32::from(n_hashes);
(token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
}
rustc_lexer::LiteralKind::Int { base, empty_int } => {
return if empty_int {
self.sess
.span_diagnostic
.struct_span_err_with_code(
self.mk_sp(start, suffix_start),
"no valid digits found for number",
error_code!(E0768),
)
.emit();
(token::Integer, sym::integer(0))
} else {
self.validate_int_literal(base, start, suffix_start);
(token::Integer, self.symbol_from_to(start, suffix_start))
};
}
rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
if empty_exponent {
self.err_span_(start, self.pos, "expected at least one digit in exponent");
}
match base {
Base::Hexadecimal => self.err_span_(
start,
suffix_start,
"hexadecimal float literal is not supported",
),
Base::Octal => {
self.err_span_(start, suffix_start, "octal float literal is not supported")
}
Base::Binary => {
self.err_span_(start, suffix_start, "binary float literal is not supported")
}
_ => (),
}
let id = self.symbol_from_to(start, suffix_start);
return (token::Float, id);
}
};
let content_start = start + BytePos(prefix_len);
let content_end = suffix_start - BytePos(postfix_len);
let id = self.symbol_from_to(content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end);
(lit_kind, id)
}
pub fn pos(&self) -> BytePos {
self.pos
}
#[inline]
fn src_index(&self, pos: BytePos) -> usize {
(pos - self.start_pos).to_usize()
}
/// Slice of the source text from `start` up to but excluding `self.pos`,
/// meaning the slice does not include the character at `self.pos`.
fn str_from(&self, start: BytePos) -> &str {
self.str_from_to(start, self.pos)
}
/// Creates a Symbol from a given offset to the current offset.
fn symbol_from(&self, start: BytePos) -> Symbol {
debug!("taking an ident from {:?} to {:?}", start, self.pos);
Symbol::intern(self.str_from(start))
}
/// Like `symbol_from`, but with an explicit endpoint.
fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
debug!("taking an ident from {:?} to {:?}", start, end);
Symbol::intern(self.str_from_to(start, end))
}
/// Slice of the source text spanning from `start` up to but excluding `end`.
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
&self.src[self.src_index(start)..self.src_index(end)]
}
fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) {
match opt_err {
Some(RawStrError::InvalidStarter { bad_char }) => {
self.report_non_started_raw_string(start, bad_char)
}
Some(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
.report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
Some(RawStrError::TooManyDelimiters { found }) => {
self.report_too_many_hashes(start, found)
}
None => (),
}
}
fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! {
self.struct_fatal_span_char(
start,
self.pos,
"found invalid character; only `#` is allowed in raw string delimitation",
bad_char,
)
.emit();
FatalError.raise()
}
fn report_unterminated_raw_string(
&self,
start: BytePos,
n_hashes: usize,
possible_offset: Option<usize>,
found_terminators: usize,
) -> ! {
let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
self.mk_sp(start, start),
"unterminated raw string",
error_code!(E0748),
);
err.span_label(self.mk_sp(start, start), "unterminated raw string");
if n_hashes > 0 {
err.note(&format!(
"this raw string should be terminated with `\"{}`",
"#".repeat(n_hashes)
));
}
if let Some(possible_offset) = possible_offset {
let lo = start + BytePos(possible_offset as u32);
let hi = lo + BytePos(found_terminators as u32);
let span = self.mk_sp(lo, hi);
err.span_suggestion(
span,
"consider terminating the string here",
"#".repeat(n_hashes),
Applicability::MaybeIncorrect,
);
}
err.emit();
FatalError.raise()
}
/// Note: It was decided not to add a test case, because it would be too big.
/// https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180
fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
self.fatal_span_(
start,
self.pos,
&format!(
"too many `#` symbols: raw strings may be delimited \
by up to 65535 `#` symbols, but found {}",
found
),
)
.raise();
}
fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) {
let lit_content = self.str_from_to(content_start, content_end);
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes =
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
emit_unescape_error(
&self.sess.span_diagnostic,
lit_content,
span_with_quotes,
mode,
range,
err,
);
}
});
}
fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) {
let base = match base {
Base::Binary => 2,
Base::Octal => 8,
_ => return,
};
let s = self.str_from_to(content_start + BytePos(2), content_end);
for (idx, c) in s.char_indices() {
let idx = idx as u32;
if c != '_' && c.to_digit(base).is_none() {
let lo = content_start + BytePos(2 + idx);
let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32);
self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base));
}
}
}
}
pub fn nfc_normalize(string: &str) -> Symbol {
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
match is_nfc_quick(string.chars()) {
IsNormalized::Yes => Symbol::intern(string),
_ => {
let normalized_str: String = string.chars().nfc().collect();
Symbol::intern(&normalized_str)
}
}
}
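
The check-then-normalize pattern in `nfc_normalize` can be exercised outside the compiler. Below is a minimal standalone sketch (not part of this commit) using only the `unicode_normalization` crate the function already depends on; the helper name `nfc` is invented for the example and returns a `String` instead of an interned `Symbol`:

use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};

// Same shape as `nfc_normalize` above, minus the `Symbol` interning.
fn nfc(string: &str) -> String {
    match is_nfc_quick(string.chars()) {
        // Already in NFC: the common case, no extra normalization pass.
        IsNormalized::Yes => string.to_string(),
        // Maybe or No: run the full NFC normalization.
        _ => string.chars().nfc().collect(),
    }
}

fn main() {
    // 'e' followed by U+0301 (combining acute accent) composes to U+00E9.
    assert_eq!(nfc("e\u{301}"), "\u{e9}");
}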

compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -0,0 +1,313 @@
use super::{StringReader, UnmatchedBrace};
use rustc_ast::token::{self, DelimToken, Token};
use rustc_ast::tokenstream::{
DelimSpan,
IsJoint::{self, *},
TokenStream, TokenTree, TreeAndJoint,
};
use rustc_ast_pretty::pprust::token_to_string;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::PResult;
use rustc_span::Span;
impl<'a> StringReader<'a> {
crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
let mut tt_reader = TokenTreesReader {
string_reader: self,
token: Token::dummy(),
joint_to_prev: Joint,
open_braces: Vec::new(),
unmatched_braces: Vec::new(),
matching_delim_spans: Vec::new(),
last_unclosed_found_span: None,
last_delim_empty_block_spans: FxHashMap::default(),
matching_block_spans: Vec::new(),
};
let res = tt_reader.parse_all_token_trees();
(res, tt_reader.unmatched_braces)
}
}
struct TokenTreesReader<'a> {
string_reader: StringReader<'a>,
token: Token,
joint_to_prev: IsJoint,
/// Stack of open delimiters and their spans. Used for error messages.
open_braces: Vec<(token::DelimToken, Span)>,
unmatched_braces: Vec<UnmatchedBrace>,
/// The type and spans for all braces
///
/// Used only for error recovery when arriving at EOF with mismatched braces.
matching_delim_spans: Vec<(token::DelimToken, Span, Span)>,
last_unclosed_found_span: Option<Span>,
/// Collect empty block spans that might have been auto-inserted by editors.
last_delim_empty_block_spans: FxHashMap<token::DelimToken, Span>,
/// Collect the spans of braces (Open, Close). Used only
/// for detecting if blocks are empty and contain only braces.
matching_block_spans: Vec<(Span, Span)>,
}
impl<'a> TokenTreesReader<'a> {
// Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
let mut buf = TokenStreamBuilder::default();
self.real_token();
while self.token != token::Eof {
buf.push(self.parse_token_tree()?);
}
Ok(buf.into_token_stream())
}
// Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
let mut buf = TokenStreamBuilder::default();
loop {
if let token::CloseDelim(..) = self.token.kind {
return buf.into_token_stream();
}
match self.parse_token_tree() {
Ok(tree) => buf.push(tree),
Err(mut e) => {
e.emit();
return buf.into_token_stream();
}
}
}
}
fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> {
let sm = self.string_reader.sess.source_map();
match self.token.kind {
token::Eof => {
let msg = "this file contains an unclosed delimiter";
let mut err =
self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
for &(_, sp) in &self.open_braces {
err.span_label(sp, "unclosed delimiter");
self.unmatched_braces.push(UnmatchedBrace {
expected_delim: token::DelimToken::Brace,
found_delim: None,
found_span: self.token.span,
unclosed_span: Some(sp),
candidate_span: None,
});
}
if let Some((delim, _)) = self.open_braces.last() {
if let Some((_, open_sp, close_sp)) =
self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
if let Some(close_padding) = sm.span_to_margin(*close_sp) {
if let Some(open_padding) = sm.span_to_margin(*open_sp) {
return delim == d && close_padding != open_padding;
}
}
false
})
// these are in reverse order as they get inserted on close, but
// we want the last open/first close
{
err.span_label(*open_sp, "this delimiter might not be properly closed...");
err.span_label(
*close_sp,
"...as it matches this but it has different indentation",
);
}
}
Err(err)
}
token::OpenDelim(delim) => {
// The span for the beginning of the delimited section
let pre_span = self.token.span;
// Parse the open delimiter.
self.open_braces.push((delim, self.token.span));
self.real_token();
// Parse the token trees within the delimiters.
// We stop at any delimiter so we can try to recover if the user
// uses an incorrect delimiter.
let tts = self.parse_token_trees_until_close_delim();
// Expand to cover the entire delimited token tree
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
match self.token.kind {
// Correct delimiter.
token::CloseDelim(d) if d == delim => {
let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
let close_brace_span = self.token.span;
if tts.is_empty() {
let empty_block_span = open_brace_span.to(close_brace_span);
if !sm.is_multiline(empty_block_span) {
// Only track if the block is in the form of `{}`, otherwise it is
// likely that it was written on purpose.
self.last_delim_empty_block_spans.insert(delim, empty_block_span);
}
}
match (open_brace, delim) {
// only add braces
(DelimToken::Brace, DelimToken::Brace) => {
self.matching_block_spans.push((open_brace_span, close_brace_span));
}
_ => {}
}
if self.open_braces.is_empty() {
// Clear up these spans to avoid suggesting them as we've found
// properly matched delimiters so far for an entire block.
self.matching_delim_spans.clear();
} else {
self.matching_delim_spans.push((
open_brace,
open_brace_span,
close_brace_span,
));
}
// Parse the closing delimiter.
self.real_token();
}
// Incorrect delimiter.
token::CloseDelim(other) => {
let mut unclosed_delimiter = None;
let mut candidate = None;
if self.last_unclosed_found_span != Some(self.token.span) {
// do not complain about the same unclosed delimiter multiple times
self.last_unclosed_found_span = Some(self.token.span);
// This is a conservative error: only report the last unclosed
// delimiter. The previous unclosed delimiters could actually be
// closed! The parser just hasn't gotten to them yet.
if let Some(&(_, sp)) = self.open_braces.last() {
unclosed_delimiter = Some(sp);
};
if let Some(current_padding) = sm.span_to_margin(self.token.span) {
for (brace, brace_span) in &self.open_braces {
if let Some(padding) = sm.span_to_margin(*brace_span) {
// high likelihood of these two corresponding
if current_padding == padding && brace == &other {
candidate = Some(*brace_span);
}
}
}
}
let (tok, _) = self.open_braces.pop().unwrap();
self.unmatched_braces.push(UnmatchedBrace {
expected_delim: tok,
found_delim: Some(other),
found_span: self.token.span,
unclosed_span: unclosed_delimiter,
candidate_span: candidate,
});
} else {
self.open_braces.pop();
}
// If the incorrect delimiter matches an earlier opening
// delimiter, then don't consume it (it can be used to
// close the earlier one). Otherwise, consume it.
// E.g., we try to recover from:
// fn foo() {
// bar(baz(
// } // Incorrect delimiter but matches the earlier `{`
if !self.open_braces.iter().any(|&(b, _)| b == other) {
self.real_token();
}
}
token::Eof => {
// Silently recover, the EOF token will be seen again
// and an error emitted then. Thus we don't pop from
// self.open_braces here.
}
_ => {}
}
Ok(TokenTree::Delimited(delim_span, delim, tts).into())
}
token::CloseDelim(delim) => {
// An unexpected closing delimiter (i.e., there is no
// matching opening delimiter).
let token_str = token_to_string(&self.token);
let msg = format!("unexpected closing delimiter: `{}`", token_str);
let mut err =
self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
// Braces are added at the end, so the last element is the biggest block
if let Some(parent) = self.matching_block_spans.last() {
if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
// Check if the empty block is in the last properly closed block
if (parent.0.to(parent.1)).contains(span) {
err.span_label(
span,
"block is empty, you might have not meant to close it",
);
} else {
err.span_label(parent.0, "this opening brace...");
err.span_label(parent.1, "...matches this closing brace");
}
} else {
err.span_label(parent.0, "this opening brace...");
err.span_label(parent.1, "...matches this closing brace");
}
}
err.span_label(self.token.span, "unexpected closing delimiter");
Err(err)
}
_ => {
let tt = TokenTree::Token(self.token.take());
self.real_token();
let is_joint = self.joint_to_prev == Joint && self.token.is_op();
Ok((tt, if is_joint { Joint } else { NonJoint }))
}
}
}
fn real_token(&mut self) {
self.joint_to_prev = Joint;
loop {
let token = self.string_reader.next_token();
match token.kind {
token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
self.joint_to_prev = NonJoint;
}
_ => {
self.token = token;
return;
}
}
}
}
}
#[derive(Default)]
struct TokenStreamBuilder {
buf: Vec<TreeAndJoint>,
}
impl TokenStreamBuilder {
fn push(&mut self, (tree, joint): TreeAndJoint) {
if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() {
if let TokenTree::Token(token) = &tree {
if let Some(glued) = prev_token.glue(token) {
self.buf.pop();
self.buf.push((TokenTree::Token(glued), joint));
return;
}
}
}
self.buf.push((tree, joint))
}
fn into_token_stream(self) -> TokenStream {
TokenStream::new(self.buf)
}
}
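
To see why `TokenStreamBuilder::push` glues tokens, consider `>` immediately followed by `>`: the lexer emits two tokens, but the parser wants a single `>>` when they are adjacent. A toy standalone sketch of the rule (the `Tok`, `glue`, and `push_glued` names are invented for illustration; rustc uses `Token::glue` above):

// A pair is (token, joint-to-next). When the previous token is joint and
// the pair has a glued form, replace the pair with the compound token.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok { Lt, Gt, Shr, Eq, Le }

fn glue(a: Tok, b: Tok) -> Option<Tok> {
    match (a, b) {
        (Tok::Gt, Tok::Gt) => Some(Tok::Shr), // `>` `>` -> `>>`
        (Tok::Lt, Tok::Eq) => Some(Tok::Le),  // `<` `=` -> `<=`
        _ => None,
    }
}

fn push_glued(buf: &mut Vec<(Tok, bool)>, (tok, joint): (Tok, bool)) {
    if let Some(&(prev, true)) = buf.last() {
        if let Some(glued) = glue(prev, tok) {
            buf.pop();
            buf.push((glued, joint));
            return;
        }
    }
    buf.push((tok, joint));
}

fn main() {
    let mut buf = Vec::new();
    push_glued(&mut buf, (Tok::Gt, true));  // joint: nothing between it and the next token
    push_glued(&mut buf, (Tok::Gt, false)); // glues into a single `>>`
    assert_eq!(buf, vec![(Tok::Shr, false)]);
}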

compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -0,0 +1,223 @@
//! Utilities for rendering escape sequence errors as diagnostics.
use std::iter::once;
use std::ops::Range;
use rustc_errors::{Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};
pub(crate) fn emit_unescape_error(
handler: &Handler,
// interior part of the literal, without quotes
lit: &str,
// full span of the literal, including quotes
span_with_quotes: Span,
mode: Mode,
// range of the error inside `lit`
range: Range<usize>,
error: EscapeError,
) {
tracing::debug!(
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
lit,
span_with_quotes,
mode,
range,
error
);
let span = {
let Range { start, end } = range;
let (start, end) = (start as u32, end as u32);
let lo = span_with_quotes.lo() + BytePos(start + 1);
let hi = lo + BytePos(end - start);
span_with_quotes.with_lo(lo).with_hi(hi)
};
let last_char = || {
let c = lit[range.clone()].chars().rev().next().unwrap();
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
(c, span)
};
match error {
EscapeError::LoneSurrogateUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.help("unicode escape must not be a surrogate")
.emit();
}
EscapeError::OutOfRangeUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.help("unicode escape must be at most 10FFFF")
.emit();
}
EscapeError::MoreThanOneChar => {
let msg = if mode.is_bytes() {
"if you meant to write a byte string literal, use double quotes"
} else {
"if you meant to write a `str` literal, use double quotes"
};
handler
.struct_span_err(
span_with_quotes,
"character literal may only contain one codepoint",
)
.span_suggestion(
span_with_quotes,
msg,
format!("\"{}\"", lit),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, _span) = last_char();
let mut msg = if mode.is_bytes() {
"byte constant must be escaped: "
} else {
"character constant must be escaped: "
}
.to_string();
push_escaped_char(&mut msg, c);
handler.span_err(span, msg.as_str())
}
EscapeError::BareCarriageReturn => {
let msg = if mode.in_double_quotes() {
"bare CR not allowed in string, use \\r instead"
} else {
"character constant must be escaped: \\r"
};
handler.span_err(span, msg);
}
EscapeError::BareCarriageReturnInRawString => {
assert!(mode.in_double_quotes());
let msg = "bare CR not allowed in raw string";
handler.span_err(span, msg);
}
EscapeError::InvalidEscape => {
let (c, span) = last_char();
let label =
if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
let mut msg = label.to_string();
msg.push_str(": ");
push_escaped_char(&mut msg, c);
let mut diag = handler.struct_span_err(span, msg.as_str());
diag.span_label(span, label);
if (c == '{' || c == '}') && !mode.is_bytes() {
diag.help(
"if used in a formatting string, \
curly braces are escaped with `{{` and `}}`",
);
} else if c == '\r' {
diag.help(
"this is an isolated carriage return; \
consider checking your editor and version control settings",
);
}
diag.emit();
}
EscapeError::TooShortHexEscape => {
handler.span_err(span, "numeric character escape is too short")
}
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
let (c, span) = last_char();
let mut msg = if error == EscapeError::InvalidCharInHexEscape {
"invalid character in numeric character escape: "
} else {
"invalid character in unicode escape: "
}
.to_string();
push_escaped_char(&mut msg, c);
handler.span_err(span, msg.as_str())
}
EscapeError::NonAsciiCharInByte => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(
span,
"byte constant must be ASCII. \
Use a \\xHH escape for a non-ASCII byte",
)
}
EscapeError::NonAsciiCharInByteString => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(span, "raw byte string must be ASCII")
}
EscapeError::OutOfRangeHexEscape => handler.span_err(
span,
"this form of character escape may only be used \
with characters in the range [\\x00-\\x7f]",
),
EscapeError::LeadingUnderscoreUnicodeEscape => {
let (_c, span) = last_char();
handler.span_err(span, "invalid start of unicode escape")
}
EscapeError::OverlongUnicodeEscape => {
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
}
EscapeError::UnclosedUnicodeEscape => {
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
}
EscapeError::NoBraceInUnicodeEscape => {
let msg = "incorrect unicode escape sequence";
let mut diag = handler.struct_span_err(span, msg);
let mut suggestion = "\\u{".to_owned();
let mut suggestion_len = 0;
let (c, char_span) = last_char();
let chars = once(c).chain(lit[range.end..].chars());
for c in chars.take(6).take_while(|c| c.is_digit(16)) {
suggestion.push(c);
suggestion_len += c.len_utf8();
}
if suggestion_len > 0 {
suggestion.push('}');
let lo = char_span.lo();
let hi = lo + BytePos(suggestion_len as u32);
diag.span_suggestion(
span.with_lo(lo).with_hi(hi),
"format of unicode escape sequences uses braces",
suggestion,
Applicability::MaybeIncorrect,
);
} else {
diag.span_label(span, msg);
diag.help("format of unicode escape sequences is `\\u{...}`");
}
diag.emit();
}
EscapeError::UnicodeEscapeInByte => handler.span_err(
span,
"unicode escape sequences cannot be used \
as a byte or in a byte string",
),
EscapeError::EmptyUnicodeEscape => {
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
}
EscapeError::ZeroChars => handler.span_err(span, "empty character literal"),
EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"),
}
}
/// Pushes a character to a message string for error reporting
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
match c {
'\u{20}'..='\u{7e}' => {
// Don't escape \, ' or " for user-facing messages
msg.push(c);
}
_ => {
msg.extend(c.escape_default());
}
}
}
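
For instance (a standalone sketch, not part of the commit), a control character such as BEL falls outside `'\u{20}'..='\u{7e}'` and is therefore rendered through `escape_default`:

fn main() {
    let mut msg = String::from("unknown character escape: ");
    // '\u{7}' (BEL) is not printable ASCII, so it is escaped as `\u{7}`.
    msg.extend('\u{7}'.escape_default());
    assert_eq!(msg, "unknown character escape: \\u{7}");
}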

compiler/rustc_parse/src/lexer/unicode_chars.rs
@@ -0,0 +1,392 @@
// Characters and their corresponding confusables were collected from
// http://www.unicode.org/Public/security/10.0.0/confusables.txt
use super::StringReader;
use crate::token;
use rustc_errors::{Applicability, DiagnosticBuilder};
use rustc_span::{symbol::kw, BytePos, Pos, Span};
#[rustfmt::skip] // for line breaks
const UNICODE_ARRAY: &[(char, &str, char)] = &[
('', "Line Separator", ' '),
('', "Paragraph Separator", ' '),
('', "Ogham Space mark", ' '),
(' ', "En Quad", ' '),
('', "Em Quad", ' '),
('', "En Space", ' '),
('', "Em Space", ' '),
('', "Three-Per-Em Space", ' '),
('', "Four-Per-Em Space", ' '),
('', "Six-Per-Em Space", ' '),
('', "Punctuation Space", ' '),
('', "Thin Space", ' '),
('', "Hair Space", ' '),
('', "Medium Mathematical Space", ' '),
(' ', "No-Break Space", ' '),
('', "Figure Space", ' '),
('', "Narrow No-Break Space", ' '),
(' ', "Ideographic Space", ' '),
('ߺ', "Nko Lajanyalan", '_'),
('', "Dashed Low Line", '_'),
('', "Centreline Low Line", '_'),
('', "Wavy Low Line", '_'),
('_', "Fullwidth Low Line", '_'),
('', "Hyphen", '-'),
('', "Non-Breaking Hyphen", '-'),
('', "Figure Dash", '-'),
('', "En Dash", '-'),
('—', "Em Dash", '-'),
('', "Small Em Dash", '-'),
('۔', "Arabic Full Stop", '-'),
('', "Hyphen Bullet", '-'),
('˗', "Modifier Letter Minus Sign", '-'),
('', "Minus Sign", '-'),
('', "Heavy Minus Sign", '-'),
('', "Coptic Letter Dialect-P Ni", '-'),
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
('', "Fullwidth Hyphen-Minus", '-'),
('―', "Horizontal Bar", '-'),
('─', "Box Drawings Light Horizontal", '-'),
('━', "Box Drawings Heavy Horizontal", '-'),
('㇐', "CJK Stroke H", '-'),
('ꟷ', "Latin Epigraphic Letter Sideways I", '-'),
('ᅳ', "Hangul Jungseong Eu", '-'),
('ㅡ', "Hangul Letter Eu", '-'),
('一', "CJK Unified Ideograph-4E00", '-'),
('⼀', "Kangxi Radical One", '-'),
('؍', "Arabic Date Separator", ','),
('٫', "Arabic Decimal Separator", ','),
('', "Single Low-9 Quotation Mark", ','),
('¸', "Cedilla", ','),
('', "Lisu Letter Tone Na Po", ','),
('', "Fullwidth Comma", ','),
(';', "Greek Question Mark", ';'),
('', "Fullwidth Semicolon", ';'),
('︔', "Presentation Form For Vertical Semicolon", ';'),
('', "Devanagari Sign Visarga", ':'),
('', "Gujarati Sign Visarga", ':'),
('', "Fullwidth Colon", ':'),
('։', "Armenian Full Stop", ':'),
('܃', "Syriac Supralinear Colon", ':'),
('܄', "Syriac Sublinear Colon", ':'),
('', "Runic Multiple Punctuation", ':'),
('', "Presentation Form For Vertical Two Dot Leader", ':'),
('', "Mongolian Full Stop", ':'),
('', "Mongolian Manchu Full Stop", ':'),
('', "Two Dot Punctuation", ':'),
('׃', "Hebrew Punctuation Sof Pasuq", ':'),
('˸', "Modifier Letter Raised Colon", ':'),
('', "Modifier Letter Colon", ':'),
('', "Ratio", ':'),
('ː', "Modifier Letter Triangular Colon", ':'),
('', "Lisu Letter Tone Mya Jeu", ':'),
('︓', "Presentation Form For Vertical Colon", ':'),
('', "Fullwidth Exclamation Mark", '!'),
('ǃ', "Latin Letter Retroflex Click", '!'),
('', "Tifinagh Letter Tuareg Yang", '!'),
('︕', "Presentation Form For Vertical Exclamation Mark", '!'),
('ʔ', "Latin Letter Glottal Stop", '?'),
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
('', "Devanagari Letter Glottal Stop", '?'),
('', "Cherokee Letter He", '?'),
('', "Bamum Letter Ntuu", '?'),
('', "Fullwidth Question Mark", '?'),
('︖', "Presentation Form For Vertical Question Mark", '?'),
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
('', "One Dot Leader", '.'),
('܁', "Syriac Supralinear Full Stop", '.'),
('܂', "Syriac Sublinear Full Stop", '.'),
('', "Vai Full Stop", '.'),
('𐩐', "Kharoshthi Punctuation Dot", '.'),
('٠', "Arabic-Indic Digit Zero", '.'),
('۰', "Extended Arabic-Indic Digit Zero", '.'),
('', "Lisu Letter Tone Mya Ti", '.'),
('·', "Middle Dot", '.'),
('・', "Katakana Middle Dot", '.'),
('・', "Halfwidth Katakana Middle Dot", '.'),
('᛫', "Runic Single Punctuation", '.'),
('·', "Greek Ano Teleia", '.'),
('⸱', "Word Separator Middle Dot", '.'),
('𐄁', "Aegean Word Separator Dot", '.'),
('•', "Bullet", '.'),
('‧', "Hyphenation Point", '.'),
('∙', "Bullet Operator", '.'),
('⋅', "Dot Operator", '.'),
('ꞏ', "Latin Letter Sinological Dot", '.'),
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
('ᐧ', "Canadian Syllabics Final Middle Dot", '.'),
('', "Fullwidth Full Stop", '.'),
('。', "Ideographic Full Stop", '.'),
('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'),
('՝', "Armenian Comma", '\''),
('', "Fullwidth Apostrophe", '\''),
('', "Left Single Quotation Mark", '\''),
('', "Right Single Quotation Mark", '\''),
('', "Single High-Reversed-9 Quotation Mark", '\''),
('', "Prime", '\''),
('', "Reversed Prime", '\''),
('՚', "Armenian Apostrophe", '\''),
('׳', "Hebrew Punctuation Geresh", '\''),
('`', "Grave Accent", '\''),
('', "Greek Varia", '\''),
('', "Fullwidth Grave Accent", '\''),
('´', "Acute Accent", '\''),
('΄', "Greek Tonos", '\''),
('', "Greek Oxia", '\''),
('', "Greek Koronis", '\''),
('᾿', "Greek Psili", '\''),
('', "Greek Dasia", '\''),
('ʹ', "Modifier Letter Prime", '\''),
('ʹ', "Greek Numeral Sign", '\''),
('ˈ', "Modifier Letter Vertical Line", '\''),
('ˊ', "Modifier Letter Acute Accent", '\''),
('ˋ', "Modifier Letter Grave Accent", '\''),
('˴', "Modifier Letter Middle Grave Accent", '\''),
('ʻ', "Modifier Letter Turned Comma", '\''),
('ʽ', "Modifier Letter Reversed Comma", '\''),
('ʼ', "Modifier Letter Apostrophe", '\''),
('ʾ', "Modifier Letter Right Half Ring", '\''),
('', "Latin Small Letter Saltillo", '\''),
('י', "Hebrew Letter Yod", '\''),
('ߴ', "Nko High Tone Apostrophe", '\''),
('ߵ', "Nko Low Tone Apostrophe", '\''),
('', "Canadian Syllabics West-Cree P", '\''),
('', "Runic Letter Short-Twig-Sol S", '\''),
('𖽑', "Miao Sign Aspiration", '\''),
('𖽒', "Miao Sign Reformed Voicing", '\''),
('᳓', "Vedic Sign Nihshvasa", '"'),
('', "Fullwidth Quotation Mark", '"'),
('“', "Left Double Quotation Mark", '"'),
('”', "Right Double Quotation Mark", '"'),
('‟', "Double High-Reversed-9 Quotation Mark", '"'),
('″', "Double Prime", '"'),
('‶', "Reversed Double Prime", '"'),
('〃', "Ditto Mark", '"'),
('״', "Hebrew Punctuation Gershayim", '"'),
('˝', "Double Acute Accent", '"'),
('ʺ', "Modifier Letter Double Prime", '"'),
('˶', "Modifier Letter Middle Double Acute Accent", '"'),
('˵', "Modifier Letter Middle Double Grave Accent", '"'),
('ˮ', "Modifier Letter Double Apostrophe", '"'),
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
('', "Fullwidth Left Parenthesis", '('),
('', "Medium Left Parenthesis Ornament", '('),
('', "Ornate Left Parenthesis", '('),
('', "Fullwidth Right Parenthesis", ')'),
('', "Medium Right Parenthesis Ornament", ')'),
('﴿', "Ornate Right Parenthesis", ')'),
('', "Fullwidth Left Square Bracket", '['),
('', "Light Left Tortoise Shell Bracket Ornament", '['),
('「', "Left Corner Bracket", '['),
('『', "Left White Corner Bracket", '['),
('【', "Left Black Lenticular Bracket", '['),
('', "Left Tortoise Shell Bracket", '['),
('〖', "Left White Lenticular Bracket", '['),
('〘', "Left White Tortoise Shell Bracket", '['),
('〚', "Left White Square Bracket", '['),
('', "Fullwidth Right Square Bracket", ']'),
('', "Light Right Tortoise Shell Bracket Ornament", ']'),
('」', "Right Corner Bracket", ']'),
('』', "Right White Corner Bracket", ']'),
('】', "Right Black Lenticular Bracket", ']'),
('', "Right Tortoise Shell Bracket", ']'),
('〗', "Right White Lenticular Bracket", ']'),
('〙', "Right White Tortoise Shell Bracket", ']'),
('〛', "Right White Square Bracket", ']'),
('', "Medium Left Curly Bracket Ornament", '{'),
('𝄔', "Musical Symbol Brace", '{'),
('', "Fullwidth Left Curly Bracket", '{'),
('', "Medium Right Curly Bracket Ornament", '}'),
('', "Fullwidth Right Curly Bracket", '}'),
('', "Low Asterisk", '*'),
('٭', "Arabic Five Pointed Star", '*'),
('', "Asterisk Operator", '*'),
('𐌟', "Old Italic Letter Ess", '*'),
('', "Fullwidth Asterisk", '*'),
('', "Philippine Single Punctuation", '/'),
('', "Caret Insertion Point", '/'),
('', "Division Slash", '/'),
('', "Fraction Slash", '/'),
('', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'),
('', "Mathematical Rising Diagonal", '/'),
('', "Big Solidus", '/'),
('𝈺', "Greek Instrumental Notation Symbol-47", '/'),
('', "CJK Stroke Sp", '/'),
('', "Vertical Kana Repeat Mark Upper Half", '/'),
('', "Coptic Capital Letter Old Coptic Esh", '/'),
('', "Katakana Letter No", '/'),
('丿', "CJK Unified Ideograph-4E3F", '/'),
('', "Kangxi Radical Slash", '/'),
('', "Fullwidth Solidus", '/'),
('', "Fullwidth Reverse Solidus", '\\'),
('', "Small Reverse Solidus", '\\'),
('', "Set Minus", '\\'),
('', "Mathematical Falling Diagonal", '\\'),
('', "Reverse Solidus Operator", '\\'),
('', "Big Reverse Solidus", '\\'),
('', "Greek Vocal Notation Symbol-16", '\\'),
('', "Greek Instrumental Symbol-48", '\\'),
('', "CJK Stroke D", '\\'),
('', "CJK Unified Ideograph-4E36", '\\'),
('', "Kangxi Radical Dot", '\\'),
('、', "Ideographic Comma", '\\'),
('ヽ', "Katakana Iteration Mark", '\\'),
('', "Latin Small Letter Um", '&'),
('', "Fullwidth Ampersand", '&'),
('', "Runic Cross Punctuation", '+'),
('', "Heavy Plus Sign", '+'),
('𐊛', "Lycian Letter H", '+'),
('﬩', "Hebrew Letter Alternative Plus Sign", '+'),
('', "Fullwidth Plus Sign", '+'),
('', "Single Left-Pointing Angle Quotation Mark", '<'),
('', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
('˂', "Modifier Letter Left Arrowhead", '<'),
('𝈶', "Greek Instrumental Symbol-40", '<'),
('', "Canadian Syllabics Pa", '<'),
('', "Runic Letter Kauna", '<'),
('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'),
('⟨', "Mathematical Left Angle Bracket", '<'),
('〈', "Left-Pointing Angle Bracket", '<'),
('〈', "Left Angle Bracket", '<'),
('㇛', "CJK Stroke Pd", '<'),
('く', "Hiragana Letter Ku", '<'),
('𡿨', "CJK Unified Ideograph-21FE8", '<'),
('《', "Left Double Angle Bracket", '<'),
('', "Fullwidth Less-Than Sign", '<'),
('', "Canadian Syllabics Hyphen", '='),
('', "Double Hyphen", '='),
('', "Katakana-Hiragana Double Hyphen", '='),
('', "Lisu Punctuation Full Stop", '='),
('', "Fullwidth Equals Sign", '='),
('', "Single Right-Pointing Angle Quotation Mark", '>'),
('', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
('˃', "Modifier Letter Right Arrowhead", '>'),
('𝈷', "Greek Instrumental Symbol-42", '>'),
('', "Canadian Syllabics Po", '>'),
('𖼿', "Miao Letter Archaic Zza", '>'),
('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'),
('⟩', "Mathematical Right Angle Bracket", '>'),
('〉', "Right-Pointing Angle Bracket", '>'),
('〉', "Right Angle Bracket", '>'),
('》', "Right Double Angle Bracket", '>'),
('', "Fullwidth Greater-Than Sign", '>'),
];
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs into the
// token, instead of keeping the substitution token in this table. Ideally, this should be
// inside `rustc_lexer`. However, we should first remove compound tokens like `<<` from
// `rustc_lexer`, and then add fancier error recovery to it, as there will be less overall
// work to do this way.
const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
(' ', "Space", Some(token::Whitespace)),
('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
(',', "Comma", Some(token::Comma)),
(';', "Semicolon", Some(token::Semi)),
(':', "Colon", Some(token::Colon)),
('!', "Exclamation Mark", Some(token::Not)),
('?', "Question Mark", Some(token::Question)),
('.', "Period", Some(token::Dot)),
('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
(')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
(']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
('*', "Asterisk", Some(token::BinOp(token::Star))),
('/', "Slash", Some(token::BinOp(token::Slash))),
('\\', "Backslash", None),
('&', "Ampersand", Some(token::BinOp(token::And))),
('+', "Plus Sign", Some(token::BinOp(token::Plus))),
('<', "Less-Than Sign", Some(token::Lt)),
('=', "Equals Sign", Some(token::Eq)),
('>', "Greater-Than Sign", Some(token::Gt)),
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
// spitting the correct token out.
('\'', "Single Quote", None),
('"', "Quotation Mark", None),
];
crate fn check_for_substitution<'a>(
reader: &StringReader<'a>,
pos: BytePos,
ch: char,
err: &mut DiagnosticBuilder<'a>,
) -> Option<token::TokenKind> {
let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
None => return None,
};
let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8()));
let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
None => {
let msg = format!("substitution character not found for '{}'", ch);
reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
return None;
}
};
// special help suggestion for "directed" double quotes
if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
let msg = format!(
"Unicode characters '“' (Left Double Quotation Mark) and \
'”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
ascii_char, ascii_name
);
err.span_suggestion(
Span::with_root_ctxt(
pos,
pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
),
&msg,
format!("\"{}\"", s),
Applicability::MaybeIncorrect,
);
} else {
let msg = format!(
"Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
ch, u_name, ascii_char, ascii_name
);
err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
}
token.clone()
}
/// Extracts the string found at the current position between the given delimiters, if any.
fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> {
let mut chars = text.chars();
let first_char = chars.next()?;
if first_char != from_ch {
return None;
}
let last_char_idx = chars.as_str().find(to_ch)?;
Some(&chars.as_str()[..last_char_idx])
}
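
`peek_delimited` matches only when `from_ch` is the very first character of `text`, which is exactly the shape of the smart-quote check in `check_for_substitution`. A standalone copy with a usage example (the `main` harness is invented for illustration):

fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> {
    let mut chars = text.chars();
    if chars.next()? != from_ch {
        return None;
    }
    // Search the remainder for the closing delimiter.
    let last_char_idx = chars.as_str().find(to_ch)?;
    Some(&chars.as_str()[..last_char_idx])
}

fn main() {
    // The “smart quotes” case special-cased above.
    assert_eq!(peek_delimited("“hello” world", '“', '”'), Some("hello"));
    // No match unless the opening delimiter comes first.
    assert_eq!(peek_delimited("x“hello”", '“', '”'), None);
}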

compiler/rustc_parse/src/lib.rs
@@ -0,0 +1,595 @@
//! The main parser interface.
#![feature(bool_to_option)]
#![feature(crate_visibility_modifier)]
#![feature(bindings_after_at)]
#![feature(try_blocks)]
#![feature(or_patterns)]
use rustc_ast as ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};
use smallvec::SmallVec;
use std::mem;
use std::path::Path;
use std::str;
use tracing::{debug, info};
pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments");
#[macro_use]
pub mod parser;
use parser::{emit_unclosed_delims, make_unclosed_delims_error, Parser};
pub mod lexer;
pub mod validate_attr;
// A bunch of utility functions of the form `parse_<thing>_from_<source>`
// where <thing> includes crate, expr, item, stmt, tts, and one that
// uses a HOF to parse anything, and <source> includes file and
// `source_str`.
/// A variant of `panictry!` that works on a `Vec<Diagnostic>` instead of a single `DiagnosticBuilder`.
macro_rules! panictry_buffer {
($handler:expr, $e:expr) => {{
use rustc_errors::FatalError;
use std::result::Result::{Err, Ok};
match $e {
Ok(e) => e,
Err(errs) => {
for e in errs {
$handler.emit_diagnostic(&e);
}
FatalError.raise()
}
}
}};
}
pub fn parse_crate_from_file<'a>(input: &Path, sess: &'a ParseSess) -> PResult<'a, ast::Crate> {
let mut parser = new_parser_from_file(sess, input, None);
parser.parse_crate_mod()
}
pub fn parse_crate_attrs_from_file<'a>(
input: &Path,
sess: &'a ParseSess,
) -> PResult<'a, Vec<ast::Attribute>> {
let mut parser = new_parser_from_file(sess, input, None);
parser.parse_inner_attributes()
}
pub fn parse_crate_from_source_str(
name: FileName,
source: String,
sess: &ParseSess,
) -> PResult<'_, ast::Crate> {
new_parser_from_source_str(sess, name, source).parse_crate_mod()
}
pub fn parse_crate_attrs_from_source_str(
name: FileName,
source: String,
sess: &ParseSess,
) -> PResult<'_, Vec<ast::Attribute>> {
new_parser_from_source_str(sess, name, source).parse_inner_attributes()
}
pub fn parse_stream_from_source_str(
name: FileName,
source: String,
sess: &ParseSess,
override_span: Option<Span>,
) -> TokenStream {
let (stream, mut errors) =
source_file_to_stream(sess, sess.source_map().new_source_file(name, source), override_span);
emit_unclosed_delims(&mut errors, &sess);
stream
}
/// Creates a new parser from a source string.
pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) -> Parser<'_> {
panictry_buffer!(&sess.span_diagnostic, maybe_new_parser_from_source_str(sess, name, source))
}
/// Creates a new parser from a source string. Returns any buffered errors from lexing the initial
/// token stream.
pub fn maybe_new_parser_from_source_str(
sess: &ParseSess,
name: FileName,
source: String,
) -> Result<Parser<'_>, Vec<Diagnostic>> {
maybe_source_file_to_parser(sess, sess.source_map().new_source_file(name, source))
}
/// Creates a new parser, handling errors as appropriate if the file doesn't exist.
/// If a span is given, that is used on an error as the source of the problem.
pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path, sp: Option<Span>) -> Parser<'a> {
source_file_to_parser(sess, file_to_source_file(sess, path, sp))
}
/// Creates a new parser, returning buffered diagnostics if the file doesn't exist,
/// or from lexing the initial token stream.
pub fn maybe_new_parser_from_file<'a>(
sess: &'a ParseSess,
path: &Path,
) -> Result<Parser<'a>, Vec<Diagnostic>> {
let file = try_file_to_source_file(sess, path, None).map_err(|db| vec![db])?;
maybe_source_file_to_parser(sess, file)
}
/// Given a `source_file` and config, returns a parser.
fn source_file_to_parser(sess: &ParseSess, source_file: Lrc<SourceFile>) -> Parser<'_> {
panictry_buffer!(&sess.span_diagnostic, maybe_source_file_to_parser(sess, source_file))
}
/// Given a `source_file` and config, returns a parser. Returns any buffered errors from lexing the
/// initial token stream.
fn maybe_source_file_to_parser(
sess: &ParseSess,
source_file: Lrc<SourceFile>,
) -> Result<Parser<'_>, Vec<Diagnostic>> {
let end_pos = source_file.end_pos;
let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?;
let mut parser = stream_to_parser(sess, stream, None);
parser.unclosed_delims = unclosed_delims;
if parser.token == token::Eof {
parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt());
}
Ok(parser)
}
// Must preserve old name for now, because `quote!` from the *existing*
// compiler expands into it.
pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser<'_> {
stream_to_parser(sess, tts.into_iter().collect(), crate::MACRO_ARGUMENTS)
}
// Base abstractions
/// Given a session and a path and an optional span (for error reporting),
/// add the path to the session's source_map and return the new source_file or
/// error when a file can't be read.
fn try_file_to_source_file(
sess: &ParseSess,
path: &Path,
spanopt: Option<Span>,
) -> Result<Lrc<SourceFile>, Diagnostic> {
sess.source_map().load_file(path).map_err(|e| {
let msg = format!("couldn't read {}: {}", path.display(), e);
let mut diag = Diagnostic::new(Level::Fatal, &msg);
if let Some(sp) = spanopt {
diag.set_span(sp);
}
diag
})
}
/// Given a session and a path and an optional span (for error reporting),
/// adds the path to the session's `source_map` and returns the new `source_file`.
fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>) -> Lrc<SourceFile> {
match try_file_to_source_file(sess, path, spanopt) {
Ok(source_file) => source_file,
Err(d) => {
sess.span_diagnostic.emit_diagnostic(&d);
FatalError.raise();
}
}
}
/// Given a `source_file`, produces a sequence of token trees.
pub fn source_file_to_stream(
sess: &ParseSess,
source_file: Lrc<SourceFile>,
override_span: Option<Span>,
) -> (TokenStream, Vec<lexer::UnmatchedBrace>) {
panictry_buffer!(&sess.span_diagnostic, maybe_file_to_stream(sess, source_file, override_span))
}
/// Given a source file, produces a sequence of token trees. Returns any buffered errors from
/// parsing the token stream.
pub fn maybe_file_to_stream(
sess: &ParseSess,
source_file: Lrc<SourceFile>,
override_span: Option<Span>,
) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
let srdr = lexer::StringReader::new(sess, source_file, override_span);
let (token_trees, unmatched_braces) = srdr.into_token_trees();
match token_trees {
Ok(stream) => Ok((stream, unmatched_braces)),
Err(err) => {
let mut buffer = Vec::with_capacity(1);
err.buffer(&mut buffer);
// Not using `emit_unclosed_delims` here so that the errors can be buffered with `err.buffer`
for unmatched in unmatched_braces {
if let Some(err) = make_unclosed_delims_error(unmatched, &sess) {
err.buffer(&mut buffer);
}
}
Err(buffer)
}
}
}
/// Given a stream and the `ParseSess`, produces a parser.
pub fn stream_to_parser<'a>(
sess: &'a ParseSess,
stream: TokenStream,
subparser_name: Option<&'static str>,
) -> Parser<'a> {
Parser::new(sess, stream, false, subparser_name)
}
/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
pub fn parse_in<'a, T>(
sess: &'a ParseSess,
tts: TokenStream,
name: &'static str,
mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
) -> PResult<'a, T> {
let mut parser = Parser::new(sess, tts, false, Some(name));
let result = f(&mut parser)?;
if parser.token != token::Eof {
parser.unexpected()?;
}
Ok(result)
}
// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
// fact that architecturally, we are using parsing (read on below to understand why).
pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
// A `Nonterminal` is often a parsed AST item. At this point we now
// need to convert the parsed AST to an actual token stream, e.g.
// un-parse it basically.
//
// Unfortunately there's not really a great way to do that in a
// guaranteed lossless fashion right now. The fallback here is to just
// stringify the AST node and reparse it, but this loses all span
// information.
//
// As a result, some AST nodes are annotated with the token stream they
// came from. Here we attempt to extract these lossless token streams
// before we fall back to the stringification.
let tokens = match *nt {
Nonterminal::NtItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
Nonterminal::NtIdent(ident, is_raw) => {
Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
}
Nonterminal::NtLifetime(ident) => {
Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
}
Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
Nonterminal::NtExpr(ref expr) => {
if expr.tokens.is_none() {
debug!("missing tokens for expr {:?}", expr);
}
prepend_attrs(sess, &expr.attrs, expr.tokens.as_ref(), span)
}
_ => None,
};
// FIXME(#43081): Avoid this pretty-print + reparse hack
let source = pprust::nonterminal_to_string(nt);
let filename = FileName::macro_expansion_source_code(&source);
let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
// During early phases of the compiler the AST could get modified
// directly (e.g., attributes added or removed) and the internal cache
// of tokens may not be invalidated or updated. Consequently, if the
// "lossless" token stream disagrees with our actual stringification
// (which has historically been much more battle-tested) then we go
// with the lossy stream anyway (losing span information).
//
// Note that the comparison isn't `==` here to avoid comparing spans,
// but it *also* is a "probable" equality which is a pretty weird
// definition. We mostly want to catch actual changes to the AST
// like a `#[cfg]` being processed or some weird `macro_rules!`
// expansion.
//
// What we *don't* want to catch is the fact that a user-defined
// literal like `0xf` is stringified as `15`, causing the cached token
// stream to not be literal `==` token-wise (ignoring spans) to the
// token stream we got from stringification.
//
// Instead the "probably equal" check here is "does each token
// recursively have the same discriminant?" We basically don't look at
// the token values here and assume that such fine grained token stream
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
return tokens;
}
info!(
"cached tokens found, but they're not \"probably equal\", \
going with stringified version"
);
info!("cached tokens: {:?}", tokens);
info!("reparsed tokens: {:?}", tokens_for_real);
}
tokens_for_real
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokenstream_probably_equal_for_proc_macro(
first: &TokenStream,
other: &TokenStream,
sess: &ParseSess,
) -> bool {
// When checking for `probably_eq`, we ignore certain tokens that aren't
// preserved in the AST. Because they are not preserved, the pretty
// printer arbitrarily adds or removes them when printing as token
// streams, making a comparison between a token stream generated from an
// AST and a token stream which was parsed into an AST more reliable.
fn semantic_tree(tree: &TokenTree) -> bool {
if let TokenTree::Token(token) = tree {
if let
// The pretty printer tends to add trailing commas to
// everything, and in particular, after struct fields.
| token::Comma
// The pretty printer emits `NoDelim` as whitespace.
| token::OpenDelim(DelimToken::NoDelim)
| token::CloseDelim(DelimToken::NoDelim)
// The pretty printer collapses many semicolons into one.
| token::Semi
// The pretty printer collapses whitespace arbitrarily and can
// introduce whitespace from `NoDelim`.
| token::Whitespace
// The pretty printer can turn `$crate` into `::crate_name`
| token::ModSep = token.kind {
return false;
}
}
true
}
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
//
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
// when determining if two `TokenStream`s are 'probably equal'.
//
// Therefore, we use `break_two_token_op` to convert all tokens
// to the 'unglued' form (if it exists). This ensures that two
// `TokenStream`s which differ only in how their tokens are glued
// will be considered 'probably equal', which allows us to keep spans.
//
// This is important when the original `TokenStream` contained
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
// will be omitted when we pretty-print, which can cause the original
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
// leading to some tokens being 'glued' together in one stream but not
// the other. See #68489 for more details.
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
// In almost all cases, we should have either zero or one levels
// of 'unglueing'. However, in some unusual cases, we may need
// to iterate breaking tokens multiple times. For example:
// `[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]`
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
// at least one more element than 'temp', so we will try to break tokens
// again.
// * If we cannot break any tokens in 'out', we are done
loop {
let mut temp = SmallVec::<[_; 2]>::new();
let mut changed = false;
for token in out.into_iter() {
if let Some((first, second)) = token.kind.break_two_token_op() {
temp.push(Token::new(first, DUMMY_SP));
temp.push(Token::new(second, DUMMY_SP));
changed = true;
} else {
temp.push(token);
}
}
out = temp;
if !changed {
break;
}
}
token_trees = out.into_iter().map(TokenTree::Token).collect();
} else {
token_trees = SmallVec::new();
token_trees.push(tree);
}
token_trees.into_iter()
}
let expand_nt = |tree: TokenTree| {
if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
// When checking tokenstreams for 'probable equality', we are comparing
// a captured (from parsing) `TokenStream` to a reparsed tokenstream.
// The reparsed Tokenstream will never have `None`-delimited groups,
// since they are only ever inserted as a result of macro expansion.
// Therefore, inserting a `None`-delimited group here (when we
// convert a nested `Nonterminal` to a tokenstream) would cause
// a mismatch with the reparsed tokenstream.
//
// Note that we currently do not handle the case where the
// reparsed stream has a `Parenthesis`-delimited group
// inserted. This will cause a spurious mismatch:
// issue #75734 tracks resolving this.
nt_to_tokenstream(nt, sess, *span).into_trees()
} else {
TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
}
};
// Break tokens after we expand any nonterminals, so that we break tokens
// that are produced as a result of nonterminal expansion.
let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
return false;
}
}
t1.next().is_none() && t2.next().is_none()
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
//
// This is otherwise the same as `eq_unspanned`, only recursing with a
// different method.
pub fn tokentree_probably_equal_for_proc_macro(
first: &TokenTree,
other: &TokenTree,
sess: &ParseSess,
) -> bool {
match (first, other) {
(TokenTree::Token(token), TokenTree::Token(token2)) => {
token_probably_equal_for_proc_macro(token, token2)
}
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
}
_ => false,
}
}
// See comments in `Nonterminal::to_tokenstream` for why we care about
// *probably* equal here rather than actual equality
fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
if mem::discriminant(&first.kind) != mem::discriminant(&other.kind) {
return false;
}
use rustc_ast::token::TokenKind::*;
match (&first.kind, &other.kind) {
(&Eq, &Eq)
| (&Lt, &Lt)
| (&Le, &Le)
| (&EqEq, &EqEq)
| (&Ne, &Ne)
| (&Ge, &Ge)
| (&Gt, &Gt)
| (&AndAnd, &AndAnd)
| (&OrOr, &OrOr)
| (&Not, &Not)
| (&Tilde, &Tilde)
| (&At, &At)
| (&Dot, &Dot)
| (&DotDot, &DotDot)
| (&DotDotDot, &DotDotDot)
| (&DotDotEq, &DotDotEq)
| (&Comma, &Comma)
| (&Semi, &Semi)
| (&Colon, &Colon)
| (&ModSep, &ModSep)
| (&RArrow, &RArrow)
| (&LArrow, &LArrow)
| (&FatArrow, &FatArrow)
| (&Pound, &Pound)
| (&Dollar, &Dollar)
| (&Question, &Question)
| (&Whitespace, &Whitespace)
| (&Comment, &Comment)
| (&Eof, &Eof) => true,
(&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a1, a2, a3), &DocComment(b1, b2, b3)) => a1 == b1 && a2 == b2 && a3 == b3,
(&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
(&Lifetime(a), &Lifetime(b)) => a == b,
(&Ident(a, b), &Ident(c, d)) => {
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
}
(&Interpolated(..), &Interpolated(..)) => panic!("Unexpanded Interpolated!"),
_ => panic!("forgot to add a token?"),
}
}
fn prepend_attrs(
sess: &ParseSess,
attrs: &[ast::Attribute],
tokens: Option<&tokenstream::TokenStream>,
span: rustc_span::Span,
) -> Option<tokenstream::TokenStream> {
let tokens = tokens?;
if attrs.is_empty() {
return Some(tokens.clone());
}
let mut builder = tokenstream::TokenStreamBuilder::new();
for attr in attrs {
assert_eq!(
attr.style,
ast::AttrStyle::Outer,
"inner attributes should prevent cached tokens from existing"
);
let source = pprust::attribute_to_string(attr);
let macro_filename = FileName::macro_expansion_source_code(&source);
let item = match attr.kind {
ast::AttrKind::Normal(ref item) => item,
ast::AttrKind::DocComment(..) => {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
builder.push(stream);
continue;
}
};
// synthesize # [ $path $tokens ] manually here
let mut brackets = tokenstream::TokenStreamBuilder::new();
// For simple paths, push the identifier directly
if item.path.segments.len() == 1 && item.path.segments[0].args.is_none() {
let ident = item.path.segments[0].ident;
let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
brackets.push(tokenstream::TokenTree::token(token, ident.span));
// ... and for more complicated paths, fall back to a reparse hack that
// should eventually be removed.
} else {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
brackets.push(stream);
}
brackets.push(item.args.outer_tokens());
// The span we list here for `#` and for `[ ... ]` are both wrong in
// that it encompasses more than each token, but it hopefully is "good
// enough" for now at least.
builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
let delim_span = tokenstream::DelimSpan::from_single(attr.span);
builder.push(tokenstream::TokenTree::Delimited(
delim_span,
token::DelimToken::Bracket,
brackets.build(),
));
}
builder.push(tokens.clone());
Some(builder.build())
}

View file

@ -0,0 +1,304 @@
use super::{Parser, PathStyle};
use rustc_ast as ast;
use rustc_ast::attr;
use rustc_ast::token::{self, Nonterminal};
use rustc_ast_pretty::pprust;
use rustc_errors::{error_code, PResult};
use rustc_span::Span;
use tracing::debug;
#[derive(Debug)]
pub(super) enum InnerAttrPolicy<'a> {
Permitted,
Forbidden { reason: &'a str, saw_doc_comment: bool, prev_attr_sp: Option<Span> },
}
const DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG: &str = "an inner attribute is not \
permitted in this context";
pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPolicy::Forbidden {
reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG,
saw_doc_comment: false,
prev_attr_sp: None,
};
impl<'a> Parser<'a> {
/// Parses attributes that appear before an item.
pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
let mut attrs: Vec<ast::Attribute> = Vec::new();
let mut just_parsed_doc_comment = false;
loop {
debug!("parse_outer_attributes: self.token={:?}", self.token);
if self.check(&token::Pound) {
let inner_error_reason = if just_parsed_doc_comment {
"an inner attribute is not permitted following an outer doc comment"
} else if !attrs.is_empty() {
"an inner attribute is not permitted following an outer attribute"
} else {
DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG
};
let inner_parse_policy = InnerAttrPolicy::Forbidden {
reason: inner_error_reason,
saw_doc_comment: just_parsed_doc_comment,
prev_attr_sp: attrs.last().map(|a| a.span),
};
let attr = self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?;
attrs.push(attr);
just_parsed_doc_comment = false;
} else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span);
if attr.style != ast::AttrStyle::Outer {
self.sess
.span_diagnostic
.struct_span_err_with_code(
self.token.span,
"expected outer doc comment",
error_code!(E0753),
)
.note(
"inner doc comments like this (starting with \
`//!` or `/*!`) can only appear before items",
)
.emit();
}
attrs.push(attr);
self.bump();
just_parsed_doc_comment = true;
} else {
break;
}
}
Ok(attrs)
}
/// Matches `attribute = # ! [ meta_item ]`.
///
/// If `permit_inner` is `true`, then a leading `!` indicates an inner
/// attribute.
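///
/// For example (illustrative; `parser` is an assumed `Parser` binding):
///
/// ```ignore (illustrative)
/// // `#[inline]` parses as an outer attribute; with `permit_inner = true`,
/// // `#![allow(dead_code)]` would parse as an inner attribute instead.
/// let attr = parser.parse_attribute(/* permit_inner */ false)?;
/// ```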
pub fn parse_attribute(&mut self, permit_inner: bool) -> PResult<'a, ast::Attribute> {
debug!("parse_attribute: permit_inner={:?} self.token={:?}", permit_inner, self.token);
let inner_parse_policy =
if permit_inner { InnerAttrPolicy::Permitted } else { DEFAULT_INNER_ATTR_FORBIDDEN };
self.parse_attribute_with_inner_parse_policy(inner_parse_policy)
}
/// The same as `parse_attribute`, except it takes in an `InnerAttrPolicy`
/// that prescribes how to handle inner attributes.
fn parse_attribute_with_inner_parse_policy(
&mut self,
inner_parse_policy: InnerAttrPolicy<'_>,
) -> PResult<'a, ast::Attribute> {
debug!(
"parse_attribute_with_inner_parse_policy: inner_parse_policy={:?} self.token={:?}",
inner_parse_policy, self.token
);
let lo = self.token.span;
let (span, item, style) = if self.eat(&token::Pound) {
let style =
if self.eat(&token::Not) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };
self.expect(&token::OpenDelim(token::Bracket))?;
let item = self.parse_attr_item()?;
self.expect(&token::CloseDelim(token::Bracket))?;
let attr_sp = lo.to(self.prev_token.span);
// Emit error if inner attribute is encountered and forbidden.
if style == ast::AttrStyle::Inner {
self.error_on_forbidden_inner_attr(attr_sp, inner_parse_policy);
}
(attr_sp, item, style)
} else {
let token_str = pprust::token_to_string(&self.token);
let msg = &format!("expected `#`, found `{}`", token_str);
return Err(self.struct_span_err(self.token.span, msg));
};
Ok(attr::mk_attr_from_item(style, item, span))
}
pub(super) fn error_on_forbidden_inner_attr(&self, attr_sp: Span, policy: InnerAttrPolicy<'_>) {
if let InnerAttrPolicy::Forbidden { reason, saw_doc_comment, prev_attr_sp } = policy {
let prev_attr_note =
if saw_doc_comment { "previous doc comment" } else { "previous outer attribute" };
let mut diag = self.struct_span_err(attr_sp, reason);
if let Some(prev_attr_sp) = prev_attr_sp {
diag.span_label(attr_sp, "not permitted following an outer attribute")
.span_label(prev_attr_sp, prev_attr_note);
}
diag.note(
"inner attributes, like `#![no_std]`, annotate the item enclosing them, \
and are usually found at the beginning of source files. \
Outer attributes, like `#[test]`, annotate the item following them.",
)
.emit();
}
}
/// Parses an inner part of an attribute (the path and following tokens).
/// The tokens must be either a delimited token stream, or empty token stream,
/// or the "legacy" key-value form.
///
/// ```text
/// PATH `(` TOKEN_STREAM `)`
/// PATH `[` TOKEN_STREAM `]`
/// PATH `{` TOKEN_STREAM `}`
/// PATH
/// PATH `=` UNSUFFIXED_LIT
/// ```
///
/// The delimiters or `=` are still put into the resulting token stream.
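///
/// Some inputs matching the forms above (illustrative):
///
/// ```ignore (illustrative)
/// // `test`, `derive(Debug, Clone)`, `cfg(unix)`, and `doc = "docs"` all
/// // parse as attribute items via this method.
/// let item = parser.parse_attr_item()?;
/// ```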
pub fn parse_attr_item(&mut self) -> PResult<'a, ast::AttrItem> {
let item = match self.token.kind {
token::Interpolated(ref nt) => match **nt {
Nonterminal::NtMeta(ref item) => Some(item.clone().into_inner()),
_ => None,
},
_ => None,
};
Ok(if let Some(item) = item {
self.bump();
item
} else {
let path = self.parse_path(PathStyle::Mod)?;
let args = self.parse_attr_args()?;
ast::AttrItem { path, args }
})
}
/// Parses attributes that appear after the opening of an item. These should
/// be preceded by an exclamation mark, but we accept and warn about one
/// terminated by a semicolon.
///
/// Matches `inner_attrs*`.
crate fn parse_inner_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
let mut attrs: Vec<ast::Attribute> = vec![];
loop {
// Only try to parse if it is an inner attribute (has `!`).
if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) {
let attr = self.parse_attribute(true)?;
assert_eq!(attr.style, ast::AttrStyle::Inner);
attrs.push(attr);
} else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
// We need to get the position of this token before we bump.
let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span);
if attr.style == ast::AttrStyle::Inner {
attrs.push(attr);
self.bump();
} else {
break;
}
} else {
break;
}
}
Ok(attrs)
}
crate fn parse_unsuffixed_lit(&mut self) -> PResult<'a, ast::Lit> {
let lit = self.parse_lit()?;
debug!("checking if {:?} is unusuffixed", lit);
if !lit.kind.is_unsuffixed() {
self.struct_span_err(lit.span, "suffixed literals are not allowed in attributes")
.help(
"instead of using a suffixed literal (`1u8`, `1.0f32`, etc.), \
use an unsuffixed version (`1`, `1.0`, etc.)",
)
.emit();
}
Ok(lit)
}
/// Parses `cfg_attr(pred, attr_item_list)` where `attr_item_list` is comma-delimited.
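///
/// For example (illustrative; `parser` is an assumed binding):
///
/// ```ignore (illustrative)
/// // For `cfg_attr(unix, path = "foo.rs", inline)` this yields the
/// // predicate `unix` and the attr items `path = "foo.rs"` and `inline`.
/// let (predicate, expanded_attrs) = parser.parse_cfg_attr()?;
/// ```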
pub fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> {
let cfg_predicate = self.parse_meta_item()?;
self.expect(&token::Comma)?;
// Presumably, the majority of the time there will only be one attr.
let mut expanded_attrs = Vec::with_capacity(1);
while self.token.kind != token::Eof {
let lo = self.token.span;
let item = self.parse_attr_item()?;
expanded_attrs.push((item, lo.to(self.prev_token.span)));
if !self.eat(&token::Comma) {
break;
}
}
Ok((cfg_predicate, expanded_attrs))
}
/// Matches `COMMASEP(meta_item_inner)`.
crate fn parse_meta_seq_top(&mut self) -> PResult<'a, Vec<ast::NestedMetaItem>> {
// Presumably, the majority of the time there will only be one attr.
let mut nmis = Vec::with_capacity(1);
while self.token.kind != token::Eof {
nmis.push(self.parse_meta_item_inner()?);
if !self.eat(&token::Comma) {
break;
}
}
Ok(nmis)
}
/// Matches the following grammar (per RFC 1559).
///
///
/// ```text
/// meta_item : PATH ( '=' UNSUFFIXED_LIT | '(' meta_item_inner? ')' )? ;
/// meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ;
/// ```
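///
/// Inputs matching this grammar (illustrative):
///
/// ```ignore (illustrative)
/// // `name`, `name = "value"`, and `list(a, b("c"), d = 1)` are all
/// // meta items; nesting follows `meta_item_inner`.
/// let meta = parser.parse_meta_item()?;
/// ```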
pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> {
let nt_meta = match self.token.kind {
token::Interpolated(ref nt) => match **nt {
token::NtMeta(ref e) => Some(e.clone()),
_ => None,
},
_ => None,
};
if let Some(item) = nt_meta {
return match item.meta(item.path.span) {
Some(meta) => {
self.bump();
Ok(meta)
}
None => self.unexpected(),
};
}
let lo = self.token.span;
let path = self.parse_path(PathStyle::Mod)?;
let kind = self.parse_meta_item_kind()?;
let span = lo.to(self.prev_token.span);
Ok(ast::MetaItem { path, kind, span })
}
crate fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> {
Ok(if self.eat(&token::Eq) {
ast::MetaItemKind::NameValue(self.parse_unsuffixed_lit()?)
} else if self.check(&token::OpenDelim(token::Paren)) {
// Matches `meta_seq = ( COMMASEP(meta_item_inner) )`.
let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?;
ast::MetaItemKind::List(list)
} else {
ast::MetaItemKind::Word
})
}
/// Matches `meta_item_inner : (meta_item | UNSUFFIXED_LIT) ;`.
fn parse_meta_item_inner(&mut self) -> PResult<'a, ast::NestedMetaItem> {
match self.parse_unsuffixed_lit() {
Ok(lit) => return Ok(ast::NestedMetaItem::Literal(lit)),
Err(ref mut err) => err.cancel(),
}
match self.parse_meta_item() {
Ok(mi) => return Ok(ast::NestedMetaItem::MetaItem(mi)),
Err(ref mut err) => err.cancel(),
}
let found = pprust::token_to_string(&self.token);
let msg = format!("expected unsuffixed literal or identifier, found `{}`", found);
Err(self.struct_span_err(self.token.span, &msg))
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,292 @@
use super::Parser;
use rustc_ast::token;
use rustc_ast::{
self as ast, Attribute, GenericBounds, GenericParam, GenericParamKind, WhereClause,
};
use rustc_errors::PResult;
use rustc_span::symbol::{kw, sym};
impl<'a> Parser<'a> {
/// Parses bounds of a lifetime parameter `BOUND + BOUND + BOUND`, possibly with trailing `+`.
///
/// ```text
/// BOUND = LT_BOUND (e.g., `'a`)
/// ```
fn parse_lt_param_bounds(&mut self) -> GenericBounds {
let mut lifetimes = Vec::new();
while self.check_lifetime() {
lifetimes.push(ast::GenericBound::Outlives(self.expect_lifetime()));
if !self.eat_plus() {
break;
}
}
lifetimes
}
/// Matches `typaram = IDENT (`?` unbound)? optbounds ( EQ ty )?`.
fn parse_ty_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> {
let ident = self.parse_ident()?;
// Parse optional colon and param bounds.
let bounds = if self.eat(&token::Colon) {
self.parse_generic_bounds(Some(self.prev_token.span))?
} else {
Vec::new()
};
let default = if self.eat(&token::Eq) { Some(self.parse_ty()?) } else { None };
Ok(GenericParam {
ident,
id: ast::DUMMY_NODE_ID,
attrs: preceding_attrs.into(),
bounds,
kind: GenericParamKind::Type { default },
is_placeholder: false,
})
}
fn parse_const_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> {
let const_span = self.token.span;
self.expect_keyword(kw::Const)?;
let ident = self.parse_ident()?;
self.expect(&token::Colon)?;
let ty = self.parse_ty()?;
self.sess.gated_spans.gate(sym::min_const_generics, const_span.to(self.prev_token.span));
Ok(GenericParam {
ident,
id: ast::DUMMY_NODE_ID,
attrs: preceding_attrs.into(),
bounds: Vec::new(),
kind: GenericParamKind::Const { ty, kw_span: const_span },
is_placeholder: false,
})
}
/// Parses a (possibly empty) list of lifetime and type parameters, possibly including
/// a trailing comma and erroneous trailing attributes.
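///
/// For example (illustrative):
///
/// ```ignore (illustrative)
/// // For `<'a, T: Clone + 'a, const N: usize>` this returns the three
/// // parameters `'a`, `T`, and `N` in declaration order.
/// let params = parser.parse_generic_params()?;
/// ```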
pub(super) fn parse_generic_params(&mut self) -> PResult<'a, Vec<ast::GenericParam>> {
let mut params = Vec::new();
loop {
let attrs = self.parse_outer_attributes()?;
if self.check_lifetime() {
let lifetime = self.expect_lifetime();
// Parse lifetime parameter.
let bounds =
if self.eat(&token::Colon) { self.parse_lt_param_bounds() } else { Vec::new() };
params.push(ast::GenericParam {
ident: lifetime.ident,
id: lifetime.id,
attrs: attrs.into(),
bounds,
kind: ast::GenericParamKind::Lifetime,
is_placeholder: false,
});
} else if self.check_keyword(kw::Const) {
// Parse const parameter.
params.push(self.parse_const_param(attrs)?);
} else if self.check_ident() {
// Parse type parameter.
params.push(self.parse_ty_param(attrs)?);
} else if self.token.can_begin_type() {
// Trying to write an associated type bound? (#26271)
let snapshot = self.clone();
match self.parse_ty_where_predicate() {
Ok(where_predicate) => {
self.struct_span_err(
where_predicate.span(),
"bounds on associated types do not belong here",
)
.span_label(where_predicate.span(), "belongs in `where` clause")
.emit();
}
Err(mut err) => {
err.cancel();
*self = snapshot;
break;
}
}
} else {
// Check for trailing attributes and stop parsing.
if !attrs.is_empty() {
if !params.is_empty() {
self.struct_span_err(
attrs[0].span,
"trailing attribute after generic parameter",
)
.span_label(attrs[0].span, "attributes must go before parameters")
.emit();
} else {
self.struct_span_err(attrs[0].span, "attribute without generic parameters")
.span_label(
attrs[0].span,
"attributes are only permitted when preceding parameters",
)
.emit();
}
}
break;
}
if !self.eat(&token::Comma) {
break;
}
}
Ok(params)
}
/// Parses a set of optional generic type parameter declarations. Where
/// clauses are not parsed here, and must be added later via
/// `parse_where_clause()`.
///
/// ```text
/// matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > )
///                  | ( < lifetimes , typaramseq ( , )? > )
/// where typaramseq = ( typaram ) | ( typaram , typaramseq )
/// ```
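///
/// For example (illustrative):
///
/// ```ignore (illustrative)
/// // `fn f<T, U: Ord>()` yields two type parameters, while `fn f()`
/// // yields an empty parameter list with an empty where-clause.
/// let generics = parser.parse_generics()?;
/// ```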
pub(super) fn parse_generics(&mut self) -> PResult<'a, ast::Generics> {
let span_lo = self.token.span;
let (params, span) = if self.eat_lt() {
let params = self.parse_generic_params()?;
self.expect_gt()?;
(params, span_lo.to(self.prev_token.span))
} else {
(vec![], self.prev_token.span.shrink_to_hi())
};
Ok(ast::Generics {
params,
where_clause: WhereClause {
has_where_token: false,
predicates: Vec::new(),
span: self.prev_token.span.shrink_to_hi(),
},
span,
})
}
/// Parses an optional where-clause and returns it.
///
/// ```ignore (only-for-syntax-highlight)
/// where T : Trait<U, V> + 'b, 'a : 'b
/// ```
pub(super) fn parse_where_clause(&mut self) -> PResult<'a, WhereClause> {
let mut where_clause = WhereClause {
has_where_token: false,
predicates: Vec::new(),
span: self.prev_token.span.shrink_to_hi(),
};
if !self.eat_keyword(kw::Where) {
return Ok(where_clause);
}
where_clause.has_where_token = true;
let lo = self.prev_token.span;
// We are considering adding generics to the `where` keyword as an alternative higher-rank
// parameter syntax (as in `where<'a>` or `where<T>`). To avoid that being a breaking
// change we parse those generics now, but report an error.
if self.choose_generics_over_qpath(0) {
let generics = self.parse_generics()?;
self.struct_span_err(
generics.span,
"generic parameters on `where` clauses are reserved for future use",
)
.span_label(generics.span, "currently unsupported")
.emit();
}
loop {
let lo = self.token.span;
if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) {
let lifetime = self.expect_lifetime();
// Bounds starting with a colon are mandatory, but possibly empty.
self.expect(&token::Colon)?;
let bounds = self.parse_lt_param_bounds();
where_clause.predicates.push(ast::WherePredicate::RegionPredicate(
ast::WhereRegionPredicate {
span: lo.to(self.prev_token.span),
lifetime,
bounds,
},
));
} else if self.check_type() {
where_clause.predicates.push(self.parse_ty_where_predicate()?);
} else {
break;
}
if !self.eat(&token::Comma) {
break;
}
}
where_clause.span = lo.to(self.prev_token.span);
Ok(where_clause)
}
fn parse_ty_where_predicate(&mut self) -> PResult<'a, ast::WherePredicate> {
let lo = self.token.span;
// Parse optional `for<'a, 'b>`.
// This `for` is parsed greedily and applies to the whole predicate,
// the bounded type can have its own `for` applying only to it.
// Examples:
// * `for<'a> Trait1<'a>: Trait2<'a /* ok */>`
// * `(for<'a> Trait1<'a>): Trait2<'a /* not ok */>`
// * `for<'a> for<'b> Trait1<'a, 'b>: Trait2<'a /* ok */, 'b /* not ok */>`
let lifetime_defs = self.parse_late_bound_lifetime_defs()?;
// Parse type with mandatory colon and (possibly empty) bounds,
// or with mandatory equality sign and the second type.
let ty = self.parse_ty()?;
if self.eat(&token::Colon) {
let bounds = self.parse_generic_bounds(Some(self.prev_token.span))?;
Ok(ast::WherePredicate::BoundPredicate(ast::WhereBoundPredicate {
span: lo.to(self.prev_token.span),
bound_generic_params: lifetime_defs,
bounded_ty: ty,
bounds,
}))
// FIXME: Decide what should be used here, `=` or `==`.
// FIXME: We are just dropping the binders in lifetime_defs on the floor here.
} else if self.eat(&token::Eq) || self.eat(&token::EqEq) {
let rhs_ty = self.parse_ty()?;
Ok(ast::WherePredicate::EqPredicate(ast::WhereEqPredicate {
span: lo.to(self.prev_token.span),
lhs_ty: ty,
rhs_ty,
id: ast::DUMMY_NODE_ID,
}))
} else {
self.unexpected()
}
}
pub(super) fn choose_generics_over_qpath(&self, start: usize) -> bool {
// There's an ambiguity between generic parameters and qualified paths in impls.
// If we see `<` it may start both, so we have to inspect some following tokens.
// The following combinations can only start generics,
// but not qualified paths (with one exception):
// `<` `>` - empty generic parameters
// `<` `#` - generic parameters with attributes
// `<` (LIFETIME|IDENT) `>` - single generic parameter
// `<` (LIFETIME|IDENT) `,` - first generic parameter in a list
// `<` (LIFETIME|IDENT) `:` - generic parameter with bounds
// `<` (LIFETIME|IDENT) `=` - generic parameter with a default
// `<` const - generic const parameter
// The only truly ambiguous case is
// `<` IDENT `>` `::` IDENT ...
// we disambiguate it in favor of generics (`impl<T> ::absolute::Path<T> { ... }`)
// because this is what is almost always expected in practice; qualified paths in impls
// (`impl <Type>::AssocTy { ... }`) aren't even allowed by the type checker at the moment.
self.look_ahead(start, |t| t == &token::Lt)
&& (self.look_ahead(start + 1, |t| t == &token::Pound || t == &token::Gt)
|| self.look_ahead(start + 1, |t| t.is_lifetime() || t.is_ident())
&& self.look_ahead(start + 2, |t| {
matches!(t.kind, token::Gt | token::Comma | token::Colon | token::Eq)
})
|| self.is_keyword_ahead(start + 1, &[kw::Const]))
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -0,0 +1,170 @@
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Nonterminal, NonterminalKind, Token};
use rustc_ast_pretty::pprust;
use rustc_errors::PResult;
use rustc_span::symbol::{kw, Ident};
use crate::parser::{FollowedByType, Parser, PathStyle};
impl<'a> Parser<'a> {
/// Checks whether a non-terminal may begin with a particular token.
///
/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
/// token. Be conservative (return true) if not sure.
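///
/// For example (illustrative; the tokens are assumed to be constructed elsewhere):
///
/// ```ignore (illustrative)
/// // An `expr` fragment may begin with a literal token, but a matcher for
/// // `expr` never begins with `let` (kept for backwards compatibility).
/// assert!(Parser::nonterminal_may_begin_with(NonterminalKind::Expr, &lit_token));
/// ```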
pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool {
/// Checks whether the non-terminal may contain a single (non-keyword) identifier.
fn may_be_ident(nt: &token::Nonterminal) -> bool {
match *nt {
token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_) => {
false
}
_ => true,
}
}
match kind {
NonterminalKind::Expr => {
token.can_begin_expr()
// This exception is here for backwards compatibility.
&& !token.is_keyword(kw::Let)
}
NonterminalKind::Ty => token.can_begin_type(),
NonterminalKind::Ident => get_macro_ident(token).is_some(),
NonterminalKind::Literal => token.can_begin_literal_maybe_minus(),
NonterminalKind::Vis => match token.kind {
// The follow-set of :vis + "priv" keyword + interpolated
token::Comma | token::Ident(..) | token::Interpolated(..) => true,
_ => token.can_begin_type(),
},
NonterminalKind::Block => match token.kind {
token::OpenDelim(token::Brace) => true,
token::Interpolated(ref nt) => match **nt {
token::NtItem(_)
| token::NtPat(_)
| token::NtTy(_)
| token::NtIdent(..)
| token::NtMeta(_)
| token::NtPath(_)
| token::NtVis(_) => false, // none of these may start with '{'.
_ => true,
},
_ => false,
},
NonterminalKind::Path | NonterminalKind::Meta => match token.kind {
token::ModSep | token::Ident(..) => true,
token::Interpolated(ref nt) => match **nt {
token::NtPath(_) | token::NtMeta(_) => true,
_ => may_be_ident(&nt),
},
_ => false,
},
NonterminalKind::Pat => match token.kind {
token::Ident(..) | // box, ref, mut, and other identifiers (can be made stricter)
token::OpenDelim(token::Paren) | // tuple pattern
token::OpenDelim(token::Bracket) | // slice pattern
token::BinOp(token::And) | // reference
token::BinOp(token::Minus) | // negative literal
token::AndAnd | // double reference
token::Literal(..) | // literal
token::DotDot | // range pattern (future compat)
token::DotDotDot | // range pattern (future compat)
token::ModSep | // path
token::Lt | // path (UFCS constant)
token::BinOp(token::Shl) => true, // path (double UFCS)
token::Interpolated(ref nt) => may_be_ident(nt),
_ => false,
},
NonterminalKind::Lifetime => match token.kind {
token::Lifetime(_) => true,
token::Interpolated(ref nt) => match **nt {
token::NtLifetime(_) | token::NtTT(_) => true,
_ => false,
},
_ => false,
},
NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => match token.kind
{
token::CloseDelim(_) => false,
_ => true,
},
}
}
pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, Nonterminal> {
// Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`)
// needs to have them force-captured here.
// A `macro_rules!` invocation may pass a captured item/expr to a proc-macro,
// which requires having captured tokens available. Since we cannot determine
// in advance whether or not a proc-macro will be (transitively) invoked,
// we always capture tokens for any `Nonterminal` which needs them.
Ok(match kind {
NonterminalKind::Item => match self.collect_tokens(|this| this.parse_item())? {
(Some(mut item), tokens) => {
// If we captured tokens during parsing (due to outer attributes),
// use those.
if item.tokens.is_none() {
item.tokens = Some(tokens);
}
token::NtItem(item)
}
(None, _) => {
return Err(self.struct_span_err(self.token.span, "expected an item keyword"));
}
},
NonterminalKind::Block => token::NtBlock(self.parse_block()?),
NonterminalKind::Stmt => match self.parse_stmt()? {
Some(s) => token::NtStmt(s),
None => return Err(self.struct_span_err(self.token.span, "expected a statement")),
},
NonterminalKind::Pat => {
let (mut pat, tokens) = self.collect_tokens(|this| this.parse_pat(None))?;
// We may have eaten an `NtPat`, which could already have tokens
if pat.tokens.is_none() {
pat.tokens = Some(tokens);
}
token::NtPat(pat)
}
NonterminalKind::Expr => {
let (mut expr, tokens) = self.collect_tokens(|this| this.parse_expr())?;
// If we captured tokens during parsing (due to outer attributes),
// use those.
if expr.tokens.is_none() {
expr.tokens = Some(tokens);
}
token::NtExpr(expr)
}
NonterminalKind::Literal => token::NtLiteral(self.parse_literal_maybe_minus()?),
NonterminalKind::Ty => token::NtTy(self.parse_ty()?),
// this could be handled like a token, since it is one
NonterminalKind::Ident => {
if let Some((ident, is_raw)) = get_macro_ident(&self.token) {
self.bump();
token::NtIdent(ident, is_raw)
} else {
let token_str = pprust::token_to_string(&self.token);
let msg = &format!("expected ident, found {}", &token_str);
return Err(self.struct_span_err(self.token.span, msg));
}
}
NonterminalKind::Path => token::NtPath(self.parse_path(PathStyle::Type)?),
NonterminalKind::Meta => token::NtMeta(P(self.parse_attr_item()?)),
NonterminalKind::TT => token::NtTT(self.parse_token_tree()),
NonterminalKind::Vis => token::NtVis(self.parse_visibility(FollowedByType::Yes)?),
NonterminalKind::Lifetime => {
if self.check_lifetime() {
token::NtLifetime(self.expect_lifetime().ident)
} else {
let token_str = pprust::token_to_string(&self.token);
let msg = &format!("expected a lifetime, found `{}`", &token_str);
return Err(self.struct_span_err(self.token.span, msg));
}
}
})
}
}
/// The token is an identifier, but not `_`.
/// We prohibit passing `_` to macros expecting `ident` for now.
fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> {
token.ident().filter(|(ident, _)| ident.name != kw::Underscore)
}

File diff suppressed because it is too large

View file

@ -0,0 +1,516 @@
use super::ty::{AllowPlus, RecoverQPath};
use super::{Parser, TokenType};
use crate::maybe_whole;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Token};
use rustc_ast::{
self as ast, AngleBracketedArg, AngleBracketedArgs, GenericArg, ParenthesizedArgs,
};
use rustc_ast::{AnonConst, AssocTyConstraint, AssocTyConstraintKind, BlockCheckMode};
use rustc_ast::{Path, PathSegment, QSelf};
use rustc_errors::{pluralize, Applicability, PResult};
use rustc_span::source_map::{BytePos, Span};
use rustc_span::symbol::{kw, sym, Ident};
use std::mem;
use tracing::debug;
/// Specifies how to parse a path.
#[derive(Copy, Clone, PartialEq)]
pub enum PathStyle {
/// In some contexts, notably in expressions, paths with generic arguments are ambiguous
/// with something else. For example, in expressions `segment < ....` can be interpreted
/// as a comparison and `segment ( ....` can be interpreted as a function call.
/// In all such contexts the non-path interpretation is preferred by default for practical
/// reasons, but the path interpretation can be forced by the disambiguator `::`, e.g.
/// `x<y>` - comparisons, `x::<y>` - unambiguously a path.
Expr,
/// In other contexts, notably in types, no ambiguity exists and paths can be written
/// without the disambiguator, e.g., `x<y>` - unambiguously a path.
/// Paths with disambiguators are still accepted, `x::<Y>` - unambiguously a path too.
Type,
/// A path with generic arguments disallowed, e.g., `foo::bar::Baz`, used in imports,
/// visibilities or attributes.
/// Technically, this variant is unnecessary and e.g., `Expr` can be used instead
/// (paths in "mod" contexts have to be checked later for absence of generic arguments
/// anyway, due to macros), but it is used to avoid weird suggestions about expected
/// tokens when something goes wrong.
Mod,
}
impl<'a> Parser<'a> {
/// Parses a qualified path.
/// Assumes that the leading `<` has been parsed already.
///
/// `qualified_path = <type [as trait_ref]>::path`
///
/// # Examples
/// `<T>::default`
/// `<T as U>::a`
/// `<T as U>::F::a<S>` (without disambiguator)
/// `<T as U>::F::a::<S>` (with disambiguator)
pub(super) fn parse_qpath(&mut self, style: PathStyle) -> PResult<'a, (QSelf, Path)> {
let lo = self.prev_token.span;
let ty = self.parse_ty()?;
// `path` will contain the prefix of the path up to the `>`,
// if any (e.g., `U` in the `<T as U>::*` examples
// above). `path_span` has the span of that path, or an empty
// span in the case of something like `<T>::Bar`.
let (mut path, path_span);
if self.eat_keyword(kw::As) {
let path_lo = self.token.span;
path = self.parse_path(PathStyle::Type)?;
path_span = path_lo.to(self.prev_token.span);
} else {
path_span = self.token.span.to(self.token.span);
path = ast::Path { segments: Vec::new(), span: path_span };
}
// See doc comment for `unmatched_angle_bracket_count`.
self.expect(&token::Gt)?;
if self.unmatched_angle_bracket_count > 0 {
self.unmatched_angle_bracket_count -= 1;
debug!("parse_qpath: (decrement) count={:?}", self.unmatched_angle_bracket_count);
}
if !self.recover_colon_before_qpath_proj() {
self.expect(&token::ModSep)?;
}
let qself = QSelf { ty, path_span, position: path.segments.len() };
self.parse_path_segments(&mut path.segments, style)?;
Ok((qself, Path { segments: path.segments, span: lo.to(self.prev_token.span) }))
}
/// Recover from an invalid single colon, when the user likely meant a qualified path.
/// We avoid emitting this if not followed by an identifier, as our assumption that the user
/// intended this to be a qualified path may not be correct.
///
/// ```ignore (diagnostics)
/// <Bar as Baz<T>>:Qux
/// ^ help: use double colon
/// ```
fn recover_colon_before_qpath_proj(&mut self) -> bool {
if self.token.kind != token::Colon
|| self.look_ahead(1, |t| !t.is_ident() || t.is_reserved_ident())
{
return false;
}
self.bump(); // colon
self.diagnostic()
.struct_span_err(
self.prev_token.span,
"found single colon before projection in qualified path",
)
.span_suggestion(
self.prev_token.span,
"use double colon",
"::".to_string(),
Applicability::MachineApplicable,
)
.emit();
true
}
/// Parses simple paths.
///
/// `path = [::] segment+`
/// `segment = ident | ident[::]<args> | ident[::](args) [-> type]`
///
/// # Examples
/// `a::b::C<D>` (without disambiguator)
/// `a::b::C::<D>` (with disambiguator)
/// `Fn(Args)` (without disambiguator)
/// `Fn::(Args)` (with disambiguator)
pub(super) fn parse_path(&mut self, style: PathStyle) -> PResult<'a, Path> {
maybe_whole!(self, NtPath, |path| {
if style == PathStyle::Mod && path.segments.iter().any(|segment| segment.args.is_some())
{
self.struct_span_err(path.span, "unexpected generic arguments in path").emit();
}
path
});
let lo = self.token.span;
let mut segments = Vec::new();
let mod_sep_ctxt = self.token.span.ctxt();
if self.eat(&token::ModSep) {
segments.push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt)));
}
self.parse_path_segments(&mut segments, style)?;
Ok(Path { segments, span: lo.to(self.prev_token.span) })
}
pub(super) fn parse_path_segments(
&mut self,
segments: &mut Vec<PathSegment>,
style: PathStyle,
) -> PResult<'a, ()> {
loop {
let segment = self.parse_path_segment(style)?;
if style == PathStyle::Expr {
// In order to check for trailing angle brackets, we must have finished
// recursing (`parse_path_segment` can indirectly call this function),
// that is, the next token must be the highlighted part of the below example:
//
// `Foo::<Bar as Baz<T>>::Qux`
// ^ here
//
// As opposed to the below highlight (if we had only finished the first
// recursion):
//
// `Foo::<Bar as Baz<T>>::Qux`
// ^ here
//
// `PathStyle::Expr` is only provided at the root invocation and never in
// `parse_path_segment` to recurse and therefore can be checked to maintain
// this invariant.
self.check_trailing_angle_brackets(&segment, &[&token::ModSep]);
}
segments.push(segment);
if self.is_import_coupler() || !self.eat(&token::ModSep) {
return Ok(());
}
}
}
pub(super) fn parse_path_segment(&mut self, style: PathStyle) -> PResult<'a, PathSegment> {
let ident = self.parse_path_segment_ident()?;
let is_args_start = |token: &Token| match token.kind {
token::Lt
| token::BinOp(token::Shl)
| token::OpenDelim(token::Paren)
| token::LArrow => true,
_ => false,
};
let check_args_start = |this: &mut Self| {
this.expected_tokens.extend_from_slice(&[
TokenType::Token(token::Lt),
TokenType::Token(token::OpenDelim(token::Paren)),
]);
is_args_start(&this.token)
};
Ok(
if style == PathStyle::Type && check_args_start(self)
|| style != PathStyle::Mod
&& self.check(&token::ModSep)
&& self.look_ahead(1, |t| is_args_start(t))
{
// We use `style == PathStyle::Expr` to check if this is in a recursion or not. If
// it isn't, then we reset the unmatched angle bracket count as we're about to start
// parsing a new path.
if style == PathStyle::Expr {
self.unmatched_angle_bracket_count = 0;
self.max_angle_bracket_count = 0;
}
// Generic arguments are found - `<`, `(`, `::<` or `::(`.
self.eat(&token::ModSep);
let lo = self.token.span;
let args = if self.eat_lt() {
// `<'a, T, A = U>`
let args =
self.parse_angle_args_with_leading_angle_bracket_recovery(style, lo)?;
self.expect_gt()?;
let span = lo.to(self.prev_token.span);
AngleBracketedArgs { args, span }.into()
} else {
// `(T, U) -> R`
let (inputs, _) = self.parse_paren_comma_seq(|p| p.parse_ty())?;
let span = ident.span.to(self.prev_token.span);
let output = self.parse_ret_ty(AllowPlus::No, RecoverQPath::No)?;
ParenthesizedArgs { inputs, output, span }.into()
};
PathSegment { ident, args, id: ast::DUMMY_NODE_ID }
} else {
// Generic arguments are not found.
PathSegment::from_ident(ident)
},
)
}
pub(super) fn parse_path_segment_ident(&mut self) -> PResult<'a, Ident> {
match self.token.ident() {
Some((ident, false)) if ident.is_path_segment_keyword() => {
self.bump();
Ok(ident)
}
_ => self.parse_ident(),
}
}
/// Parses generic args (within a path segment) with recovery for extra leading angle brackets.
/// For the purposes of understanding the parsing logic of generic arguments, this function
/// can be thought of being the same as just calling `self.parse_angle_args()` if the source
/// had the correct amount of leading angle brackets.
///
/// ```ignore (diagnostics)
/// bar::<<<<T as Foo>::Output>();
/// ^^ help: remove extra angle brackets
/// ```
fn parse_angle_args_with_leading_angle_bracket_recovery(
&mut self,
style: PathStyle,
lo: Span,
) -> PResult<'a, Vec<AngleBracketedArg>> {
// We need to detect whether there are extra leading left angle brackets and produce an
// appropriate error and suggestion. This cannot be implemented by looking ahead at
// upcoming tokens for a matching `>` character - if there are unmatched `<` tokens
// then there won't be matching `>` tokens to find.
//
// To explain how this detection works, consider the following example:
//
// ```ignore (diagnostics)
// bar::<<<<T as Foo>::Output>();
// ^^ help: remove extra angle brackets
// ```
//
// Parsing of the left angle brackets starts in this function. We start by parsing the
// `<` token (incrementing the counter of unmatched angle brackets on `Parser` via
// `eat_lt`):
//
// *Upcoming tokens:* `<<<<T as Foo>::Output>;`
// *Unmatched count:* 1
// *`parse_path_segment` calls deep:* 0
//
// This has the effect of recursing as this function is called if a `<` character
// is found within the expected generic arguments:
//
// *Upcoming tokens:* `<<<T as Foo>::Output>;`
// *Unmatched count:* 2
// *`parse_path_segment` calls deep:* 1
//
// Eventually we will have recursed until having consumed all of the `<` tokens and
// this will be reflected in the count:
//
// *Upcoming tokens:* `T as Foo>::Output>;`
// *Unmatched count:* 4
// *`parse_path_segment` calls deep:* 3
//
// The parser will continue until reaching the first `>` - this will decrement the
// unmatched angle bracket count and return to the parent invocation of this function
// having succeeded in parsing:
//
// *Upcoming tokens:* `::Output>;`
// *Unmatched count:* 3
// *`parse_path_segment` calls deep:* 2
//
// This will continue until the next `>` character which will also return successfully
// to the parent invocation of this function and decrement the count:
//
// *Upcoming tokens:* `;`
// *Unmatched count:* 2
// *`parse_path_segment` calls deep:* 1
//
// At this point, this function will expect to find another matching `>` character but
// won't be able to and will return an error. This will continue all the way up the
// call stack until the first invocation:
//
// *Upcoming tokens:* `;`
// *Unmatched count:* 2
// *`parse_path_segment` calls deep:* 0
//
// In doing this, we have managed to work out how many unmatched leading left angle
// brackets there are, but we cannot recover as the unmatched angle brackets have
// already been consumed. To remedy this, we keep a snapshot of the parser state
// before we do the above. We can then inspect whether we ended up with a parsing error
// and unmatched left angle brackets and if so, restore the parser state before we
// consumed any `<` characters to emit an error and consume the erroneous tokens to
// recover by attempting to parse again.
//
// In practice, the recursion of this function is indirect and there will be other
// locations that consume some `<` characters - as long as we update the count when
// this happens, it isn't an issue.
let is_first_invocation = style == PathStyle::Expr;
// Take a snapshot before attempting to parse - we can restore this later.
let snapshot = if is_first_invocation { Some(self.clone()) } else { None };
debug!("parse_generic_args_with_leading_angle_bracket_recovery: (snapshotting)");
match self.parse_angle_args() {
Ok(args) => Ok(args),
Err(ref mut e) if is_first_invocation && self.unmatched_angle_bracket_count > 0 => {
// Cancel error from being unable to find `>`. We know the error
// must have been this due to a non-zero unmatched angle bracket
// count.
e.cancel();
// Swap `self` with our backup of the parser state before attempting to parse
// generic arguments.
let snapshot = mem::replace(self, snapshot.unwrap());
debug!(
"parse_generic_args_with_leading_angle_bracket_recovery: (snapshot failure) \
snapshot.count={:?}",
snapshot.unmatched_angle_bracket_count,
);
// Eat the unmatched angle brackets.
for _ in 0..snapshot.unmatched_angle_bracket_count {
self.eat_lt();
}
// Make a span over ${unmatched angle bracket count} characters.
let span = lo.with_hi(lo.lo() + BytePos(snapshot.unmatched_angle_bracket_count));
self.struct_span_err(
span,
&format!(
"unmatched angle bracket{}",
pluralize!(snapshot.unmatched_angle_bracket_count)
),
)
.span_suggestion(
span,
&format!(
"remove extra angle bracket{}",
pluralize!(snapshot.unmatched_angle_bracket_count)
),
String::new(),
Applicability::MachineApplicable,
)
.emit();
// Try again without unmatched angle bracket characters.
self.parse_angle_args()
}
Err(e) => Err(e),
}
}
/// Parses (possibly empty) list of generic arguments / associated item constraints,
/// possibly including trailing comma.
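///
/// For example (illustrative):
///
/// ```ignore (illustrative)
/// // For `<'a, Vec<u8>, Item = T>` this yields three angle-bracketed args:
/// // a lifetime argument, a type argument, and an associated type constraint.
/// let args = parser.parse_angle_args()?;
/// ```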
pub(super) fn parse_angle_args(&mut self) -> PResult<'a, Vec<AngleBracketedArg>> {
let mut args = Vec::new();
while let Some(arg) = self.parse_angle_arg()? {
args.push(arg);
if !self.eat(&token::Comma) {
break;
}
}
Ok(args)
}
/// Parses a single argument in the angle arguments `<...>` of a path segment.
fn parse_angle_arg(&mut self) -> PResult<'a, Option<AngleBracketedArg>> {
if self.check_ident() && self.look_ahead(1, |t| matches!(t.kind, token::Eq | token::Colon))
{
// Parse associated type constraint.
let lo = self.token.span;
let ident = self.parse_ident()?;
let kind = if self.eat(&token::Eq) {
let ty = self.parse_assoc_equality_term(ident, self.prev_token.span)?;
AssocTyConstraintKind::Equality { ty }
} else if self.eat(&token::Colon) {
let bounds = self.parse_generic_bounds(Some(self.prev_token.span))?;
AssocTyConstraintKind::Bound { bounds }
} else {
unreachable!();
};
let span = lo.to(self.prev_token.span);
// Gate associated type bounds, e.g., `Iterator<Item: Ord>`.
if let AssocTyConstraintKind::Bound { .. } = kind {
self.sess.gated_spans.gate(sym::associated_type_bounds, span);
}
let constraint = AssocTyConstraint { id: ast::DUMMY_NODE_ID, ident, kind, span };
Ok(Some(AngleBracketedArg::Constraint(constraint)))
} else {
Ok(self.parse_generic_arg()?.map(AngleBracketedArg::Arg))
}
}
/// Parse the term to the right of an associated item equality constraint.
/// That is, parse `<term>` in `Item = <term>`.
/// Right now, this only admits types in `<term>`.
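///
/// For example (illustrative; `ident` and `eq_span` are assumed bindings):
///
/// ```ignore (illustrative)
/// // In `Iterator<Item = u32>`, after `Item` and `=` are eaten, this parses
/// // `u32`; a lifetime or const in that position is rejected with an error.
/// let ty = parser.parse_assoc_equality_term(ident, eq_span)?;
/// ```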
fn parse_assoc_equality_term(&mut self, ident: Ident, eq: Span) -> PResult<'a, P<ast::Ty>> {
let arg = self.parse_generic_arg()?;
let span = ident.span.to(self.prev_token.span);
match arg {
Some(GenericArg::Type(ty)) => return Ok(ty),
Some(GenericArg::Const(expr)) => {
self.struct_span_err(span, "cannot constrain an associated constant to a value")
.span_label(ident.span, "this associated constant...")
.span_label(expr.value.span, "...cannot be constrained to this value")
.emit();
}
Some(GenericArg::Lifetime(lt)) => {
self.struct_span_err(span, "associated lifetimes are not supported")
.span_label(lt.ident.span, "the lifetime is given here")
.help("if you meant to specify a trait object, write `dyn Trait + 'lifetime`")
.emit();
}
None => {
let after_eq = eq.shrink_to_hi();
let before_next = self.token.span.shrink_to_lo();
self.struct_span_err(after_eq.to(before_next), "missing type to the right of `=`")
.span_suggestion(
self.sess.source_map().next_point(eq).to(before_next),
"to constrain the associated type, add a type after `=`",
" TheType".to_string(),
Applicability::HasPlaceholders,
)
.span_suggestion(
eq.to(before_next),
&format!("remove the `=` if `{}` is a type", ident),
String::new(),
Applicability::MaybeIncorrect,
)
.emit();
}
}
Ok(self.mk_ty(span, ast::TyKind::Err))
}
/// Parse a generic argument in a path segment.
/// This does not include constraints, e.g., `Item = u8`, which is handled in `parse_angle_arg`.
fn parse_generic_arg(&mut self) -> PResult<'a, Option<GenericArg>> {
let arg = if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) {
// Parse lifetime argument.
GenericArg::Lifetime(self.expect_lifetime())
} else if self.check_const_arg() {
// Parse const argument.
let expr = if let token::OpenDelim(token::Brace) = self.token.kind {
self.parse_block_expr(
None,
self.token.span,
BlockCheckMode::Default,
ast::AttrVec::new(),
)?
} else if self.token.is_ident() {
// FIXME(const_generics): to distinguish between idents for types and consts,
// we should introduce a GenericArg::Ident in the AST and distinguish when
// lowering to the HIR. For now, idents for const args are not permitted.
if self.token.is_bool_lit() {
self.parse_literal_maybe_minus()?
} else {
let span = self.token.span;
let msg = "identifiers may currently not be used for const generics";
self.struct_span_err(span, msg).emit();
let block = self.mk_block_err(span);
self.mk_expr(span, ast::ExprKind::Block(block, None), ast::AttrVec::new())
}
} else {
self.parse_literal_maybe_minus()?
};
GenericArg::Const(AnonConst { id: ast::DUMMY_NODE_ID, value: expr })
} else if self.check_type() {
// Parse type argument.
GenericArg::Type(self.parse_ty()?)
} else {
return Ok(None);
};
Ok(Some(arg))
}
}

View file

@ -0,0 +1,427 @@
use super::attr::DEFAULT_INNER_ATTR_FORBIDDEN;
use super::diagnostics::Error;
use super::expr::LhsExpr;
use super::pat::GateOr;
use super::path::PathStyle;
use super::{BlockMode, Parser, Restrictions, SemiColonMode};
use crate::maybe_whole;
use rustc_ast as ast;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, TokenKind};
use rustc_ast::util::classify;
use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacStmtStyle};
use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID};
use rustc_errors::{Applicability, PResult};
use rustc_span::source_map::{BytePos, Span};
use rustc_span::symbol::{kw, sym};
use std::mem;
impl<'a> Parser<'a> {
/// Parses a statement. This stops just before trailing semicolons on everything but items.
/// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed.
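///
/// For example (illustrative):
///
/// ```ignore (illustrative)
/// // `let x = 1;` parses as a `StmtKind::Local` and `foo();` as a
/// // `StmtKind::Expr`; in both cases the trailing `;` is left unconsumed.
/// let stmt = parser.parse_stmt()?;
/// ```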
pub(super) fn parse_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
Ok(self.parse_stmt_without_recovery().unwrap_or_else(|mut e| {
e.emit();
self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
None
}))
}
fn parse_stmt_without_recovery(&mut self) -> PResult<'a, Option<Stmt>> {
maybe_whole!(self, NtStmt, |x| Some(x));
let attrs = self.parse_outer_attributes()?;
let lo = self.token.span;
let stmt = if self.eat_keyword(kw::Let) {
self.parse_local_mk(lo, attrs.into())?
} else if self.is_kw_followed_by_ident(kw::Mut) {
self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")?
} else if self.is_kw_followed_by_ident(kw::Auto) {
self.bump(); // `auto`
let msg = "write `let` instead of `auto` to introduce a new variable";
self.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if self.is_kw_followed_by_ident(sym::var) {
self.bump(); // `var`
let msg = "write `let` instead of `var` to introduce a new variable";
self.recover_stmt_local(lo, attrs.into(), msg, "let")?
} else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() {
// We have avoided contextual keywords like `union`, items with `crate` visibility,
// or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something
// that starts like a path (1 token), but is in fact not a path.
// Also, we avoid stealing syntax from `parse_item_`.
self.parse_stmt_path_start(lo, attrs)?
} else if let Some(item) = self.parse_item_common(attrs.clone(), false, true, |_| true)? {
// FIXME: Bad copy of attrs
self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item)))
} else if self.eat(&token::Semi) {
// Do not attempt to parse an expression if we're done here.
self.error_outer_attrs(&attrs);
self.mk_stmt(lo, StmtKind::Empty)
} else if self.token != token::CloseDelim(token::Brace) {
// Remainder are line-expr stmts.
let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?;
self.mk_stmt(lo.to(e.span), StmtKind::Expr(e))
} else {
self.error_outer_attrs(&attrs);
return Ok(None);
};
Ok(Some(stmt))
}
fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, Stmt> {
let path = self.parse_path(PathStyle::Expr)?;
if self.eat(&token::Not) {
return self.parse_stmt_mac(lo, attrs.into(), path);
}
let expr = if self.check(&token::OpenDelim(token::Brace)) {
self.parse_struct_expr(path, AttrVec::new())?
} else {
let hi = self.prev_token.span;
self.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new())
};
let expr = self.with_res(Restrictions::STMT_EXPR, |this| {
let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?;
this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr))
})?;
Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr)))
}
/// Parses a statement macro `mac!(args)` provided a `path` representing `mac`.
/// At this point, the `!` token after the path has already been eaten.
fn parse_stmt_mac(&mut self, lo: Span, attrs: AttrVec, path: ast::Path) -> PResult<'a, Stmt> {
let args = self.parse_mac_args()?;
let delim = args.delim();
let hi = self.prev_token.span;
let style =
if delim == token::Brace { MacStmtStyle::Braces } else { MacStmtStyle::NoBraces };
let mac = MacCall { path, args, prior_type_ascription: self.last_type_ascription };
let kind = if delim == token::Brace || self.token == token::Semi || self.token == token::Eof
{
StmtKind::MacCall(P((mac, style, attrs)))
} else {
// Since none of the above applied, this is an expression statement macro.
let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac), AttrVec::new());
let e = self.maybe_recover_from_bad_qpath(e, true)?;
let e = self.parse_dot_or_call_expr_with(e, lo, attrs)?;
let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e))?;
StmtKind::Expr(e)
};
Ok(self.mk_stmt(lo.to(hi), kind))
}
/// Error on outer attributes in this context.
/// Also error if the previous token was a doc comment.
fn error_outer_attrs(&self, attrs: &[Attribute]) {
if let [.., last] = attrs {
if last.is_doc_comment() {
self.span_fatal_err(last.span, Error::UselessDocComment).emit();
} else if attrs.iter().any(|a| a.style == AttrStyle::Outer) {
self.struct_span_err(last.span, "expected statement after outer attribute").emit();
}
}
}
fn recover_stmt_local(
&mut self,
lo: Span,
attrs: AttrVec,
msg: &str,
sugg: &str,
) -> PResult<'a, Stmt> {
let stmt = self.parse_local_mk(lo, attrs)?;
self.struct_span_err(lo, "invalid variable declaration")
.span_suggestion(lo, msg, sugg.to_string(), Applicability::MachineApplicable)
.emit();
Ok(stmt)
}
fn parse_local_mk(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> {
let local = self.parse_local(attrs)?;
Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local)))
}
/// Parses a local variable declaration.
fn parse_local(&mut self, attrs: AttrVec) -> PResult<'a, P<Local>> {
let lo = self.prev_token.span;
let pat = self.parse_top_pat(GateOr::Yes)?;
let (err, ty) = if self.eat(&token::Colon) {
// Save the state of the parser before parsing type normally, in case there is a `:`
// instead of an `=` typo.
let parser_snapshot_before_type = self.clone();
let colon_sp = self.prev_token.span;
match self.parse_ty() {
Ok(ty) => (None, Some(ty)),
Err(mut err) => {
if let Ok(snip) = self.span_to_snippet(pat.span) {
err.span_label(pat.span, format!("while parsing the type for `{}`", snip));
}
let err = if self.check(&token::Eq) {
err.emit();
None
} else {
// Rewind to before attempting to parse the type and continue parsing.
let parser_snapshot_after_type =
mem::replace(self, parser_snapshot_before_type);
Some((parser_snapshot_after_type, colon_sp, err))
};
(err, None)
}
}
} else {
(None, None)
};
let init = match (self.parse_initializer(err.is_some()), err) {
(Ok(init), None) => {
// init parsed, ty parsed
init
}
(Ok(init), Some((_, colon_sp, mut err))) => {
// init parsed, ty error
// We could parse the type as if it were the initializer; it is likely there was a
// typo in the code (`:` instead of `=`). Add a suggestion and emit the error.
err.span_suggestion_short(
colon_sp,
"use `=` if you meant to assign",
" =".to_string(),
Applicability::MachineApplicable,
);
err.emit();
// As this parsed successfully, continue as if the code had been fixed for the
// rest of the file. Compilation will still fail due to the emitted error, but we
// avoid extra noise.
init
}
(Err(mut init_err), Some((snapshot, _, ty_err))) => {
// init error, ty error
init_err.cancel();
// Couldn't parse the type nor the initializer, only raise the type error and
// return to the parser state before parsing the type as the initializer.
// let x: <parse_error>;
*self = snapshot;
return Err(ty_err);
}
(Err(err), None) => {
// init error, ty parsed
// Couldn't parse the initializer and we're not attempting to recover a failed
// parse of the type, return the error.
return Err(err);
}
};
let hi = if self.token == token::Semi { self.token.span } else { self.prev_token.span };
Ok(P(ast::Local { ty, pat, init, id: DUMMY_NODE_ID, span: lo.to(hi), attrs }))
}
/// Parses the RHS of a local variable declaration (e.g., `= 14;`).
fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> {
let eq_consumed = match self.token.kind {
token::BinOpEq(..) => {
// Recover `let x <op>= 1` as `let x = 1`
self.struct_span_err(
self.token.span,
"can't reassign to an uninitialized variable",
)
.span_suggestion_short(
self.token.span,
"initialize the variable",
"=".to_string(),
Applicability::MaybeIncorrect,
)
.emit();
self.bump();
true
}
_ => self.eat(&token::Eq),
};
Ok(if eq_consumed || eq_optional { Some(self.parse_expr()?) } else { None })
}
/// Parses a block. No inner attributes are allowed.
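/// For example (illustrative), `{ stmt; expr }` is accepted here, while an inner
/// attribute as in `{ #![allow(unused)] stmt; }` is reported as an error.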
pub(super) fn parse_block(&mut self) -> PResult<'a, P<Block>> {
let (attrs, block) = self.parse_inner_attrs_and_block()?;
if let [.., last] = &*attrs {
self.error_on_forbidden_inner_attr(last.span, DEFAULT_INNER_ATTR_FORBIDDEN);
}
Ok(block)
}
fn error_block_no_opening_brace<T>(&mut self) -> PResult<'a, T> {
let sp = self.token.span;
let tok = super::token_descr(&self.token);
let mut e = self.struct_span_err(sp, &format!("expected `{{`, found {}", tok));
let do_not_suggest_help = self.token.is_keyword(kw::In) || self.token == token::Colon;
// Check to see if the user has written something like
//
// if (cond)
// bar;
//
// which is valid in other languages, but not in Rust.
match self.parse_stmt_without_recovery() {
// If the next token is an open brace (e.g., `if a b {`), the place-
// inside-a-block suggestion would be more likely wrong than right.
Ok(Some(_))
if self.look_ahead(1, |t| t == &token::OpenDelim(token::Brace))
|| do_not_suggest_help => {}
Ok(Some(stmt)) => {
let stmt_own_line = self.sess.source_map().is_line_before_span_empty(sp);
let stmt_span = if stmt_own_line && self.eat(&token::Semi) {
// Expand the span to include the semicolon.
stmt.span.with_hi(self.prev_token.span.hi())
} else {
stmt.span
};
if let Ok(snippet) = self.span_to_snippet(stmt_span) {
e.span_suggestion(
stmt_span,
"try placing this code inside a block",
format!("{{ {} }}", snippet),
// Speculative; has been misleading in the past (#46836).
Applicability::MaybeIncorrect,
);
}
}
Err(mut e) => {
self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
e.cancel();
}
_ => {}
}
e.span_label(sp, "expected `{`");
Err(e)
}
/// Parses a block. Inner attributes are allowed.
pub(super) fn parse_inner_attrs_and_block(
&mut self,
) -> PResult<'a, (Vec<Attribute>, P<Block>)> {
self.parse_block_common(self.token.span, BlockCheckMode::Default)
}
/// Parses a block. Inner attributes are allowed.
pub(super) fn parse_block_common(
&mut self,
lo: Span,
blk_mode: BlockCheckMode,
) -> PResult<'a, (Vec<Attribute>, P<Block>)> {
maybe_whole!(self, NtBlock, |x| (Vec::new(), x));
if !self.eat(&token::OpenDelim(token::Brace)) {
return self.error_block_no_opening_brace();
}
Ok((self.parse_inner_attributes()?, self.parse_block_tail(lo, blk_mode)?))
}
/// Parses the rest of a block expression or function body.
/// Precondition: the `{` has already been parsed.
fn parse_block_tail(&mut self, lo: Span, s: BlockCheckMode) -> PResult<'a, P<Block>> {
let mut stmts = vec![];
while !self.eat(&token::CloseDelim(token::Brace)) {
if self.token == token::Eof {
break;
}
let stmt = match self.parse_full_stmt() {
Err(mut err) => {
self.maybe_annotate_with_ascription(&mut err, false);
err.emit();
self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore);
Some(self.mk_stmt_err(self.token.span))
}
Ok(stmt) => stmt,
};
if let Some(stmt) = stmt {
stmts.push(stmt);
} else {
// Found only `;` or `}`.
continue;
};
}
Ok(self.mk_block(stmts, s, lo.to(self.prev_token.span)))
}
/// Parses a statement, including the trailing semicolon.
pub fn parse_full_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
// Skip looking for a trailing semicolon when we have an interpolated statement.
maybe_whole!(self, NtStmt, |x| Some(x));
let mut stmt = match self.parse_stmt_without_recovery()? {
Some(stmt) => stmt,
None => return Ok(None),
};
let mut eat_semi = true;
match stmt.kind {
// Expression without semicolon.
StmtKind::Expr(ref expr)
if self.token != token::Eof && classify::expr_requires_semi_to_be_stmt(expr) =>
{
// Just check for errors and recover; do not eat semicolon yet.
if let Err(mut e) =
self.expect_one_of(&[], &[token::Semi, token::CloseDelim(token::Brace)])
{
if let TokenKind::DocComment(..) = self.token.kind {
if let Ok(snippet) = self.span_to_snippet(self.token.span) {
let sp = self.token.span;
let marker = &snippet[..3];
let (comment_marker, doc_comment_marker) = marker.split_at(2);
e.span_suggestion(
sp.with_hi(sp.lo() + BytePos(marker.len() as u32)),
&format!(
"add a space before `{}` to use a regular comment",
doc_comment_marker,
),
format!("{} {}", comment_marker, doc_comment_marker),
Applicability::MaybeIncorrect,
);
}
}
e.emit();
self.recover_stmt();
// Don't complain about type errors in body tail after parse error (#57383).
let sp = expr.span.to(self.prev_token.span);
stmt.kind = StmtKind::Expr(self.mk_expr_err(sp));
}
}
StmtKind::Local(..) => {
self.expect_semi()?;
eat_semi = false;
}
StmtKind::Empty => eat_semi = false,
_ => {}
}
if eat_semi && self.eat(&token::Semi) {
stmt = stmt.add_trailing_semicolon();
}
stmt.span = stmt.span.to(self.prev_token.span);
Ok(Some(stmt))
}
pub(super) fn mk_block(&self, stmts: Vec<Stmt>, rules: BlockCheckMode, span: Span) -> P<Block> {
P(Block { stmts, id: DUMMY_NODE_ID, rules, span })
}
pub(super) fn mk_stmt(&self, span: Span, kind: StmtKind) -> Stmt {
Stmt { id: DUMMY_NODE_ID, kind, span }
}
fn mk_stmt_err(&self, span: Span) -> Stmt {
self.mk_stmt(span, StmtKind::Expr(self.mk_expr_err(span)))
}
pub(super) fn mk_block_err(&self, span: Span) -> P<Block> {
self.mk_block(vec![self.mk_stmt_err(span)], BlockCheckMode::Default, span)
}
}

View file

@@ -0,0 +1,631 @@
use super::{Parser, PathStyle, TokenType};
use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole};
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::{self as ast, BareFnTy, FnRetTy, GenericParam, Lifetime, MutTy, Ty, TyKind};
use rustc_ast::{GenericBound, GenericBounds, MacCall, Mutability};
use rustc_ast::{PolyTraitRef, TraitBoundModifier, TraitObjectSyntax};
use rustc_errors::{pluralize, struct_span_err, Applicability, PResult};
use rustc_span::source_map::Span;
use rustc_span::symbol::{kw, sym};
/// Any `?` or `?const` modifiers that appear at the start of a bound.
struct BoundModifiers {
/// `?Trait`.
maybe: Option<Span>,
/// `?const Trait`.
maybe_const: Option<Span>,
}
impl BoundModifiers {
fn to_trait_bound_modifier(&self) -> TraitBoundModifier {
match (self.maybe, self.maybe_const) {
(None, None) => TraitBoundModifier::None,
(Some(_), None) => TraitBoundModifier::Maybe,
(None, Some(_)) => TraitBoundModifier::MaybeConst,
(Some(_), Some(_)) => TraitBoundModifier::MaybeConstMaybe,
}
}
}
#[derive(Copy, Clone, PartialEq)]
pub(super) enum AllowPlus {
Yes,
No,
}
#[derive(PartialEq)]
pub(super) enum RecoverQPath {
Yes,
No,
}
// Is `...` (`CVarArgs`) legal at this level of type parsing?
#[derive(PartialEq)]
enum AllowCVariadic {
Yes,
No,
}
/// Returns `true` if `IDENT t` can start a type -- `IDENT::a::b`, `IDENT<u8, u8>`,
/// `IDENT<<u8 as Trait>::AssocTy>`.
///
/// Types can also be of the form `IDENT(u8, u8) -> u8`; however, this assumes
/// that `IDENT` is not the ident of a fn trait.
fn can_continue_type_after_non_fn_ident(t: &Token) -> bool {
t == &token::ModSep || t == &token::Lt || t == &token::BinOp(token::Shl)
}
impl<'a> Parser<'a> {
/// Parses a type.
pub fn parse_ty(&mut self) -> PResult<'a, P<Ty>> {
self.parse_ty_common(AllowPlus::Yes, RecoverQPath::Yes, AllowCVariadic::No)
}
/// Parse a type suitable for a function or function pointer parameter.
/// The difference from `parse_ty` is that this version allows `...`
/// (`CVarArgs`) at the top level of the type.
pub(super) fn parse_ty_for_param(&mut self) -> PResult<'a, P<Ty>> {
self.parse_ty_common(AllowPlus::Yes, RecoverQPath::Yes, AllowCVariadic::Yes)
}
/// Parses a type in restricted contexts where `+` is not permitted.
///
/// Example 1: `&'a TYPE`
/// `+` is prohibited to maintain operator priority (P(+) < P(&)).
/// Example 2: `value1 as TYPE + value2`
/// `+` is prohibited to avoid interactions with expression grammar.
pub(super) fn parse_ty_no_plus(&mut self) -> PResult<'a, P<Ty>> {
self.parse_ty_common(AllowPlus::No, RecoverQPath::Yes, AllowCVariadic::No)
}
/// Parses an optional return type `[ -> TY ]` in a function declaration.
pub(super) fn parse_ret_ty(
&mut self,
allow_plus: AllowPlus,
recover_qpath: RecoverQPath,
) -> PResult<'a, FnRetTy> {
Ok(if self.eat(&token::RArrow) {
// FIXME(Centril): Can we unconditionally `allow_plus`?
let ty = self.parse_ty_common(allow_plus, recover_qpath, AllowCVariadic::No)?;
FnRetTy::Ty(ty)
} else {
FnRetTy::Default(self.token.span.shrink_to_lo())
})
}
fn parse_ty_common(
&mut self,
allow_plus: AllowPlus,
recover_qpath: RecoverQPath,
allow_c_variadic: AllowCVariadic,
) -> PResult<'a, P<Ty>> {
let allow_qpath_recovery = recover_qpath == RecoverQPath::Yes;
maybe_recover_from_interpolated_ty_qpath!(self, allow_qpath_recovery);
maybe_whole!(self, NtTy, |x| x);
let lo = self.token.span;
let mut impl_dyn_multi = false;
let kind = if self.check(&token::OpenDelim(token::Paren)) {
self.parse_ty_tuple_or_parens(lo, allow_plus)?
} else if self.eat(&token::Not) {
// Never type `!`
TyKind::Never
} else if self.eat(&token::BinOp(token::Star)) {
self.parse_ty_ptr()?
} else if self.eat(&token::OpenDelim(token::Bracket)) {
self.parse_array_or_slice_ty()?
} else if self.check(&token::BinOp(token::And)) || self.check(&token::AndAnd) {
// Reference
self.expect_and()?;
self.parse_borrowed_pointee()?
} else if self.eat_keyword_noexpect(kw::Typeof) {
self.parse_typeof_ty()?
} else if self.eat_keyword(kw::Underscore) {
// A type to be inferred `_`
TyKind::Infer
} else if self.check_fn_front_matter() {
// Function pointer type
self.parse_ty_bare_fn(lo, Vec::new())?
} else if self.check_keyword(kw::For) {
// Function pointer type or bound list (trait object type) starting with a poly-trait.
// `for<'lt> [unsafe] [extern "ABI"] fn (&'lt S) -> T`
// `for<'lt> Trait1<'lt> + Trait2 + 'a`
let lifetime_defs = self.parse_late_bound_lifetime_defs()?;
if self.check_fn_front_matter() {
self.parse_ty_bare_fn(lo, lifetime_defs)?
} else {
let path = self.parse_path(PathStyle::Type)?;
let parse_plus = allow_plus == AllowPlus::Yes && self.check_plus();
self.parse_remaining_bounds_path(lifetime_defs, path, lo, parse_plus)?
}
} else if self.eat_keyword(kw::Impl) {
self.parse_impl_ty(&mut impl_dyn_multi)?
} else if self.is_explicit_dyn_type() {
self.parse_dyn_ty(&mut impl_dyn_multi)?
} else if self.eat_lt() {
// Qualified path
let (qself, path) = self.parse_qpath(PathStyle::Type)?;
TyKind::Path(Some(qself), path)
} else if self.check_path() {
self.parse_path_start_ty(lo, allow_plus)?
} else if self.can_begin_bound() {
self.parse_bare_trait_object(lo, allow_plus)?
} else if self.eat(&token::DotDotDot) {
if allow_c_variadic == AllowCVariadic::Yes {
TyKind::CVarArgs
} else {
// FIXME(Centril): Should we just allow `...` syntactically
// anywhere in a type and use semantic restrictions instead?
self.error_illegal_c_variadic_ty(lo);
TyKind::Err
}
} else {
let msg = format!("expected type, found {}", super::token_descr(&self.token));
let mut err = self.struct_span_err(self.token.span, &msg);
err.span_label(self.token.span, "expected type");
self.maybe_annotate_with_ascription(&mut err, true);
return Err(err);
};
let span = lo.to(self.prev_token.span);
let ty = self.mk_ty(span, kind);
// Try to recover from use of `+` with incorrect priority.
self.maybe_report_ambiguous_plus(allow_plus, impl_dyn_multi, &ty);
self.maybe_recover_from_bad_type_plus(allow_plus, &ty)?;
self.maybe_recover_from_bad_qpath(ty, allow_qpath_recovery)
}
/// Parses either:
/// - `(TYPE)`, a parenthesized type.
/// - `(TYPE,)`, a tuple with a single field of type TYPE.
fn parse_ty_tuple_or_parens(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
let mut trailing_plus = false;
let (ts, trailing) = self.parse_paren_comma_seq(|p| {
let ty = p.parse_ty()?;
trailing_plus = p.prev_token.kind == TokenKind::BinOp(token::Plus);
Ok(ty)
})?;
if ts.len() == 1 && !trailing {
let ty = ts.into_iter().next().unwrap().into_inner();
let maybe_bounds = allow_plus == AllowPlus::Yes && self.token.is_like_plus();
match ty.kind {
// `(TY_BOUND_NOPAREN) + BOUND + ...`.
TyKind::Path(None, path) if maybe_bounds => {
self.parse_remaining_bounds_path(Vec::new(), path, lo, true)
}
TyKind::TraitObject(bounds, TraitObjectSyntax::None)
if maybe_bounds && bounds.len() == 1 && !trailing_plus =>
{
self.parse_remaining_bounds(bounds, true)
}
// `(TYPE)`
_ => Ok(TyKind::Paren(P(ty))),
}
} else {
Ok(TyKind::Tup(ts))
}
}
fn parse_bare_trait_object(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
let lt_no_plus = self.check_lifetime() && !self.look_ahead(1, |t| t.is_like_plus());
let bounds = self.parse_generic_bounds_common(allow_plus, None)?;
if lt_no_plus {
self.struct_span_err(lo, "lifetime in trait object type must be followed by `+`").emit()
}
Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::None))
}
fn parse_remaining_bounds_path(
&mut self,
generic_params: Vec<GenericParam>,
path: ast::Path,
lo: Span,
parse_plus: bool,
) -> PResult<'a, TyKind> {
let poly_trait_ref = PolyTraitRef::new(generic_params, path, lo.to(self.prev_token.span));
let bounds = vec![GenericBound::Trait(poly_trait_ref, TraitBoundModifier::None)];
self.parse_remaining_bounds(bounds, parse_plus)
}
/// Parse the remainder of a bare trait object type given an already parsed list.
fn parse_remaining_bounds(
&mut self,
mut bounds: GenericBounds,
plus: bool,
) -> PResult<'a, TyKind> {
assert_ne!(self.token, token::Question);
if plus {
self.eat_plus(); // `+`, or `+=` gets split and `+` is discarded
bounds.append(&mut self.parse_generic_bounds(Some(self.prev_token.span))?);
}
Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::None))
}
/// Parses a raw pointer type: `*[const | mut] $type`.
fn parse_ty_ptr(&mut self) -> PResult<'a, TyKind> {
let mutbl = self.parse_const_or_mut().unwrap_or_else(|| {
let span = self.prev_token.span;
let msg = "expected mut or const in raw pointer type";
self.struct_span_err(span, msg)
.span_label(span, msg)
.help("use `*mut T` or `*const T` as appropriate")
.emit();
Mutability::Not
});
let ty = self.parse_ty_no_plus()?;
Ok(TyKind::Ptr(MutTy { ty, mutbl }))
}
/// Parses an array (`[TYPE; EXPR]`) or slice (`[TYPE]`) type.
/// The opening `[` bracket is already eaten.
fn parse_array_or_slice_ty(&mut self) -> PResult<'a, TyKind> {
let elt_ty = self.parse_ty()?;
let ty = if self.eat(&token::Semi) {
TyKind::Array(elt_ty, self.parse_anon_const_expr()?)
} else {
TyKind::Slice(elt_ty)
};
self.expect(&token::CloseDelim(token::Bracket))?;
Ok(ty)
}
fn parse_borrowed_pointee(&mut self) -> PResult<'a, TyKind> {
let opt_lifetime = if self.check_lifetime() { Some(self.expect_lifetime()) } else { None };
let mutbl = self.parse_mutability();
let ty = self.parse_ty_no_plus()?;
Ok(TyKind::Rptr(opt_lifetime, MutTy { ty, mutbl }))
}
// Parses `typeof(EXPR)`.
// To avoid ambiguity, the expression is surrounded by parentheses.
fn parse_typeof_ty(&mut self) -> PResult<'a, TyKind> {
self.expect(&token::OpenDelim(token::Paren))?;
let expr = self.parse_anon_const_expr()?;
self.expect(&token::CloseDelim(token::Paren))?;
Ok(TyKind::Typeof(expr))
}
/// Parses a function pointer type (`TyKind::BareFn`).
/// ```
/// [unsafe] [extern "ABI"] fn (S) -> T
///  ^~~~~^          ^~~~^     ^~^    ^
///    |               |        |     |
///    |               |        |   Return type
/// Function Style    ABI  Parameter types
/// ```
/// We actually parse `FnHeader FnDecl`, but we error on `const` and `async` qualifiers.
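/// For instance (illustrative), `const fn(u8) -> u8` and `async fn()` parse
/// here but are rejected via `error_fn_ptr_bad_qualifier` below.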
fn parse_ty_bare_fn(&mut self, lo: Span, params: Vec<GenericParam>) -> PResult<'a, TyKind> {
let ast::FnHeader { ext, unsafety, constness, asyncness } = self.parse_fn_front_matter()?;
let decl = self.parse_fn_decl(|_| false, AllowPlus::No)?;
let whole_span = lo.to(self.prev_token.span);
if let ast::Const::Yes(span) = constness {
self.error_fn_ptr_bad_qualifier(whole_span, span, "const");
}
if let ast::Async::Yes { span, .. } = asyncness {
self.error_fn_ptr_bad_qualifier(whole_span, span, "async");
}
Ok(TyKind::BareFn(P(BareFnTy { ext, unsafety, generic_params: params, decl })))
}
/// Emit an error for the given bad function pointer qualifier.
fn error_fn_ptr_bad_qualifier(&self, span: Span, qual_span: Span, qual: &str) {
self.struct_span_err(span, &format!("an `fn` pointer type cannot be `{}`", qual))
.span_label(qual_span, format!("`{}` because of this", qual))
.span_suggestion_short(
qual_span,
&format!("remove the `{}` qualifier", qual),
String::new(),
Applicability::MaybeIncorrect,
)
.emit();
}
/// Parses an `impl B0 + ... + Bn` type.
fn parse_impl_ty(&mut self, impl_dyn_multi: &mut bool) -> PResult<'a, TyKind> {
// Always parse bounds greedily for better error recovery.
let bounds = self.parse_generic_bounds(None)?;
*impl_dyn_multi = bounds.len() > 1 || self.prev_token.kind == TokenKind::BinOp(token::Plus);
Ok(TyKind::ImplTrait(ast::DUMMY_NODE_ID, bounds))
}
/// Is a `dyn B0 + ... + Bn` type allowed here?
fn is_explicit_dyn_type(&mut self) -> bool {
self.check_keyword(kw::Dyn)
&& (self.token.uninterpolated_span().rust_2018()
|| self.look_ahead(1, |t| {
t.can_begin_bound() && !can_continue_type_after_non_fn_ident(t)
}))
}
/// Parses a `dyn B0 + ... + Bn` type.
///
/// Note that this does *not* parse bare trait objects.
fn parse_dyn_ty(&mut self, impl_dyn_multi: &mut bool) -> PResult<'a, TyKind> {
self.bump(); // `dyn`
// Always parse bounds greedily for better error recovery.
let bounds = self.parse_generic_bounds(None)?;
*impl_dyn_multi = bounds.len() > 1 || self.prev_token.kind == TokenKind::BinOp(token::Plus);
Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::Dyn))
}
/// Parses a type starting with a path.
///
/// This can be:
/// 1. a type macro, `mac!(...)`,
/// 2. a bare trait object, `B0 + ... + Bn`,
/// 3. or a path, `path::to::MyType`.
fn parse_path_start_ty(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
// Simple path
let path = self.parse_path(PathStyle::Type)?;
if self.eat(&token::Not) {
// Macro invocation in type position
Ok(TyKind::MacCall(MacCall {
path,
args: self.parse_mac_args()?,
prior_type_ascription: self.last_type_ascription,
}))
} else if allow_plus == AllowPlus::Yes && self.check_plus() {
// `Trait1 + Trait2 + 'a`
self.parse_remaining_bounds_path(Vec::new(), path, lo, true)
} else {
// Just a type path.
Ok(TyKind::Path(None, path))
}
}
fn error_illegal_c_variadic_ty(&self, lo: Span) {
struct_span_err!(
self.sess.span_diagnostic,
lo.to(self.prev_token.span),
E0743,
"C-variadic type `...` may not be nested inside another type",
)
.emit();
}
pub(super) fn parse_generic_bounds(
&mut self,
colon_span: Option<Span>,
) -> PResult<'a, GenericBounds> {
self.parse_generic_bounds_common(AllowPlus::Yes, colon_span)
}
/// Parses bounds of a type parameter `BOUND + BOUND + ...`, possibly with trailing `+`.
///
/// See `parse_generic_bound` for the `BOUND` grammar.
fn parse_generic_bounds_common(
&mut self,
allow_plus: AllowPlus,
colon_span: Option<Span>,
) -> PResult<'a, GenericBounds> {
let mut bounds = Vec::new();
let mut negative_bounds = Vec::new();
while self.can_begin_bound() {
match self.parse_generic_bound()? {
Ok(bound) => bounds.push(bound),
Err(neg_sp) => negative_bounds.push(neg_sp),
}
if allow_plus == AllowPlus::No || !self.eat_plus() {
break;
}
}
if !negative_bounds.is_empty() {
self.error_negative_bounds(colon_span, &bounds, negative_bounds);
}
Ok(bounds)
}
/// Can the current token begin a bound?
fn can_begin_bound(&mut self) -> bool {
// This needs to be synchronized with `TokenKind::can_begin_bound`.
self.check_path()
|| self.check_lifetime()
|| self.check(&token::Not) // Used for error reporting only.
|| self.check(&token::Question)
|| self.check_keyword(kw::For)
|| self.check(&token::OpenDelim(token::Paren))
}
fn error_negative_bounds(
&self,
colon_span: Option<Span>,
bounds: &[GenericBound],
negative_bounds: Vec<Span>,
) {
let negative_bounds_len = negative_bounds.len();
let last_span = *negative_bounds.last().expect("no negative bounds, but still error?");
let mut err = self.struct_span_err(negative_bounds, "negative bounds are not supported");
err.span_label(last_span, "negative bounds are not supported");
if let Some(bound_list) = colon_span {
let bound_list = bound_list.to(self.prev_token.span);
let mut new_bound_list = String::new();
if !bounds.is_empty() {
let mut snippets = bounds.iter().map(|bound| self.span_to_snippet(bound.span()));
while let Some(Ok(snippet)) = snippets.next() {
new_bound_list.push_str(" + ");
new_bound_list.push_str(&snippet);
}
new_bound_list = new_bound_list.replacen(" +", ":", 1);
}
err.tool_only_span_suggestion(
bound_list,
&format!("remove the bound{}", pluralize!(negative_bounds_len)),
new_bound_list,
Applicability::MachineApplicable,
);
}
err.emit();
}
/// Parses a bound according to the grammar:
/// ```
/// BOUND = TY_BOUND | LT_BOUND
/// ```
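/// For example (illustrative): `'a` is an `LT_BOUND`, while `Clone`, `?Sized`,
/// and `for<'b> Fn(&'b u8)` are `TY_BOUND`s.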
fn parse_generic_bound(&mut self) -> PResult<'a, Result<GenericBound, Span>> {
let anchor_lo = self.prev_token.span;
let lo = self.token.span;
let has_parens = self.eat(&token::OpenDelim(token::Paren));
let inner_lo = self.token.span;
let is_negative = self.eat(&token::Not);
let modifiers = self.parse_ty_bound_modifiers();
let bound = if self.token.is_lifetime() {
self.error_lt_bound_with_modifiers(modifiers);
self.parse_generic_lt_bound(lo, inner_lo, has_parens)?
} else {
self.parse_generic_ty_bound(lo, has_parens, modifiers)?
};
Ok(if is_negative { Err(anchor_lo.to(self.prev_token.span)) } else { Ok(bound) })
}
/// Parses a lifetime ("outlives") bound, e.g. `'a`, according to:
/// ```
/// LT_BOUND = LIFETIME
/// ```
fn parse_generic_lt_bound(
&mut self,
lo: Span,
inner_lo: Span,
has_parens: bool,
) -> PResult<'a, GenericBound> {
let bound = GenericBound::Outlives(self.expect_lifetime());
if has_parens {
// FIXME(Centril): Consider not erroring here and accepting `('lt)` instead,
// possibly introducing `GenericBound::Paren(P<GenericBound>)`?
self.recover_paren_lifetime(lo, inner_lo)?;
}
Ok(bound)
}
/// Emits an error if any trait bound modifiers were present.
fn error_lt_bound_with_modifiers(&self, modifiers: BoundModifiers) {
if let Some(span) = modifiers.maybe_const {
self.struct_span_err(
span,
"`?const` may only modify trait bounds, not lifetime bounds",
)
.emit();
}
if let Some(span) = modifiers.maybe {
self.struct_span_err(span, "`?` may only modify trait bounds, not lifetime bounds")
.emit();
}
}
/// Recover on `('lifetime)` with `(` already eaten.
fn recover_paren_lifetime(&mut self, lo: Span, inner_lo: Span) -> PResult<'a, ()> {
let inner_span = inner_lo.to(self.prev_token.span);
self.expect(&token::CloseDelim(token::Paren))?;
let mut err = self.struct_span_err(
lo.to(self.prev_token.span),
"parenthesized lifetime bounds are not supported",
);
if let Ok(snippet) = self.span_to_snippet(inner_span) {
err.span_suggestion_short(
lo.to(self.prev_token.span),
"remove the parentheses",
snippet,
Applicability::MachineApplicable,
);
}
err.emit();
Ok(())
}
/// Parses the modifiers that may precede a trait in a bound, e.g. `?Trait` or `?const Trait`.
///
/// If no modifiers are present, this does not consume any tokens.
///
/// ```
/// TY_BOUND_MODIFIERS = "?" ["const" ["?"]]
/// ```
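/// For example (illustrative): the `?` in `?Sized`, or the `?const` in
/// `?const Trait` (the latter is feature-gated in the body below).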
fn parse_ty_bound_modifiers(&mut self) -> BoundModifiers {
if !self.eat(&token::Question) {
return BoundModifiers { maybe: None, maybe_const: None };
}
// `? ...`
let first_question = self.prev_token.span;
if !self.eat_keyword(kw::Const) {
return BoundModifiers { maybe: Some(first_question), maybe_const: None };
}
// `?const ...`
let maybe_const = first_question.to(self.prev_token.span);
self.sess.gated_spans.gate(sym::const_trait_bound_opt_out, maybe_const);
if !self.eat(&token::Question) {
return BoundModifiers { maybe: None, maybe_const: Some(maybe_const) };
}
// `?const ? ...`
let second_question = self.prev_token.span;
BoundModifiers { maybe: Some(second_question), maybe_const: Some(maybe_const) }
}
/// Parses a type bound according to:
/// ```
/// TY_BOUND = TY_BOUND_NOPAREN | (TY_BOUND_NOPAREN)
/// TY_BOUND_NOPAREN = [TY_BOUND_MODIFIERS] [for<LT_PARAM_DEFS>] SIMPLE_PATH
/// ```
///
/// For example, this grammar accepts `?const ?for<'a: 'b> m::Trait<'a>`.
fn parse_generic_ty_bound(
&mut self,
lo: Span,
has_parens: bool,
modifiers: BoundModifiers,
) -> PResult<'a, GenericBound> {
let lifetime_defs = self.parse_late_bound_lifetime_defs()?;
let path = self.parse_path(PathStyle::Type)?;
if has_parens {
self.expect(&token::CloseDelim(token::Paren))?;
}
let modifier = modifiers.to_trait_bound_modifier();
let poly_trait = PolyTraitRef::new(lifetime_defs, path, lo.to(self.prev_token.span));
Ok(GenericBound::Trait(poly_trait, modifier))
}
/// Optionally parses `for<$generic_params>`.
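/// For example (illustrative), the `for<'a, 'b>` prefix in
/// `for<'a, 'b> Fn(&'a u8, &'b u8) -> &'a u8`.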
pub(super) fn parse_late_bound_lifetime_defs(&mut self) -> PResult<'a, Vec<GenericParam>> {
if self.eat_keyword(kw::For) {
self.expect_lt()?;
let params = self.parse_generic_params()?;
self.expect_gt()?;
// We rely on AST validation to rule out invalid cases: There must not be type
// parameters, and the lifetime parameters must not have bounds.
Ok(params)
} else {
Ok(Vec::new())
}
}
pub(super) fn check_lifetime(&mut self) -> bool {
self.expected_tokens.push(TokenType::Lifetime);
self.token.is_lifetime()
}
/// Parses a single lifetime `'a` or panics.
pub(super) fn expect_lifetime(&mut self) -> Lifetime {
if let Some(ident) = self.token.lifetime() {
self.bump();
Lifetime { ident, id: ast::DUMMY_NODE_ID }
} else {
self.span_bug(self.token.span, "not a lifetime")
}
}
pub(super) fn mk_ty(&self, span: Span, kind: TyKind) -> P<Ty> {
P(Ty { kind, span, id: ast::DUMMY_NODE_ID })
}
}

View file

@@ -0,0 +1,163 @@
//! Meta-syntax validation logic of attributes for post-expansion.
use crate::parse_in;
use rustc_ast::tokenstream::DelimSpan;
use rustc_ast::{self as ast, Attribute, MacArgs, MacDelimiter, MetaItem, MetaItemKind};
use rustc_errors::{Applicability, PResult};
use rustc_feature::{AttributeTemplate, BUILTIN_ATTRIBUTE_MAP};
use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT;
use rustc_session::parse::ParseSess;
use rustc_span::{sym, Symbol};
pub fn check_meta(sess: &ParseSess, attr: &Attribute) {
if attr.is_doc_comment() {
return;
}
let attr_info =
attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name)).map(|a| **a);
// Check input tokens for built-in and key-value attributes.
match attr_info {
// `rustc_dummy` doesn't have any restrictions specific to built-in attributes.
Some((name, _, template, _)) if name != sym::rustc_dummy => {
check_builtin_attribute(sess, attr, name, template)
}
_ => {
if let MacArgs::Eq(..) = attr.get_normal_item().args {
// All key-value attributes are restricted to meta-item syntax.
parse_meta(sess, attr)
.map_err(|mut err| {
err.emit();
})
.ok();
}
}
}
}
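/// Parses the attribute's arguments into a `MetaItem`. A hedged summary of the
/// three shapes handled by the match below: `#[attr]` becomes `Word`,
/// `#[attr = lit]` becomes `NameValue`, and `#[attr(...)]` becomes `List`.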
pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, MetaItem> {
let item = attr.get_normal_item();
Ok(MetaItem {
span: attr.span,
path: item.path.clone(),
kind: match &item.args {
MacArgs::Empty => MetaItemKind::Word,
MacArgs::Eq(_, t) => {
let v = parse_in(sess, t.clone(), "name value", |p| p.parse_unsuffixed_lit())?;
MetaItemKind::NameValue(v)
}
MacArgs::Delimited(dspan, delim, t) => {
check_meta_bad_delim(sess, *dspan, *delim, "wrong meta list delimiters");
let nmis = parse_in(sess, t.clone(), "meta list", |p| p.parse_meta_seq_top())?;
MetaItemKind::List(nmis)
}
},
})
}
pub fn check_meta_bad_delim(sess: &ParseSess, span: DelimSpan, delim: MacDelimiter, msg: &str) {
if let ast::MacDelimiter::Parenthesis = delim {
return;
}
sess.span_diagnostic
.struct_span_err(span.entire(), msg)
.multipart_suggestion(
"the delimiters should be `(` and `)`",
vec![(span.open, "(".to_string()), (span.close, ")".to_string())],
Applicability::MachineApplicable,
)
.emit();
}
/// Checks that the given meta-item is compatible with this `AttributeTemplate`.
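/// For example (illustrative): `#[test]` is a word, `#[derive(Clone)]` is a
/// list, and `#[doc = "text"]` is a name-value pair with a string value.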
fn is_attr_template_compatible(template: &AttributeTemplate, meta: &ast::MetaItemKind) -> bool {
match meta {
MetaItemKind::Word => template.word,
MetaItemKind::List(..) => template.list.is_some(),
MetaItemKind::NameValue(lit) if lit.kind.is_str() => template.name_value_str.is_some(),
MetaItemKind::NameValue(..) => false,
}
}
pub fn check_builtin_attribute(
sess: &ParseSess,
attr: &Attribute,
name: Symbol,
template: AttributeTemplate,
) {
// Some special attributes like `cfg` must be checked
// before the generic check, so we skip them here.
let should_skip = |name| name == sym::cfg;
// Some previously accepted forms were used in practice;
// report them as warnings for now.
let should_warn = |name| {
name == sym::doc
|| name == sym::ignore
|| name == sym::inline
|| name == sym::link
|| name == sym::test
|| name == sym::bench
};
match parse_meta(sess, attr) {
Ok(meta) => {
if !should_skip(name) && !is_attr_template_compatible(&template, &meta.kind) {
let error_msg = format!("malformed `{}` attribute input", name);
let mut msg = "attribute must be of the form ".to_owned();
let mut suggestions = vec![];
let mut first = true;
if template.word {
first = false;
let code = format!("#[{}]", name);
msg.push_str(&format!("`{}`", &code));
suggestions.push(code);
}
if let Some(descr) = template.list {
if !first {
msg.push_str(" or ");
}
first = false;
let code = format!("#[{}({})]", name, descr);
msg.push_str(&format!("`{}`", &code));
suggestions.push(code);
}
if let Some(descr) = template.name_value_str {
if !first {
msg.push_str(" or ");
}
let code = format!("#[{} = \"{}\"]", name, descr);
msg.push_str(&format!("`{}`", &code));
suggestions.push(code);
}
if should_warn(name) {
sess.buffer_lint(
&ILL_FORMED_ATTRIBUTE_INPUT,
meta.span,
ast::CRATE_NODE_ID,
&msg,
);
} else {
sess.span_diagnostic
.struct_span_err(meta.span, &error_msg)
.span_suggestions(
meta.span,
if suggestions.len() == 1 {
"must be of the form"
} else {
"the following are the possible correct uses"
},
suggestions.into_iter(),
Applicability::HasPlaceholders,
)
.emit();
}
}
}
Err(mut err) => {
err.emit();
}
}
}