1
Fork 0

Implement macro meta-variable expressions

This commit is contained in:
Caio 2022-03-09 16:46:23 -03:00
parent 10dccdc7fc
commit 8073a88f35
17 changed files with 900 additions and 44 deletions

View file

@ -3,6 +3,7 @@
#![feature(crate_visibility_modifier)]
#![feature(decl_macro)]
#![feature(if_let_guard)]
#![feature(let_chains)]
#![feature(let_else)]
#![feature(proc_macro_diagnostic)]
#![feature(proc_macro_internals)]

View file

@ -6,17 +6,17 @@
crate mod macro_check;
crate mod macro_parser;
crate mod macro_rules;
crate mod metavar_expr;
crate mod quoted;
crate mod transcribe;
use metavar_expr::MetaVarExpr;
use rustc_ast::token::{self, NonterminalKind, Token, TokenKind};
use rustc_ast::tokenstream::DelimSpan;
use rustc_data_structures::sync::Lrc;
use rustc_span::symbol::Ident;
use rustc_span::Span;
use rustc_data_structures::sync::Lrc;
/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note
/// that the delimiter itself might be `NoDelim`.
#[derive(Clone, PartialEq, Encodable, Decodable, Debug)]
@ -73,8 +73,8 @@ enum KleeneOp {
ZeroOrOne,
}
/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, and `$(...)`
/// are "first-class" token trees. Useful for parsing macros.
/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, `$(...)`,
/// and `${...}` are "first-class" token trees. Useful for parsing macros.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable)]
enum TokenTree {
Token(Token),
@ -85,6 +85,8 @@ enum TokenTree {
MetaVar(Span, Ident),
/// e.g., `$var:expr`. This is only used in the left hand side of MBE macros.
MetaVarDecl(Span, Ident /* name to bind */, Option<NonterminalKind>),
/// A meta-variable expression inside `${...}`
MetaVarExpr(DelimSpan, MetaVarExpr),
}
impl TokenTree {
@ -139,7 +141,9 @@ impl TokenTree {
TokenTree::Token(Token { span, .. })
| TokenTree::MetaVar(span, _)
| TokenTree::MetaVarDecl(span, _, _) => span,
TokenTree::Delimited(span, _) | TokenTree::Sequence(span, _) => span.entire(),
TokenTree::Delimited(span, _)
| TokenTree::MetaVarExpr(span, _)
| TokenTree::Sequence(span, _) => span.entire(),
}
}

View file

@ -278,6 +278,8 @@ fn check_binders(
binders.insert(name, BinderInfo { span, ops: ops.into() });
}
}
// `MetaVarExpr` can not appear in the LHS of a macro arm
TokenTree::MetaVarExpr(..) => {}
TokenTree::Delimited(_, ref del) => {
for tt in &del.tts {
check_binders(sess, node_id, tt, macros, binders, ops, valid);
@ -335,6 +337,8 @@ fn check_occurrences(
let name = MacroRulesNormalizedIdent::new(name);
check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name);
}
// FIXME(c410-f3r) Check token (https://github.com/rust-lang/rust/issues/93902)
TokenTree::MetaVarExpr(..) => {}
TokenTree::Delimited(_, ref del) => {
check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid);
}

View file

@ -200,7 +200,7 @@ struct MatcherPos<'root, 'tt> {
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(MatcherPos<'_, '_>, 192);
rustc_data_structures::static_assert_size!(MatcherPos<'_, '_>, 240);
impl<'root, 'tt> MatcherPos<'root, 'tt> {
/// Generates the top-level matcher position in which the "dot" is before the first token of
@ -321,10 +321,13 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
ms.iter().fold(0, |count, elt| {
count
+ match *elt {
TokenTree::Sequence(_, ref seq) => seq.num_captures,
TokenTree::Delimited(_, ref delim) => count_names(&delim.tts),
TokenTree::MetaVar(..) => 0,
TokenTree::MetaVarDecl(..) => 1,
// FIXME(c410-f3r) MetaVarExpr should be handled instead of being ignored
// https://github.com/rust-lang/rust/issues/9390
TokenTree::MetaVarExpr(..) => 0,
TokenTree::Sequence(_, ref seq) => seq.num_captures,
TokenTree::Token(..) => 0,
}
})
@ -436,7 +439,9 @@ fn nameize<I: Iterator<Item = NamedMatch>>(
}
Occupied(..) => return Err((sp, format!("duplicated bind name: {}", bind_name))),
},
TokenTree::MetaVar(..) | TokenTree::Token(..) => (),
// FIXME(c410-f3r) MetaVar and MetaVarExpr should be handled instead of being ignored
// https://github.com/rust-lang/rust/issues/9390
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) | TokenTree::Token(..) => {}
}
Ok(())
@ -650,7 +655,7 @@ fn inner_parse_loop<'root, 'tt>(
// rules. NOTE that this is not necessarily an error unless _all_ items in
// `cur_items` end up doing this. There may still be some other matchers that do
// end up working out.
TokenTree::Token(..) | TokenTree::MetaVar(..) => {}
TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => {}
}
}
}

View file

@ -580,7 +580,10 @@ fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool {
use mbe::TokenTree;
for tt in tts {
match *tt {
TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => (),
TokenTree::Token(..)
| TokenTree::MetaVar(..)
| TokenTree::MetaVarDecl(..)
| TokenTree::MetaVarExpr(..) => (),
TokenTree::Delimited(_, ref del) => {
if !check_lhs_no_empty_seq(sess, &del.tts) {
return false;
@ -669,7 +672,10 @@ impl FirstSets {
let mut first = TokenSet::empty();
for tt in tts.iter().rev() {
match *tt {
TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
TokenTree::Token(..)
| TokenTree::MetaVar(..)
| TokenTree::MetaVarDecl(..)
| TokenTree::MetaVarExpr(..) => {
first.replace_with(tt.clone());
}
TokenTree::Delimited(span, ref delimited) => {
@ -731,7 +737,10 @@ impl FirstSets {
for tt in tts.iter() {
assert!(first.maybe_empty);
match *tt {
TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
TokenTree::Token(..)
| TokenTree::MetaVar(..)
| TokenTree::MetaVarDecl(..)
| TokenTree::MetaVarExpr(..) => {
first.add_one(tt.clone());
return first;
}
@ -907,7 +916,10 @@ fn check_matcher_core(
// First, update `last` so that it corresponds to the set
// of NT tokens that might end the sequence `... token`.
match *token {
TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
TokenTree::Token(..)
| TokenTree::MetaVar(..)
| TokenTree::MetaVarDecl(..)
| TokenTree::MetaVarExpr(..) => {
if token_can_be_followed_by_any(token) {
// don't need to track tokens that work with any,
last.replace_with_irrelevant();

View file

@ -0,0 +1,157 @@
use rustc_ast::token;
use rustc_ast::tokenstream::{Cursor, TokenStream, TokenTree};
use rustc_ast::{LitIntType, LitKind};
use rustc_ast_pretty::pprust;
use rustc_errors::{Applicability, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::Ident;
use rustc_span::Span;
/// A meta-variable expression, for expansions based on properties of meta-variables.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable)]
crate enum MetaVarExpr {
/// The number of repetitions of an identifier, optionally limited to a number
/// of outer-most repetition depths. If the depth limit is `None` then the depth is unlimited.
Count(Ident, Option<usize>),
/// Ignore a meta-variable for repetition without expansion.
Ignore(Ident),
/// The index of the repetition at a particular depth, where 0 is the inner-most
/// repetition. The `usize` is the depth.
Index(usize),
/// The length of the repetition at a particular depth, where 0 is the inner-most
/// repetition. The `usize` is the depth.
Length(usize),
}
impl MetaVarExpr {
/// Attempt to parse a meta-variable expression from a token stream.
crate fn parse<'sess>(
input: &TokenStream,
outer_span: Span,
sess: &'sess ParseSess,
) -> PResult<'sess, MetaVarExpr> {
let mut tts = input.trees();
let ident = parse_ident(&mut tts, sess, outer_span)?;
let Some(TokenTree::Delimited(_, token::Paren, args)) = tts.next() else {
let msg = "meta-variable expression parameter must be wrapped in parentheses";
return Err(sess.span_diagnostic.struct_span_err(ident.span, msg));
};
check_trailing_token(&mut tts, sess)?;
let mut iter = args.trees();
let rslt = match &*ident.as_str() {
"count" => parse_count(&mut iter, sess, ident.span)?,
"ignore" => MetaVarExpr::Ignore(parse_ident(&mut iter, sess, ident.span)?),
"index" => MetaVarExpr::Index(parse_depth(&mut iter, sess, ident.span)?),
"length" => MetaVarExpr::Length(parse_depth(&mut iter, sess, ident.span)?),
_ => {
let err_msg = "unrecognized meta-variable expression";
let mut err = sess.span_diagnostic.struct_span_err(ident.span, err_msg);
err.span_suggestion(
ident.span,
"supported expressions are count, ignore, index and length",
String::new(),
Applicability::MachineApplicable,
);
return Err(err);
}
};
check_trailing_token(&mut iter, sess)?;
Ok(rslt)
}
crate fn ident(&self) -> Option<&Ident> {
match self {
MetaVarExpr::Count(ident, _) | MetaVarExpr::Ignore(ident) => Some(&ident),
MetaVarExpr::Index(..) | MetaVarExpr::Length(..) => None,
}
}
}
// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
fn check_trailing_token<'sess>(iter: &mut Cursor, sess: &'sess ParseSess) -> PResult<'sess, ()> {
if let Some(tt) = iter.next() {
let mut diag = sess.span_diagnostic.struct_span_err(
tt.span(),
&format!("unexpected token: {}", pprust::tt_to_string(&tt)),
);
diag.span_note(tt.span(), "meta-variable expression must not have trailing tokens");
Err(diag)
} else {
Ok(())
}
}
/// Parse a meta-variable `count` expression: `count(ident[, depth])`
fn parse_count<'sess>(
iter: &mut Cursor,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, MetaVarExpr> {
let ident = parse_ident(iter, sess, span)?;
let depth = if try_eat_comma(iter) { Some(parse_depth(iter, sess, span)?) } else { None };
Ok(MetaVarExpr::Count(ident, depth))
}
/// Parses the depth used by index(depth) and length(depth).
fn parse_depth<'sess>(
iter: &mut Cursor,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, usize> {
let Some(tt) = iter.next() else { return Ok(0) };
let TokenTree::Token(token::Token {
kind: token::TokenKind::Literal(lit), ..
}) = tt else {
return Err(sess.span_diagnostic.struct_span_err(
span,
"meta-variable expression depth must be a literal"
));
};
if let Ok(lit_kind) = LitKind::from_lit_token(lit)
&& let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind
&& let Ok(n_usize) = usize::try_from(n_u128)
{
Ok(n_usize)
}
else {
let msg = "only unsuffixes integer literals are supported in meta-variable expressions";
Err(sess.span_diagnostic.struct_span_err(span, msg))
}
}
/// Parses an generic ident
fn parse_ident<'sess>(
iter: &mut Cursor,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, Ident> {
let err_fn = |msg| sess.span_diagnostic.struct_span_err(span, msg);
if let Some(tt) = iter.next() && let TokenTree::Token(token) = tt {
if let Some((elem, false)) = token.ident() {
return Ok(elem);
}
let token_str = pprust::token_to_string(&token);
let mut err = err_fn(&format!("expected identifier, found `{}`", &token_str));
err.span_suggestion(
token.span,
&format!("try removing `{}`", &token_str),
String::new(),
Applicability::MaybeIncorrect,
);
return Err(err);
}
Err(err_fn("expected identifier"))
}
/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
/// iterator is not modified and the result is `false`.
fn try_eat_comma(iter: &mut Cursor) -> bool {
if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. })) = iter.look_ahead(0) {
let _ = iter.next();
return true;
}
false
}

View file

@ -1,13 +1,13 @@
use crate::mbe::macro_parser;
use crate::mbe::{Delimited, KleeneOp, KleeneToken, SequenceRepetition, TokenTree};
use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree};
use rustc_ast::token::{self, Token};
use rustc_ast::tokenstream;
use rustc_ast::{NodeId, DUMMY_NODE_ID};
use rustc_ast_pretty::pprust;
use rustc_feature::Features;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, Ident};
use rustc_session::parse::{feature_err, ParseSess};
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::edition::Edition;
use rustc_span::{Span, SyntaxContext};
@ -25,22 +25,22 @@ const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \
/// # Parameters
///
/// - `input`: a token stream to read from, the contents of which we are parsing.
/// - `expect_matchers`: `parse` can be used to parse either the "patterns" or the "body" of a
/// macro. Both take roughly the same form _except_ that in a pattern, metavars are declared with
/// their "matcher" type. For example `$var:expr` or `$id:ident`. In this example, `expr` and
/// `ident` are "matchers". They are not present in the body of a macro rule -- just in the
/// pattern, so we pass a parameter to indicate whether to expect them or not.
/// - `parsing_patterns`: `parse` can be used to parse either the "patterns" or the "body" of a
/// macro. Both take roughly the same form _except_ that:
/// - In a pattern, metavars are declared with their "matcher" type. For example `$var:expr` or
/// `$id:ident`. In this example, `expr` and `ident` are "matchers". They are not present in the
/// body of a macro rule -- just in the pattern.
/// - Metavariable expressions are only valid in the "body", not the "pattern".
/// - `sess`: the parsing session. Any errors will be emitted to this session.
/// - `node_id`: the NodeId of the macro we are parsing.
/// - `features`: language features so we can do feature gating.
/// - `edition`: the edition of the crate defining the macro
///
/// # Returns
///
/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`.
pub(super) fn parse(
input: tokenstream::TokenStream,
expect_matchers: bool,
parsing_patterns: bool,
sess: &ParseSess,
node_id: NodeId,
features: &Features,
@ -55,9 +55,9 @@ pub(super) fn parse(
while let Some(tree) = trees.next() {
// Given the parsed tree, if there is a metavar and we are expecting matchers, actually
// parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`).
let tree = parse_tree(tree, &mut trees, expect_matchers, sess, node_id, features, edition);
let tree = parse_tree(tree, &mut trees, parsing_patterns, sess, node_id, features, edition);
match tree {
TokenTree::MetaVar(start_sp, ident) if expect_matchers => {
TokenTree::MetaVar(start_sp, ident) if parsing_patterns => {
let span = match trees.next() {
Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span })) => {
match trees.next() {
@ -118,6 +118,14 @@ pub(super) fn parse(
result
}
/// Asks for the `macro_metavar_expr` feature if it is not already declared
fn maybe_emit_macro_metavar_expr_feature(features: &Features, sess: &ParseSess, span: Span) {
if !features.macro_metavar_expr {
let msg = "meta-variable expressions are unstable";
feature_err(&sess, sym::macro_metavar_expr, span, msg).emit();
}
}
/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a
/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree`
/// for use in parsing a macro.
@ -129,14 +137,13 @@ pub(super) fn parse(
/// - `tree`: the tree we wish to convert.
/// - `outer_trees`: an iterator over trees. We may need to read more tokens from it in order to finish
/// converting `tree`
/// - `expect_matchers`: same as for `parse` (see above).
/// - `parsing_patterns`: same as [parse].
/// - `sess`: the parsing session. Any errors will be emitted to this session.
/// - `features`: language features so we can do feature gating.
/// - `edition` - the edition of the crate defining the macro
fn parse_tree(
tree: tokenstream::TokenTree,
outer_trees: &mut impl Iterator<Item = tokenstream::TokenTree>,
expect_matchers: bool,
parsing_patterns: bool,
sess: &ParseSess,
node_id: NodeId,
features: &Features,
@ -158,24 +165,57 @@ fn parse_tree(
}
match next {
// `tree` is followed by a delimited set of token trees. This indicates the beginning
// of a repetition sequence in the macro (e.g. `$(pat)*`).
Some(tokenstream::TokenTree::Delimited(span, delim, tts)) => {
// Must have `(` not `{` or `[`
if delim != token::Paren {
let tok = pprust::token_kind_to_string(&token::OpenDelim(delim));
let msg = format!("expected `(`, found `{}`", tok);
sess.span_diagnostic.span_err(span.entire(), &msg);
// `tree` is followed by a delimited set of token trees.
Some(tokenstream::TokenTree::Delimited(delim_span, delim, tts)) => {
if parsing_patterns {
if delim != token::Paren {
span_dollar_dollar_or_metavar_in_the_lhs_err(
sess,
&Token { kind: token::OpenDelim(delim), span: delim_span.entire() },
);
}
} else {
match delim {
token::Brace => {
// The delimiter is `{`. This indicates the beginning
// of a meta-variable expression (e.g. `${count(ident)}`).
// Try to parse the meta-variable expression.
match MetaVarExpr::parse(&tts, delim_span.entire(), sess) {
Err(mut err) => {
err.emit();
// Returns early the same read `$` to avoid spanning
// unrelated diagnostics that could be performed afterwards
return TokenTree::token(token::Dollar, span);
}
Ok(elem) => {
maybe_emit_macro_metavar_expr_feature(
features,
sess,
delim_span.entire(),
);
return TokenTree::MetaVarExpr(delim_span, elem);
}
}
}
token::Paren => {}
_ => {
let tok = pprust::token_kind_to_string(&token::OpenDelim(delim));
let msg = format!("expected `(` or `{{`, found `{}`", tok);
sess.span_diagnostic.span_err(delim_span.entire(), &msg);
}
}
}
// Parse the contents of the sequence itself
let sequence = parse(tts, expect_matchers, sess, node_id, features, edition);
// If we didn't find a metavar expression above, then we must have a
// repetition sequence in the macro (e.g. `$(pat)*`). Parse the
// contents of the sequence itself
let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition);
// Get the Kleene operator and optional separator
let (separator, kleene) =
parse_sep_and_kleene_op(&mut trees, span.entire(), sess);
parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess);
// Count the number of captured "names" (i.e., named metavars)
let name_captures = macro_parser::count_names(&sequence);
TokenTree::Sequence(
span,
delim_span,
Lrc::new(SequenceRepetition {
tts: sequence,
separator,
@ -197,7 +237,20 @@ fn parse_tree(
}
}
// `tree` is followed by a random token. This is an error.
// `tree` is followed by another `$`. This is an escaped `$`.
Some(tokenstream::TokenTree::Token(Token { kind: token::Dollar, span })) => {
if parsing_patterns {
span_dollar_dollar_or_metavar_in_the_lhs_err(
sess,
&Token { kind: token::Dollar, span },
);
} else {
maybe_emit_macro_metavar_expr_feature(features, sess, span);
}
TokenTree::token(token::Dollar, span)
}
// `tree` is followed by some other token. This is an error.
Some(tokenstream::TokenTree::Token(token)) => {
let msg = format!(
"expected identifier, found `{}`",
@ -221,7 +274,7 @@ fn parse_tree(
span,
Lrc::new(Delimited {
delim,
tts: parse(tts, expect_matchers, sess, node_id, features, edition),
tts: parse(tts, parsing_patterns, sess, node_id, features, edition),
}),
),
}
@ -309,3 +362,15 @@ fn parse_sep_and_kleene_op(
// Return a dummy
(None, KleeneToken::new(KleeneOp::ZeroOrMore, span))
}
// `$$` or a meta-variable is the lhs of a macro but shouldn't.
//
// For example, `macro_rules! foo { ( ${length()} ) => {} }`
fn span_dollar_dollar_or_metavar_in_the_lhs_err<'sess>(sess: &'sess ParseSess, token: &Token) {
sess.span_diagnostic
.span_err(token.span, &format!("unexpected token: {}", pprust::token_to_string(token)));
sess.span_diagnostic.span_note_without_error(
token.span,
"`$$` and meta-variable expressions are not allowed inside macro parameter definitions",
);
}

View file

@ -255,6 +255,11 @@ pub(super) fn transcribe<'a>(
}
}
// Replace meta-variable expressions with the result of their expansion.
mbe::TokenTree::MetaVarExpr(sp, expr) => {
transcribe_metavar_expr(cx, expr, interp, &repeats, &mut result, &sp)?;
}
// If we are entering a new delimiter, we push its contents to the `stack` to be
// processed, and we push all of the currently produced results to the `result_stack`.
// We will produce all of the results of the inside of the `Delimited` and then we will
@ -391,6 +396,28 @@ fn lockstep_iter_size(
_ => LockstepIterSize::Unconstrained,
}
}
TokenTree::MetaVarExpr(_, ref expr) => {
let default_rslt = LockstepIterSize::Unconstrained;
let Some(ident) = expr.ident() else { return default_rslt; };
let name = MacroRulesNormalizedIdent::new(ident.clone());
match lookup_cur_matched(name, interpolations, repeats) {
Some(MatchedSeq(ref ads)) => {
default_rslt.with(LockstepIterSize::Constraint(ads.len(), name))
}
_ => default_rslt,
}
}
TokenTree::Token(..) => LockstepIterSize::Unconstrained,
}
}
fn transcribe_metavar_expr<'a>(
_cx: &ExtCtxt<'a>,
_expr: mbe::MetaVarExpr,
_interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
_repeats: &[(usize, usize)],
_result: &mut Vec<TreeAndSpacing>,
_sp: &DelimSpan,
) -> PResult<'a, ()> {
Ok(())
}