Properly allow macro expanded format_args
invocations to uses captures
Originally, this was kinda half-allowed. There were some primitive checks in place that looked at the span to see whether the input was likely a literal. These "source literal" checks are needed because the spans created during `format_args` parsing only make sense when it is indeed a literal that was written in the source code directly. This is orthogonal to the restriction that the first argument must be a "direct literal", not being exanpanded from macros. This restriction was imposed by [RFC 2795] on the basis of being too confusing. But this was only concerned with the argument of the invocation being a literal, not whether it was a source literal (maybe in spirit it meant it being a source literal, this is not clear to me). Since the original check only really cared about source literals (which is good enough to deny the `format_args!(concat!())` example), macros expanding to `format_args` invocations were able to use implicit captures if they spanned the string in a way that lead back to a source string. The "source literal" checks were not strict enough and caused ICEs in certain cases (see # 106191 (the space is intended to avoid spammy backreferences)). So I tightened it up in # 106195 to really only work if it's a direct source literal. This caused the `indoc` crate to break. `indoc` transformed the source literal by removing whitespace, which made it not a "source literal" anymore (which is required to fix the ICE). But since `indoc` spanned the literal in ways that made the old check think that it's a literal, it was able to use implicit captures (which is useful and nice for the users of `indoc`). This commit properly seperates the previously introduced concepts of "source literal" and "direct literal" and therefore allows `indoc` invocations, which don't create "source literals" to use implicit captures again. [RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene
This commit is contained in:
parent
0058748944
commit
729185338f
11 changed files with 198 additions and 62 deletions
|
@ -36,6 +36,21 @@ enum PositionUsedAs {
|
|||
}
|
||||
use PositionUsedAs::*;
|
||||
|
||||
struct MacroInput {
|
||||
fmtstr: P<Expr>,
|
||||
args: FormatArguments,
|
||||
/// Whether the first argument was a string literal or a result from eager macro expansion.
|
||||
/// If it's not a string literal, we disallow implicit arugment capturing.
|
||||
///
|
||||
/// This does not correspond to whether we can treat spans to the literal normally, as the whole
|
||||
/// invocation might be the result of another macro expansion, in which case this flag may still be true.
|
||||
///
|
||||
/// See [RFC 2795] for more information.
|
||||
///
|
||||
/// [RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene
|
||||
is_direct_literal: bool,
|
||||
}
|
||||
|
||||
/// Parses the arguments from the given list of tokens, returning the diagnostic
|
||||
/// if there's a parse error so we can continue parsing other format!
|
||||
/// expressions.
|
||||
|
@ -45,11 +60,7 @@ use PositionUsedAs::*;
|
|||
/// ```text
|
||||
/// Ok((fmtstr, parsed arguments))
|
||||
/// ```
|
||||
fn parse_args<'a>(
|
||||
ecx: &mut ExtCtxt<'a>,
|
||||
sp: Span,
|
||||
tts: TokenStream,
|
||||
) -> PResult<'a, (P<Expr>, FormatArguments)> {
|
||||
fn parse_args<'a>(ecx: &mut ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, MacroInput> {
|
||||
let mut args = FormatArguments::new();
|
||||
|
||||
let mut p = ecx.new_parser_from_tts(tts);
|
||||
|
@ -59,25 +70,21 @@ fn parse_args<'a>(
|
|||
}
|
||||
|
||||
let first_token = &p.token;
|
||||
let fmtstr = match first_token.kind {
|
||||
token::TokenKind::Literal(token::Lit {
|
||||
kind: token::LitKind::Str | token::LitKind::StrRaw(_),
|
||||
..
|
||||
}) => {
|
||||
// If the first token is a string literal, then a format expression
|
||||
// is constructed from it.
|
||||
//
|
||||
// This allows us to properly handle cases when the first comma
|
||||
// after the format string is mistakenly replaced with any operator,
|
||||
// which cause the expression parser to eat too much tokens.
|
||||
p.parse_literal_maybe_minus()?
|
||||
}
|
||||
_ => {
|
||||
// Otherwise, we fall back to the expression parser.
|
||||
p.parse_expr()?
|
||||
}
|
||||
|
||||
let fmtstr = if let token::Literal(lit) = first_token.kind && matches!(lit.kind, token::Str | token::StrRaw(_)) {
|
||||
// This allows us to properly handle cases when the first comma
|
||||
// after the format string is mistakenly replaced with any operator,
|
||||
// which cause the expression parser to eat too much tokens.
|
||||
p.parse_literal_maybe_minus()?
|
||||
} else {
|
||||
// Otherwise, we fall back to the expression parser.
|
||||
p.parse_expr()?
|
||||
};
|
||||
|
||||
// Only allow implicit captures to be used when the argument is a direct literal
|
||||
// instead of a macro expanding to one.
|
||||
let is_direct_literal = matches!(fmtstr.kind, ExprKind::Lit(_));
|
||||
|
||||
let mut first = true;
|
||||
|
||||
while p.token != token::Eof {
|
||||
|
@ -147,17 +154,19 @@ fn parse_args<'a>(
|
|||
}
|
||||
}
|
||||
}
|
||||
Ok((fmtstr, args))
|
||||
Ok(MacroInput { fmtstr, args, is_direct_literal })
|
||||
}
|
||||
|
||||
pub fn make_format_args(
|
||||
fn make_format_args(
|
||||
ecx: &mut ExtCtxt<'_>,
|
||||
efmt: P<Expr>,
|
||||
mut args: FormatArguments,
|
||||
input: MacroInput,
|
||||
append_newline: bool,
|
||||
) -> Result<FormatArgs, ()> {
|
||||
let msg = "format argument must be a string literal";
|
||||
let unexpanded_fmt_span = efmt.span;
|
||||
let unexpanded_fmt_span = input.fmtstr.span;
|
||||
|
||||
let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;
|
||||
|
||||
let (fmt_str, fmt_style, fmt_span) = match expr_to_spanned_string(ecx, efmt, msg) {
|
||||
Ok(mut fmt) if append_newline => {
|
||||
fmt.0 = Symbol::intern(&format!("{}\n", fmt.0));
|
||||
|
@ -208,11 +217,11 @@ pub fn make_format_args(
|
|||
}
|
||||
}
|
||||
|
||||
let is_literal = parser.is_literal;
|
||||
let is_source_literal = parser.is_source_literal;
|
||||
|
||||
if !parser.errors.is_empty() {
|
||||
let err = parser.errors.remove(0);
|
||||
let sp = if is_literal {
|
||||
let sp = if is_source_literal {
|
||||
fmt_span.from_inner(InnerSpan::new(err.span.start, err.span.end))
|
||||
} else {
|
||||
// The format string could be another macro invocation, e.g.:
|
||||
|
@ -230,7 +239,7 @@ pub fn make_format_args(
|
|||
if let Some(note) = err.note {
|
||||
e.note(¬e);
|
||||
}
|
||||
if let Some((label, span)) = err.secondary_label && is_literal {
|
||||
if let Some((label, span)) = err.secondary_label && is_source_literal {
|
||||
e.span_label(fmt_span.from_inner(InnerSpan::new(span.start, span.end)), label);
|
||||
}
|
||||
if err.should_be_replaced_with_positional_argument {
|
||||
|
@ -256,7 +265,7 @@ pub fn make_format_args(
|
|||
}
|
||||
|
||||
let to_span = |inner_span: rustc_parse_format::InnerSpan| {
|
||||
is_literal.then(|| {
|
||||
is_source_literal.then(|| {
|
||||
fmt_span.from_inner(InnerSpan { start: inner_span.start, end: inner_span.end })
|
||||
})
|
||||
};
|
||||
|
@ -304,7 +313,7 @@ pub fn make_format_args(
|
|||
// Name not found in `args`, so we add it as an implicitly captured argument.
|
||||
let span = span.unwrap_or(fmt_span);
|
||||
let ident = Ident::new(name, span);
|
||||
let expr = if is_literal {
|
||||
let expr = if is_direct_literal {
|
||||
ecx.expr_ident(span, ident)
|
||||
} else {
|
||||
// For the moment capturing variables from format strings expanded from macros is
|
||||
|
@ -814,7 +823,7 @@ fn report_invalid_references(
|
|||
// for `println!("{7:7$}", 1);`
|
||||
indexes.sort();
|
||||
indexes.dedup();
|
||||
let span: MultiSpan = if !parser.is_literal || parser.arg_places.is_empty() {
|
||||
let span: MultiSpan = if !parser.is_source_literal || parser.arg_places.is_empty() {
|
||||
MultiSpan::from_span(fmt_span)
|
||||
} else {
|
||||
MultiSpan::from_spans(invalid_refs.iter().filter_map(|&(_, span, _, _)| span).collect())
|
||||
|
@ -855,8 +864,8 @@ fn expand_format_args_impl<'cx>(
|
|||
) -> Box<dyn base::MacResult + 'cx> {
|
||||
sp = ecx.with_def_site_ctxt(sp);
|
||||
match parse_args(ecx, sp, tts) {
|
||||
Ok((efmt, args)) => {
|
||||
if let Ok(format_args) = make_format_args(ecx, efmt, args, nl) {
|
||||
Ok(input) => {
|
||||
if let Ok(format_args) = make_format_args(ecx, input, nl) {
|
||||
MacEager::expr(ecx.expr(sp, ExprKind::FormatArgs(P(format_args))))
|
||||
} else {
|
||||
MacEager::expr(DummyResult::raw_expr(sp, true))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue