1
Fork 0

Auto merge of #106745 - m-ou-se:format-args-ast, r=oli-obk

Move format_args!() into AST (and expand it during AST lowering)

Implements https://github.com/rust-lang/compiler-team/issues/541

This moves FormatArgs from rustc_builtin_macros to rustc_ast_lowering. For now, the end result is the same. But this allows for future changes to do smarter things with format_args!(). It also allows Clippy to directly access the ast::FormatArgs, making things a lot easier.

This change turns the format args types into lang items. The builtin macro used to refer to them by their path. After this change, the path is no longer relevant, making it easier to make changes in `core`.

This updates clippy to use the new language items, but this doesn't yet make clippy use the ast::FormatArgs structure that's now available. That should be done after this is merged.
This commit is contained in:
bors 2023-01-26 12:44:47 +00:00
commit 3e97763872
32 changed files with 750 additions and 449 deletions

View file

@ -297,6 +297,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
| ExprKind::Continue(_)
| ExprKind::Err
| ExprKind::Field(_, _)
| ExprKind::FormatArgs(_)
| ExprKind::ForLoop(_, _, _, _)
| ExprKind::If(_, _, _)
| ExprKind::IncludedBytes(..)

View file

@ -1,7 +1,11 @@
use rustc_ast::ptr::P;
use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::Expr;
use rustc_ast::{
Expr, ExprKind, FormatAlignment, FormatArgPosition, FormatArgPositionKind, FormatArgs,
FormatArgsPiece, FormatArgument, FormatArgumentKind, FormatArguments, FormatCount,
FormatOptions, FormatPlaceholder, FormatTrait,
};
use rustc_data_structures::fx::FxHashSet;
use rustc_errors::{pluralize, Applicability, MultiSpan, PResult};
use rustc_expand::base::{self, *};
@ -12,21 +16,15 @@ use rustc_span::{BytePos, InnerSpan, Span};
use rustc_lint_defs::builtin::NAMED_ARGUMENTS_USED_POSITIONALLY;
use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiagnostics, LintId};
mod ast;
use ast::*;
mod expand;
use expand::expand_parsed_format_args;
// The format_args!() macro is expanded in three steps:
// 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax,
// but doesn't parse the template (the literal) itself.
// 2. Second, `make_format_args` will parse the template, the format options, resolve argument references,
// produce diagnostics, and turn the whole thing into a `FormatArgs` structure.
// 3. Finally, `expand_parsed_format_args` will turn that `FormatArgs` structure
// into the expression that the macro expands to.
// produce diagnostics, and turn the whole thing into a `FormatArgs` AST node.
// 3. Much later, in AST lowering (rustc_ast_lowering), that `FormatArgs` structure will be turned
// into the expression of type `core::fmt::Arguments`.
// See format/ast.rs for the FormatArgs structure and glossary.
// See rustc_ast/src/format.rs for the FormatArgs structure and glossary.
// Only used in parse_args and report_invalid_references,
// to indicate how a referred argument was used.
@ -850,7 +848,7 @@ fn expand_format_args_impl<'cx>(
match parse_args(ecx, sp, tts) {
Ok((efmt, args)) => {
if let Ok(format_args) = make_format_args(ecx, efmt, args, nl) {
MacEager::expr(expand_parsed_format_args(ecx, format_args))
MacEager::expr(ecx.expr(sp, ExprKind::FormatArgs(P(format_args))))
} else {
MacEager::expr(DummyResult::raw_expr(sp, true))
}

View file

@ -1,240 +0,0 @@
use rustc_ast::ptr::P;
use rustc_ast::Expr;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::Span;
// Definitions:
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └──────────────────────────────────────────────┘
// FormatArgs
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─────────┘
// argument
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └───────────────────┘
// template
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └────┘└─────────┘└┘
// pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └────┘ └┘
// literal pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─────────┘
// placeholder
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └─┘ └─┘
// positions (could be names, numbers, empty, or `*`)
/// (Parsed) format args.
///
/// Basically the "AST" for a complete `format_args!()`.
///
/// E.g., `format_args!("hello {name}");`.
#[derive(Clone, Debug)]
pub struct FormatArgs {
pub span: Span,
pub template: Vec<FormatArgsPiece>,
pub arguments: FormatArguments,
}
/// A piece of a format template string.
///
/// E.g. "hello" or "{name}".
#[derive(Clone, Debug)]
pub enum FormatArgsPiece {
Literal(Symbol),
Placeholder(FormatPlaceholder),
}
/// The arguments to format_args!().
///
/// E.g. `1, 2, name="ferris", n=3`,
/// but also implicit captured arguments like `x` in `format_args!("{x}")`.
#[derive(Clone, Debug)]
pub struct FormatArguments {
arguments: Vec<FormatArgument>,
num_unnamed_args: usize,
num_explicit_args: usize,
names: FxHashMap<Symbol, usize>,
}
impl FormatArguments {
pub fn new() -> Self {
Self {
arguments: Vec::new(),
names: FxHashMap::default(),
num_unnamed_args: 0,
num_explicit_args: 0,
}
}
pub fn add(&mut self, arg: FormatArgument) -> usize {
let index = self.arguments.len();
if let Some(name) = arg.kind.ident() {
self.names.insert(name.name, index);
} else if self.names.is_empty() {
// Only count the unnamed args before the first named arg.
// (Any later ones are errors.)
self.num_unnamed_args += 1;
}
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) {
// This is an explicit argument.
// Make sure that all arguments so far are explcit.
assert_eq!(
self.num_explicit_args,
self.arguments.len(),
"captured arguments must be added last"
);
self.num_explicit_args += 1;
}
self.arguments.push(arg);
index
}
pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
let i = *self.names.get(&name)?;
Some((i, &self.arguments[i]))
}
pub fn by_index(&self, i: usize) -> Option<&FormatArgument> {
(i < self.num_explicit_args).then(|| &self.arguments[i])
}
pub fn unnamed_args(&self) -> &[FormatArgument] {
&self.arguments[..self.num_unnamed_args]
}
pub fn named_args(&self) -> &[FormatArgument] {
&self.arguments[self.num_unnamed_args..self.num_explicit_args]
}
pub fn explicit_args(&self) -> &[FormatArgument] {
&self.arguments[..self.num_explicit_args]
}
pub fn into_vec(self) -> Vec<FormatArgument> {
self.arguments
}
}
#[derive(Clone, Debug)]
pub struct FormatArgument {
pub kind: FormatArgumentKind,
pub expr: P<Expr>,
}
#[derive(Clone, Debug)]
pub enum FormatArgumentKind {
/// `format_args(…, arg)`
Normal,
/// `format_args(…, arg = 1)`
Named(Ident),
/// `format_args("… {arg} …")`
Captured(Ident),
}
impl FormatArgumentKind {
pub fn ident(&self) -> Option<Ident> {
match self {
&Self::Normal => None,
&Self::Named(id) => Some(id),
&Self::Captured(id) => Some(id),
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FormatPlaceholder {
/// Index into [`FormatArgs::arguments`].
pub argument: FormatArgPosition,
/// The span inside the format string for the full `{…}` placeholder.
pub span: Option<Span>,
/// `{}`, `{:?}`, or `{:x}`, etc.
pub format_trait: FormatTrait,
/// `{}` or `{:.5}` or `{:-^20}`, etc.
pub format_options: FormatOptions,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FormatArgPosition {
/// Which argument this position refers to (Ok),
/// or would've referred to if it existed (Err).
pub index: Result<usize, usize>,
/// What kind of position this is. See [`FormatArgPositionKind`].
pub kind: FormatArgPositionKind,
/// The span of the name or number.
pub span: Option<Span>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatArgPositionKind {
/// `{}` or `{:.*}`
Implicit,
/// `{1}` or `{:1$}` or `{:.1$}`
Number,
/// `{a}` or `{:a$}` or `{:.a$}`
Named,
}
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum FormatTrait {
/// `{}`
Display,
/// `{:?}`
Debug,
/// `{:e}`
LowerExp,
/// `{:E}`
UpperExp,
/// `{:o}`
Octal,
/// `{:p}`
Pointer,
/// `{:b}`
Binary,
/// `{:x}`
LowerHex,
/// `{:X}`
UpperHex,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FormatOptions {
/// The width. E.g. `{:5}` or `{:width$}`.
pub width: Option<FormatCount>,
/// The precision. E.g. `{:.5}` or `{:.precision$}`.
pub precision: Option<FormatCount>,
/// The alignment. E.g. `{:>}` or `{:<}` or `{:^}`.
pub alignment: Option<FormatAlignment>,
/// The fill character. E.g. the `.` in `{:.>10}`.
pub fill: Option<char>,
/// The `+`, `-`, `0`, `#`, `x?` and `X?` flags.
pub flags: u32,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FormatAlignment {
/// `{:<}`
Left,
/// `{:>}`
Right,
/// `{:^}`
Center,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FormatCount {
/// `{:5}` or `{:.5}`
Literal(usize),
/// `{:.*}`, `{:.5$}`, or `{:a$}`, etc.
Argument(FormatArgPosition),
}

View file

@ -1,353 +0,0 @@
use super::*;
use rustc_ast as ast;
use rustc_ast::visit::{self, Visitor};
use rustc_ast::{BlockCheckMode, UnsafeSource};
use rustc_data_structures::fx::FxIndexSet;
use rustc_span::{sym, symbol::kw};
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
enum ArgumentType {
Format(FormatTrait),
Usize,
}
fn make_argument(ecx: &ExtCtxt<'_>, sp: Span, arg: P<ast::Expr>, ty: ArgumentType) -> P<ast::Expr> {
// Generate:
// ::core::fmt::ArgumentV1::new_…(arg)
use ArgumentType::*;
use FormatTrait::*;
ecx.expr_call_global(
sp,
ecx.std_path(&[
sym::fmt,
sym::ArgumentV1,
match ty {
Format(Display) => sym::new_display,
Format(Debug) => sym::new_debug,
Format(LowerExp) => sym::new_lower_exp,
Format(UpperExp) => sym::new_upper_exp,
Format(Octal) => sym::new_octal,
Format(Pointer) => sym::new_pointer,
Format(Binary) => sym::new_binary,
Format(LowerHex) => sym::new_lower_hex,
Format(UpperHex) => sym::new_upper_hex,
Usize => sym::from_usize,
},
]),
vec![arg],
)
}
fn make_count(
ecx: &ExtCtxt<'_>,
sp: Span,
count: &Option<FormatCount>,
argmap: &mut FxIndexSet<(usize, ArgumentType)>,
) -> P<ast::Expr> {
// Generate:
// ::core::fmt::rt::v1::Count::…(…)
match count {
Some(FormatCount::Literal(n)) => ecx.expr_call_global(
sp,
ecx.std_path(&[sym::fmt, sym::rt, sym::v1, sym::Count, sym::Is]),
vec![ecx.expr_usize(sp, *n)],
),
Some(FormatCount::Argument(arg)) => {
if let Ok(arg_index) = arg.index {
let (i, _) = argmap.insert_full((arg_index, ArgumentType::Usize));
ecx.expr_call_global(
sp,
ecx.std_path(&[sym::fmt, sym::rt, sym::v1, sym::Count, sym::Param]),
vec![ecx.expr_usize(sp, i)],
)
} else {
DummyResult::raw_expr(sp, true)
}
}
None => ecx.expr_path(ecx.path_global(
sp,
ecx.std_path(&[sym::fmt, sym::rt, sym::v1, sym::Count, sym::Implied]),
)),
}
}
fn make_format_spec(
ecx: &ExtCtxt<'_>,
sp: Span,
placeholder: &FormatPlaceholder,
argmap: &mut FxIndexSet<(usize, ArgumentType)>,
) -> P<ast::Expr> {
// Generate:
// ::core::fmt::rt::v1::Argument {
// position: 0usize,
// format: ::core::fmt::rt::v1::FormatSpec {
// fill: ' ',
// align: ::core::fmt::rt::v1::Alignment::Unknown,
// flags: 0u32,
// precision: ::core::fmt::rt::v1::Count::Implied,
// width: ::core::fmt::rt::v1::Count::Implied,
// },
// }
let position = match placeholder.argument.index {
Ok(arg_index) => {
let (i, _) =
argmap.insert_full((arg_index, ArgumentType::Format(placeholder.format_trait)));
ecx.expr_usize(sp, i)
}
Err(_) => DummyResult::raw_expr(sp, true),
};
let fill = ecx.expr_char(sp, placeholder.format_options.fill.unwrap_or(' '));
let align = ecx.expr_path(ecx.path_global(
sp,
ecx.std_path(&[
sym::fmt,
sym::rt,
sym::v1,
sym::Alignment,
match placeholder.format_options.alignment {
Some(FormatAlignment::Left) => sym::Left,
Some(FormatAlignment::Right) => sym::Right,
Some(FormatAlignment::Center) => sym::Center,
None => sym::Unknown,
},
]),
));
let flags = ecx.expr_u32(sp, placeholder.format_options.flags);
let prec = make_count(ecx, sp, &placeholder.format_options.precision, argmap);
let width = make_count(ecx, sp, &placeholder.format_options.width, argmap);
ecx.expr_struct(
sp,
ecx.path_global(sp, ecx.std_path(&[sym::fmt, sym::rt, sym::v1, sym::Argument])),
vec![
ecx.field_imm(sp, Ident::new(sym::position, sp), position),
ecx.field_imm(
sp,
Ident::new(sym::format, sp),
ecx.expr_struct(
sp,
ecx.path_global(
sp,
ecx.std_path(&[sym::fmt, sym::rt, sym::v1, sym::FormatSpec]),
),
vec![
ecx.field_imm(sp, Ident::new(sym::fill, sp), fill),
ecx.field_imm(sp, Ident::new(sym::align, sp), align),
ecx.field_imm(sp, Ident::new(sym::flags, sp), flags),
ecx.field_imm(sp, Ident::new(sym::precision, sp), prec),
ecx.field_imm(sp, Ident::new(sym::width, sp), width),
],
),
),
],
)
}
pub fn expand_parsed_format_args(ecx: &mut ExtCtxt<'_>, fmt: FormatArgs) -> P<ast::Expr> {
let macsp = ecx.with_def_site_ctxt(ecx.call_site());
let lit_pieces = ecx.expr_array_ref(
fmt.span,
fmt.template
.iter()
.enumerate()
.filter_map(|(i, piece)| match piece {
&FormatArgsPiece::Literal(s) => Some(ecx.expr_str(fmt.span, s)),
&FormatArgsPiece::Placeholder(_) => {
// Inject empty string before placeholders when not already preceded by a literal piece.
if i == 0 || matches!(fmt.template[i - 1], FormatArgsPiece::Placeholder(_)) {
Some(ecx.expr_str(fmt.span, kw::Empty))
} else {
None
}
}
})
.collect(),
);
// Whether we'll use the `Arguments::new_v1_formatted` form (true),
// or the `Arguments::new_v1` form (false).
let mut use_format_options = false;
// Create a list of all _unique_ (argument, format trait) combinations.
// E.g. "{0} {0:x} {0} {1}" -> [(0, Display), (0, LowerHex), (1, Display)]
let mut argmap = FxIndexSet::default();
for piece in &fmt.template {
let FormatArgsPiece::Placeholder(placeholder) = piece else { continue };
if placeholder.format_options != Default::default() {
// Can't use basic form if there's any formatting options.
use_format_options = true;
}
if let Ok(index) = placeholder.argument.index {
if !argmap.insert((index, ArgumentType::Format(placeholder.format_trait))) {
// Duplicate (argument, format trait) combination,
// which we'll only put once in the args array.
use_format_options = true;
}
}
}
let format_options = use_format_options.then(|| {
// Generate:
// &[format_spec_0, format_spec_1, format_spec_2]
ecx.expr_array_ref(
macsp,
fmt.template
.iter()
.filter_map(|piece| {
let FormatArgsPiece::Placeholder(placeholder) = piece else { return None };
Some(make_format_spec(ecx, macsp, placeholder, &mut argmap))
})
.collect(),
)
});
let arguments = fmt.arguments.into_vec();
// If the args array contains exactly all the original arguments once,
// in order, we can use a simple array instead of a `match` construction.
// However, if there's a yield point in any argument except the first one,
// we don't do this, because an ArgumentV1 cannot be kept across yield points.
let use_simple_array = argmap.len() == arguments.len()
&& argmap.iter().enumerate().all(|(i, &(j, _))| i == j)
&& arguments.iter().skip(1).all(|arg| !may_contain_yield_point(&arg.expr));
let args = if use_simple_array {
// Generate:
// &[
// ::core::fmt::ArgumentV1::new_display(&arg0),
// ::core::fmt::ArgumentV1::new_lower_hex(&arg1),
// ::core::fmt::ArgumentV1::new_debug(&arg2),
// ]
ecx.expr_array_ref(
macsp,
arguments
.into_iter()
.zip(argmap)
.map(|(arg, (_, ty))| {
let sp = arg.expr.span.with_ctxt(macsp.ctxt());
make_argument(ecx, sp, ecx.expr_addr_of(sp, arg.expr), ty)
})
.collect(),
)
} else {
// Generate:
// match (&arg0, &arg1, &arg2) {
// args => &[
// ::core::fmt::ArgumentV1::new_display(args.0),
// ::core::fmt::ArgumentV1::new_lower_hex(args.1),
// ::core::fmt::ArgumentV1::new_debug(args.0),
// ]
// }
let args_ident = Ident::new(sym::args, macsp);
let args = argmap
.iter()
.map(|&(arg_index, ty)| {
if let Some(arg) = arguments.get(arg_index) {
let sp = arg.expr.span.with_ctxt(macsp.ctxt());
make_argument(
ecx,
sp,
ecx.expr_field(
sp,
ecx.expr_ident(macsp, args_ident),
Ident::new(sym::integer(arg_index), macsp),
),
ty,
)
} else {
DummyResult::raw_expr(macsp, true)
}
})
.collect();
ecx.expr_addr_of(
macsp,
ecx.expr_match(
macsp,
ecx.expr_tuple(
macsp,
arguments
.into_iter()
.map(|arg| {
ecx.expr_addr_of(arg.expr.span.with_ctxt(macsp.ctxt()), arg.expr)
})
.collect(),
),
vec![ecx.arm(macsp, ecx.pat_ident(macsp, args_ident), ecx.expr_array(macsp, args))],
),
)
};
if let Some(format_options) = format_options {
// Generate:
// ::core::fmt::Arguments::new_v1_formatted(
// lit_pieces,
// args,
// format_options,
// unsafe { ::core::fmt::UnsafeArg::new() }
// )
ecx.expr_call_global(
macsp,
ecx.std_path(&[sym::fmt, sym::Arguments, sym::new_v1_formatted]),
vec![
lit_pieces,
args,
format_options,
ecx.expr_block(P(ast::Block {
stmts: vec![ecx.stmt_expr(ecx.expr_call_global(
macsp,
ecx.std_path(&[sym::fmt, sym::UnsafeArg, sym::new]),
Vec::new(),
))],
id: ast::DUMMY_NODE_ID,
rules: BlockCheckMode::Unsafe(UnsafeSource::CompilerGenerated),
span: macsp,
tokens: None,
could_be_bare_literal: false,
})),
],
)
} else {
// Generate:
// ::core::fmt::Arguments::new_v1(
// lit_pieces,
// args,
// )
ecx.expr_call_global(
macsp,
ecx.std_path(&[sym::fmt, sym::Arguments, sym::new_v1]),
vec![lit_pieces, args],
)
}
}
fn may_contain_yield_point(e: &ast::Expr) -> bool {
struct MayContainYieldPoint(bool);
impl Visitor<'_> for MayContainYieldPoint {
fn visit_expr(&mut self, e: &ast::Expr) {
if let ast::ExprKind::Await(_) | ast::ExprKind::Yield(_) = e.kind {
self.0 = true;
} else {
visit::walk_expr(self, e);
}
}
fn visit_mac_call(&mut self, _: &ast::MacCall) {
self.0 = true;
}
fn visit_attribute(&mut self, _: &ast::Attribute) {
// Conservatively assume this may be a proc macro attribute in
// expression position.
self.0 = true;
}
fn visit_item(&mut self, _: &ast::Item) {
// Do not recurse into nested items.
}
}
let mut visitor = MayContainYieldPoint(false);
visitor.visit_expr(e);
visitor.0
}