Auto merge of #106195 - Nilstrieb:no-more-being-clueless-whether-it-really-is-a-literal, r=compiler-errors
Improve heuristics whether `format_args` string is a source literal. Previously, it only checked whether there was _a_ literal at the span of the first argument, not whether the literal actually matched up. This caused issues when a proc macro was generating a different literal with the same span. This requires an annoying special case for literals ending in `\n` because otherwise `println` wouldn't give detailed diagnostics anymore, which would be bad. Fixes #106191
This commit is contained in:
commit
0c0b403f19
5 changed files with 105 additions and 23 deletions
|
@ -20,6 +20,7 @@ pub use Flag::*;
|
||||||
pub use Piece::*;
|
pub use Piece::*;
|
||||||
pub use Position::*;
|
pub use Position::*;
|
||||||
|
|
||||||
|
use rustc_lexer::unescape;
|
||||||
use std::iter;
|
use std::iter;
|
||||||
use std::str;
|
use std::str;
|
||||||
use std::string;
|
use std::string;
|
||||||
|
@ -56,6 +57,13 @@ impl InnerWidthMapping {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether the input string is a literal. If yes, it contains the inner width mappings.
|
||||||
|
#[derive(Clone, PartialEq, Eq)]
|
||||||
|
enum InputStringKind {
|
||||||
|
NotALiteral,
|
||||||
|
Literal { width_mappings: Vec<InnerWidthMapping> },
|
||||||
|
}
|
||||||
|
|
||||||
/// The type of format string that we are parsing.
|
/// The type of format string that we are parsing.
|
||||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||||
pub enum ParseMode {
|
pub enum ParseMode {
|
||||||
|
@ -306,7 +314,11 @@ impl<'a> Parser<'a> {
|
||||||
append_newline: bool,
|
append_newline: bool,
|
||||||
mode: ParseMode,
|
mode: ParseMode,
|
||||||
) -> Parser<'a> {
|
) -> Parser<'a> {
|
||||||
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
|
let input_string_kind = find_width_map_from_snippet(s, snippet, style);
|
||||||
|
let (width_map, is_literal) = match input_string_kind {
|
||||||
|
InputStringKind::Literal { width_mappings } => (width_mappings, true),
|
||||||
|
InputStringKind::NotALiteral => (Vec::new(), false),
|
||||||
|
};
|
||||||
Parser {
|
Parser {
|
||||||
mode,
|
mode,
|
||||||
input: s,
|
input: s,
|
||||||
|
@ -844,20 +856,40 @@ impl<'a> Parser<'a> {
|
||||||
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
|
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
|
||||||
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
|
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
|
||||||
fn find_width_map_from_snippet(
|
fn find_width_map_from_snippet(
|
||||||
|
input: &str,
|
||||||
snippet: Option<string::String>,
|
snippet: Option<string::String>,
|
||||||
str_style: Option<usize>,
|
str_style: Option<usize>,
|
||||||
) -> (Vec<InnerWidthMapping>, bool) {
|
) -> InputStringKind {
|
||||||
let snippet = match snippet {
|
let snippet = match snippet {
|
||||||
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
|
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
|
||||||
_ => return (vec![], false),
|
_ => return InputStringKind::NotALiteral,
|
||||||
};
|
};
|
||||||
|
|
||||||
if str_style.is_some() {
|
if str_style.is_some() {
|
||||||
return (vec![], true);
|
return InputStringKind::Literal { width_mappings: Vec::new() };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strip quotes.
|
||||||
let snippet = &snippet[1..snippet.len() - 1];
|
let snippet = &snippet[1..snippet.len() - 1];
|
||||||
|
|
||||||
|
// Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
|
||||||
|
// since we will never need to point our spans there, so we lie about it here by ignoring it.
|
||||||
|
// Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
|
||||||
|
// If we only trimmed it off the input, `format!("\n")` would cause a mismatch even though here they actually match up.
|
||||||
|
// Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
|
||||||
|
let input_no_nl = input.trim_end_matches('\n');
|
||||||
|
let Ok(unescaped) = unescape_string(snippet) else {
|
||||||
|
return InputStringKind::NotALiteral;
|
||||||
|
};
|
||||||
|
|
||||||
|
let unescaped_no_nl = unescaped.trim_end_matches('\n');
|
||||||
|
|
||||||
|
if unescaped_no_nl != input_no_nl {
|
||||||
|
// The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
|
||||||
|
// This can for example happen with proc macros that respan generated literals.
|
||||||
|
return InputStringKind::NotALiteral;
|
||||||
|
}
|
||||||
|
|
||||||
let mut s = snippet.char_indices();
|
let mut s = snippet.char_indices();
|
||||||
let mut width_mappings = vec![];
|
let mut width_mappings = vec![];
|
||||||
while let Some((pos, c)) = s.next() {
|
while let Some((pos, c)) = s.next() {
|
||||||
|
@ -936,7 +968,21 @@ fn find_width_map_from_snippet(
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(width_mappings, true)
|
|
||||||
|
InputStringKind::Literal { width_mappings }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unescape_string(string: &str) -> Result<string::String, unescape::EscapeError> {
|
||||||
|
let mut buf = string::String::new();
|
||||||
|
let mut error = Ok(());
|
||||||
|
unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
|
||||||
|
match unescaped_char {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(err) => error = Err(err),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
error.map(|_| buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert a reasonable size for `Piece`
|
// Assert a reasonable size for `Piece`
|
||||||
|
|
|
@ -964,45 +964,40 @@ impl SourceMap {
|
||||||
|
|
||||||
/// Finds the width of the character, either before or after the end of provided span,
|
/// Finds the width of the character, either before or after the end of provided span,
|
||||||
/// depending on the `forwards` parameter.
|
/// depending on the `forwards` parameter.
|
||||||
|
#[instrument(skip(self, sp))]
|
||||||
fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
|
fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
|
||||||
let sp = sp.data();
|
let sp = sp.data();
|
||||||
|
|
||||||
if sp.lo == sp.hi && !forwards {
|
if sp.lo == sp.hi && !forwards {
|
||||||
debug!("find_width_of_character_at_span: early return empty span");
|
debug!("early return empty span");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let local_begin = self.lookup_byte_offset(sp.lo);
|
let local_begin = self.lookup_byte_offset(sp.lo);
|
||||||
let local_end = self.lookup_byte_offset(sp.hi);
|
let local_end = self.lookup_byte_offset(sp.hi);
|
||||||
debug!(
|
debug!("local_begin=`{:?}`, local_end=`{:?}`", local_begin, local_end);
|
||||||
"find_width_of_character_at_span: local_begin=`{:?}`, local_end=`{:?}`",
|
|
||||||
local_begin, local_end
|
|
||||||
);
|
|
||||||
|
|
||||||
if local_begin.sf.start_pos != local_end.sf.start_pos {
|
if local_begin.sf.start_pos != local_end.sf.start_pos {
|
||||||
debug!("find_width_of_character_at_span: begin and end are in different files");
|
debug!("begin and end are in different files");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let start_index = local_begin.pos.to_usize();
|
let start_index = local_begin.pos.to_usize();
|
||||||
let end_index = local_end.pos.to_usize();
|
let end_index = local_end.pos.to_usize();
|
||||||
debug!(
|
debug!("start_index=`{:?}`, end_index=`{:?}`", start_index, end_index);
|
||||||
"find_width_of_character_at_span: start_index=`{:?}`, end_index=`{:?}`",
|
|
||||||
start_index, end_index
|
|
||||||
);
|
|
||||||
|
|
||||||
// Disregard indexes that are at the start or end of their spans, they can't fit bigger
|
// Disregard indexes that are at the start or end of their spans, they can't fit bigger
|
||||||
// characters.
|
// characters.
|
||||||
if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) {
|
if (!forwards && end_index == usize::MIN) || (forwards && start_index == usize::MAX) {
|
||||||
debug!("find_width_of_character_at_span: start or end of span, cannot be multibyte");
|
debug!("start or end of span, cannot be multibyte");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
|
let source_len = (local_begin.sf.end_pos - local_begin.sf.start_pos).to_usize();
|
||||||
debug!("find_width_of_character_at_span: source_len=`{:?}`", source_len);
|
debug!("source_len=`{:?}`", source_len);
|
||||||
// Ensure indexes are also not malformed.
|
// Ensure indexes are also not malformed.
|
||||||
if start_index > end_index || end_index > source_len - 1 {
|
if start_index > end_index || end_index > source_len - 1 {
|
||||||
debug!("find_width_of_character_at_span: source indexes are malformed");
|
debug!("source indexes are malformed");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1017,10 +1012,10 @@ impl SourceMap {
|
||||||
} else {
|
} else {
|
||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
debug!("find_width_of_character_at_span: snippet=`{:?}`", snippet);
|
debug!("snippet=`{:?}`", snippet);
|
||||||
|
|
||||||
let mut target = if forwards { end_index + 1 } else { end_index - 1 };
|
let mut target = if forwards { end_index + 1 } else { end_index - 1 };
|
||||||
debug!("find_width_of_character_at_span: initial target=`{:?}`", target);
|
debug!("initial target=`{:?}`", target);
|
||||||
|
|
||||||
while !snippet.is_char_boundary(target - start_index) && target < source_len {
|
while !snippet.is_char_boundary(target - start_index) && target < source_len {
|
||||||
target = if forwards {
|
target = if forwards {
|
||||||
|
@ -1033,9 +1028,9 @@ impl SourceMap {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
debug!("find_width_of_character_at_span: target=`{:?}`", target);
|
debug!("target=`{:?}`", target);
|
||||||
}
|
}
|
||||||
debug!("find_width_of_character_at_span: final target=`{:?}`", target);
|
debug!("final target=`{:?}`", target);
|
||||||
|
|
||||||
if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 }
|
if forwards { (target - end_index) as u32 } else { (end_index - target) as u32 }
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,8 @@
|
||||||
|
|
||||||
extern crate proc_macro;
|
extern crate proc_macro;
|
||||||
|
|
||||||
use proc_macro::{Literal, Span, TokenStream, TokenTree};
|
use proc_macro::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
|
||||||
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
#[proc_macro]
|
#[proc_macro]
|
||||||
pub fn foo_with_input_span(input: TokenStream) -> TokenStream {
|
pub fn foo_with_input_span(input: TokenStream) -> TokenStream {
|
||||||
|
@ -26,3 +27,14 @@ pub fn err_with_input_span(input: TokenStream) -> TokenStream {
|
||||||
|
|
||||||
TokenStream::from(TokenTree::Literal(lit))
|
TokenStream::from(TokenTree::Literal(lit))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[proc_macro]
|
||||||
|
pub fn respan_to_invalid_format_literal(input: TokenStream) -> TokenStream {
|
||||||
|
let mut s = Literal::string("{");
|
||||||
|
s.set_span(input.into_iter().next().unwrap().span());
|
||||||
|
TokenStream::from_iter([
|
||||||
|
TokenTree::from(Ident::new("format", Span::call_site())),
|
||||||
|
TokenTree::from(Punct::new('!', Spacing::Alone)),
|
||||||
|
TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
10
src/test/ui/fmt/respanned-literal-issue-106191.rs
Normal file
10
src/test/ui/fmt/respanned-literal-issue-106191.rs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
// aux-build:format-string-proc-macro.rs
|
||||||
|
|
||||||
|
extern crate format_string_proc_macro;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
format_string_proc_macro::respan_to_invalid_format_literal!("¡");
|
||||||
|
//~^ ERROR invalid format string: expected `'}'` but string was terminated
|
||||||
|
format_args!(r#concat!("¡ {"));
|
||||||
|
//~^ ERROR invalid format string: expected `'}'` but string was terminated
|
||||||
|
}
|
19
src/test/ui/fmt/respanned-literal-issue-106191.stderr
Normal file
19
src/test/ui/fmt/respanned-literal-issue-106191.stderr
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
error: invalid format string: expected `'}'` but string was terminated
|
||||||
|
--> $DIR/respanned-literal-issue-106191.rs:6:65
|
||||||
|
|
|
||||||
|
LL | format_string_proc_macro::respan_to_invalid_format_literal!("¡");
|
||||||
|
| ^^^ expected `'}'` in format string
|
||||||
|
|
|
||||||
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
|
||||||
|
error: invalid format string: expected `'}'` but string was terminated
|
||||||
|
--> $DIR/respanned-literal-issue-106191.rs:8:18
|
||||||
|
|
|
||||||
|
LL | format_args!(r#concat!("¡ {"));
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^ expected `'}'` in format string
|
||||||
|
|
|
||||||
|
= note: if you intended to print `{`, you can escape it using `{{`
|
||||||
|
= note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info)
|
||||||
|
|
||||||
|
error: aborting due to 2 previous errors
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue