proc_macro: Stay on the "use the cache" path more
Discovered in #50061 we're falling off the "happy path" of using a stringified token stream more often than we should. This was due to the fact that a user-written token like `0xf` is equality-different from the stringified token of `15` (despite being semantically equivalent). This patch updates the call to `eq_unspanned` with an even more awful solution, `probably_equal_for_proc_macro`, which ignores the value of each token and basically only compares the structure of the token stream, assuming that the AST doesn't change just one token at a time. While this is a step towards fixing #50061 there is still one regression from #49154 which needs to be fixed.
This commit is contained in:
parent
ac3c2288f9
commit
e9348738fc
3 changed files with 127 additions and 9 deletions
|
@ -26,6 +26,7 @@ use tokenstream::{TokenStream, TokenTree};
|
||||||
use tokenstream;
|
use tokenstream;
|
||||||
|
|
||||||
use std::{cmp, fmt};
|
use std::{cmp, fmt};
|
||||||
|
use std::mem;
|
||||||
use rustc_data_structures::sync::{Lrc, Lock};
|
use rustc_data_structures::sync::{Lrc, Lock};
|
||||||
|
|
||||||
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
|
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
|
||||||
|
@ -88,6 +89,12 @@ impl Lit {
|
||||||
ByteStr(_) | ByteStrRaw(..) => "byte string"
|
ByteStr(_) | ByteStrRaw(..) => "byte string"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
|
||||||
|
mem::discriminant(self) == mem::discriminant(other)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
|
pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
|
||||||
|
@ -530,14 +537,6 @@ impl Token {
|
||||||
// stream they came from. Here we attempt to extract these
|
// stream they came from. Here we attempt to extract these
|
||||||
// lossless token streams before we fall back to the
|
// lossless token streams before we fall back to the
|
||||||
// stringification.
|
// stringification.
|
||||||
//
|
|
||||||
// During early phases of the compiler, though, the AST could
|
|
||||||
// get modified directly (e.g. attributes added or removed) and
|
|
||||||
// the internal cache of tokens may not be invalidated or
|
|
||||||
// updated. Consequently if the "lossless" token stream
|
|
||||||
// disagrees with our actual stringification (which has
|
|
||||||
// historically been much more battle-tested) then we go with
|
|
||||||
// the lossy stream anyway (losing span information).
|
|
||||||
let mut tokens = None;
|
let mut tokens = None;
|
||||||
|
|
||||||
match nt.0 {
|
match nt.0 {
|
||||||
|
@ -569,13 +568,96 @@ impl Token {
|
||||||
let source = pprust::token_to_string(self);
|
let source = pprust::token_to_string(self);
|
||||||
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
|
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// During early phases of the compiler the AST could get modified
|
||||||
|
// directly (e.g. attributes added or removed) and the internal cache
|
||||||
|
// of tokens may not be invalidated or updated. Consequently if the
|
||||||
|
// "lossless" token stream disagrees with our actual stringification
|
||||||
|
// (which has historically been much more battle-tested) then we go
|
||||||
|
// with the lossy stream anyway (losing span information).
|
||||||
|
//
|
||||||
|
// Note that the comparison isn't `==` here to avoid comparing spans,
|
||||||
|
// but it *also* is a "probable" equality which is a pretty weird
|
||||||
|
// definition. We mostly want to catch actual changes to the AST
|
||||||
|
// like a `#[cfg]` being processed or some weird `macro_rules!`
|
||||||
|
// expansion.
|
||||||
|
//
|
||||||
|
// What we *don't* want to catch is the fact that a user-defined
|
||||||
|
// literal like `0xf` is stringified as `15`, causing the cached token
|
||||||
|
// stream to not be literal `==` token-wise (ignoring spans) to the
|
||||||
|
// token stream we got from stringification.
|
||||||
|
//
|
||||||
|
// Instead the "probably equal" check here is "does each token
|
||||||
|
// recursively have the same discriminant?" We basically don't look at
|
||||||
|
// the token values here and assume that such fine grained modifications
|
||||||
|
// of token streams don't happen.
|
||||||
if let Some(tokens) = tokens {
|
if let Some(tokens) = tokens {
|
||||||
if tokens.eq_unspanned(&tokens_for_real) {
|
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
|
||||||
return tokens
|
return tokens
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tokens_for_real
|
return tokens_for_real
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
|
||||||
|
if mem::discriminant(self) != mem::discriminant(other) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
match (self, other) {
|
||||||
|
(&Eq, &Eq) |
|
||||||
|
(&Lt, &Lt) |
|
||||||
|
(&Le, &Le) |
|
||||||
|
(&EqEq, &EqEq) |
|
||||||
|
(&Ne, &Ne) |
|
||||||
|
(&Ge, &Ge) |
|
||||||
|
(&Gt, &Gt) |
|
||||||
|
(&AndAnd, &AndAnd) |
|
||||||
|
(&OrOr, &OrOr) |
|
||||||
|
(&Not, &Not) |
|
||||||
|
(&Tilde, &Tilde) |
|
||||||
|
(&At, &At) |
|
||||||
|
(&Dot, &Dot) |
|
||||||
|
(&DotDot, &DotDot) |
|
||||||
|
(&DotDotDot, &DotDotDot) |
|
||||||
|
(&DotDotEq, &DotDotEq) |
|
||||||
|
(&DotEq, &DotEq) |
|
||||||
|
(&Comma, &Comma) |
|
||||||
|
(&Semi, &Semi) |
|
||||||
|
(&Colon, &Colon) |
|
||||||
|
(&ModSep, &ModSep) |
|
||||||
|
(&RArrow, &RArrow) |
|
||||||
|
(&LArrow, &LArrow) |
|
||||||
|
(&FatArrow, &FatArrow) |
|
||||||
|
(&Pound, &Pound) |
|
||||||
|
(&Dollar, &Dollar) |
|
||||||
|
(&Question, &Question) |
|
||||||
|
(&Whitespace, &Whitespace) |
|
||||||
|
(&Comment, &Comment) |
|
||||||
|
(&Eof, &Eof) => true,
|
||||||
|
|
||||||
|
(&BinOp(a), &BinOp(b)) |
|
||||||
|
(&BinOpEq(a), &BinOpEq(b)) => a == b,
|
||||||
|
|
||||||
|
(&OpenDelim(a), &OpenDelim(b)) |
|
||||||
|
(&CloseDelim(a), &CloseDelim(b)) => a == b,
|
||||||
|
|
||||||
|
(&DocComment(a), &DocComment(b)) |
|
||||||
|
(&Shebang(a), &Shebang(b)) => a == b,
|
||||||
|
|
||||||
|
(&Lifetime(a), &Lifetime(b)) => a.name == b.name,
|
||||||
|
(&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,
|
||||||
|
|
||||||
|
(&Literal(ref a, b), &Literal(ref c, d)) => {
|
||||||
|
b == d && a.probably_equal_for_proc_macro(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
(&Interpolated(_), &Interpolated(_)) => false,
|
||||||
|
|
||||||
|
_ => panic!("forgot to add a token?"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
|
#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
|
||||||
|
|
|
@ -124,6 +124,24 @@ impl TokenTree {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
//
|
||||||
|
// This is otherwise the same as `eq_unspanned`, only recursing with a
|
||||||
|
// different method.
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
|
||||||
|
match (self, other) {
|
||||||
|
(&TokenTree::Token(_, ref tk), &TokenTree::Token(_, ref tk2)) => {
|
||||||
|
tk.probably_equal_for_proc_macro(tk2)
|
||||||
|
}
|
||||||
|
(&TokenTree::Delimited(_, ref dl), &TokenTree::Delimited(_, ref dl2)) => {
|
||||||
|
dl.delim == dl2.delim &&
|
||||||
|
dl.stream().probably_equal_for_proc_macro(&dl2.stream())
|
||||||
|
}
|
||||||
|
(_, _) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Retrieve the TokenTree's span.
|
/// Retrieve the TokenTree's span.
|
||||||
pub fn span(&self) -> Span {
|
pub fn span(&self) -> Span {
|
||||||
match *self {
|
match *self {
|
||||||
|
@ -250,6 +268,22 @@ impl TokenStream {
|
||||||
t1.next().is_none() && t2.next().is_none()
|
t1.next().is_none() && t2.next().is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
//
|
||||||
|
// This is otherwise the same as `eq_unspanned`, only recursing with a
|
||||||
|
// different method.
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
|
||||||
|
let mut t1 = self.trees();
|
||||||
|
let mut t2 = other.trees();
|
||||||
|
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
|
||||||
|
if !t1.probably_equal_for_proc_macro(&t2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1.next().is_none() && t2.next().is_none()
|
||||||
|
}
|
||||||
|
|
||||||
/// Precondition: `self` consists of a single token tree.
|
/// Precondition: `self` consists of a single token tree.
|
||||||
/// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
|
/// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
|
||||||
pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
|
pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
|
||||||
|
|
|
@ -21,6 +21,8 @@ use attribute_with_error::foo;
|
||||||
fn test1() {
|
fn test1() {
|
||||||
let a: i32 = "foo";
|
let a: i32 = "foo";
|
||||||
//~^ ERROR: mismatched types
|
//~^ ERROR: mismatched types
|
||||||
|
let b: i32 = "f'oo";
|
||||||
|
//~^ ERROR: mismatched types
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test2() {
|
fn test2() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue