proc_macro: Stay on the "use the cache" path more
Discovered in #50061 we're falling off the "happy path" of using a stringified token stream more often than we should. This was due to the fact that a user-written token like `0xf` is equality-different from the stringified token of `15` (despite being semantically equivalent). This patch updates the call to `eq_unspanned` with an even more awful solution, `probably_equal_for_proc_macro`, which ignores the value of each token and basically only compares the structure of the token stream, assuming that the AST doesn't change just one token at a time. While this is a step towards fixing #50061 there is still one regression from #49154 which needs to be fixed.
This commit is contained in:
parent
ac3c2288f9
commit
e9348738fc
3 changed files with 127 additions and 9 deletions
|
@ -26,6 +26,7 @@ use tokenstream::{TokenStream, TokenTree};
|
||||||
use tokenstream;
|
use tokenstream;
|
||||||
|
|
||||||
use std::{cmp, fmt};
|
use std::{cmp, fmt};
|
||||||
|
use std::mem;
|
||||||
use rustc_data_structures::sync::{Lrc, Lock};
|
use rustc_data_structures::sync::{Lrc, Lock};
|
||||||
|
|
||||||
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
|
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
|
||||||
|
@ -88,6 +89,12 @@ impl Lit {
|
||||||
ByteStr(_) | ByteStrRaw(..) => "byte string"
|
ByteStr(_) | ByteStrRaw(..) => "byte string"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool {
|
||||||
|
mem::discriminant(self) == mem::discriminant(other)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
|
pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
|
||||||
|
@ -530,14 +537,6 @@ impl Token {
|
||||||
// stream they came from. Here we attempt to extract these
|
// stream they came from. Here we attempt to extract these
|
||||||
// lossless token streams before we fall back to the
|
// lossless token streams before we fall back to the
|
||||||
// stringification.
|
// stringification.
|
||||||
//
|
|
||||||
// During early phases of the compiler, though, the AST could
|
|
||||||
// get modified directly (e.g. attributes added or removed) and
|
|
||||||
// the internal cache of tokens may not be invalidated or
|
|
||||||
// updated. Consequently if the "lossless" token stream
|
|
||||||
// disagrees with our actual stringification (which has
|
|
||||||
// historically been much more battle-tested) then we go with
|
|
||||||
// the lossy stream anyway (losing span information).
|
|
||||||
let mut tokens = None;
|
let mut tokens = None;
|
||||||
|
|
||||||
match nt.0 {
|
match nt.0 {
|
||||||
|
@ -569,13 +568,96 @@ impl Token {
|
||||||
let source = pprust::token_to_string(self);
|
let source = pprust::token_to_string(self);
|
||||||
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
|
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// During early phases of the compiler the AST could get modified
|
||||||
|
// directly (e.g. attributes added or removed) and the internal cache
|
||||||
|
// of tokens may not be invalidated or updated. Consequently if the
|
||||||
|
// "lossless" token stream disagrees with our actual stringification
|
||||||
|
// (which has historically been much more battle-tested) then we go
|
||||||
|
// with the lossy stream anyway (losing span information).
|
||||||
|
//
|
||||||
|
// Note that the comparison isn't `==` here to avoid comparing spans,
|
||||||
|
// but it *also* is a "probable" equality which is a pretty weird
|
||||||
|
// definition. We mostly want to catch actual changes to the AST
|
||||||
|
// like a `#[cfg]` being processed or some weird `macro_rules!`
|
||||||
|
// expansion.
|
||||||
|
//
|
||||||
|
// What we *don't* want to catch is the fact that a user-defined
|
||||||
|
// literal like `0xf` is stringified as `15`, causing the cached token
|
||||||
|
// stream to not be literal `==` token-wise (ignoring spans) to the
|
||||||
|
// token stream we got from stringification.
|
||||||
|
//
|
||||||
|
// Instead the "probably equal" check here is "does each token
|
||||||
|
// recursively have the same discriminant?" We basically don't look at
|
||||||
|
// the token values here and assume that such fine grained modifications
|
||||||
|
// of token streams don't happen.
|
||||||
if let Some(tokens) = tokens {
|
if let Some(tokens) = tokens {
|
||||||
if tokens.eq_unspanned(&tokens_for_real) {
|
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
|
||||||
return tokens
|
return tokens
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tokens_for_real
|
return tokens_for_real
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
|
||||||
|
if mem::discriminant(self) != mem::discriminant(other) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
match (self, other) {
|
||||||
|
(&Eq, &Eq) |
|
||||||
|
(&Lt, &Lt) |
|
||||||
|
(&Le, &Le) |
|
||||||
|
(&EqEq, &EqEq) |
|
||||||
|
(&Ne, &Ne) |
|
||||||
|
(&Ge, &Ge) |
|
||||||
|
(&Gt, &Gt) |
|
||||||
|
(&AndAnd, &AndAnd) |
|
||||||
|
(&OrOr, &OrOr) |
|
||||||
|
(&Not, &Not) |
|
||||||
|
(&Tilde, &Tilde) |
|
||||||
|
(&At, &At) |
|
||||||
|
(&Dot, &Dot) |
|
||||||
|
(&DotDot, &DotDot) |
|
||||||
|
(&DotDotDot, &DotDotDot) |
|
||||||
|
(&DotDotEq, &DotDotEq) |
|
||||||
|
(&DotEq, &DotEq) |
|
||||||
|
(&Comma, &Comma) |
|
||||||
|
(&Semi, &Semi) |
|
||||||
|
(&Colon, &Colon) |
|
||||||
|
(&ModSep, &ModSep) |
|
||||||
|
(&RArrow, &RArrow) |
|
||||||
|
(&LArrow, &LArrow) |
|
||||||
|
(&FatArrow, &FatArrow) |
|
||||||
|
(&Pound, &Pound) |
|
||||||
|
(&Dollar, &Dollar) |
|
||||||
|
(&Question, &Question) |
|
||||||
|
(&Whitespace, &Whitespace) |
|
||||||
|
(&Comment, &Comment) |
|
||||||
|
(&Eof, &Eof) => true,
|
||||||
|
|
||||||
|
(&BinOp(a), &BinOp(b)) |
|
||||||
|
(&BinOpEq(a), &BinOpEq(b)) => a == b,
|
||||||
|
|
||||||
|
(&OpenDelim(a), &OpenDelim(b)) |
|
||||||
|
(&CloseDelim(a), &CloseDelim(b)) => a == b,
|
||||||
|
|
||||||
|
(&DocComment(a), &DocComment(b)) |
|
||||||
|
(&Shebang(a), &Shebang(b)) => a == b,
|
||||||
|
|
||||||
|
(&Lifetime(a), &Lifetime(b)) => a.name == b.name,
|
||||||
|
(&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d,
|
||||||
|
|
||||||
|
(&Literal(ref a, b), &Literal(ref c, d)) => {
|
||||||
|
b == d && a.probably_equal_for_proc_macro(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
(&Interpolated(_), &Interpolated(_)) => false,
|
||||||
|
|
||||||
|
_ => panic!("forgot to add a token?"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
|
#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
|
||||||
|
|
|
@ -124,6 +124,24 @@ impl TokenTree {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
//
|
||||||
|
// This is otherwise the same as `eq_unspanned`, only recursing with a
|
||||||
|
// different method.
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
|
||||||
|
match (self, other) {
|
||||||
|
(&TokenTree::Token(_, ref tk), &TokenTree::Token(_, ref tk2)) => {
|
||||||
|
tk.probably_equal_for_proc_macro(tk2)
|
||||||
|
}
|
||||||
|
(&TokenTree::Delimited(_, ref dl), &TokenTree::Delimited(_, ref dl2)) => {
|
||||||
|
dl.delim == dl2.delim &&
|
||||||
|
dl.stream().probably_equal_for_proc_macro(&dl2.stream())
|
||||||
|
}
|
||||||
|
(_, _) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Retrieve the TokenTree's span.
|
/// Retrieve the TokenTree's span.
|
||||||
pub fn span(&self) -> Span {
|
pub fn span(&self) -> Span {
|
||||||
match *self {
|
match *self {
|
||||||
|
@ -250,6 +268,22 @@ impl TokenStream {
|
||||||
t1.next().is_none() && t2.next().is_none()
|
t1.next().is_none() && t2.next().is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comments in `interpolated_to_tokenstream` for why we care about
|
||||||
|
// *probably* equal here rather than actual equality
|
||||||
|
//
|
||||||
|
// This is otherwise the same as `eq_unspanned`, only recursing with a
|
||||||
|
// different method.
|
||||||
|
pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
|
||||||
|
let mut t1 = self.trees();
|
||||||
|
let mut t2 = other.trees();
|
||||||
|
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
|
||||||
|
if !t1.probably_equal_for_proc_macro(&t2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t1.next().is_none() && t2.next().is_none()
|
||||||
|
}
|
||||||
|
|
||||||
/// Precondition: `self` consists of a single token tree.
|
/// Precondition: `self` consists of a single token tree.
|
||||||
/// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
|
/// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`.
|
||||||
pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
|
pub fn as_tree(self) -> (TokenTree, bool /* joint? */) {
|
||||||
|
|
|
@ -21,6 +21,8 @@ use attribute_with_error::foo;
|
||||||
fn test1() {
|
fn test1() {
|
||||||
let a: i32 = "foo";
|
let a: i32 = "foo";
|
||||||
//~^ ERROR: mismatched types
|
//~^ ERROR: mismatched types
|
||||||
|
let b: i32 = "f'oo";
|
||||||
|
//~^ ERROR: mismatched types
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test2() {
|
fn test2() {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue