Auto merge of #79978 - Aaron1011:fix/capture-broken-token, r=petrochenkov

Properly capture trailing 'unglued' token

If we try to capture the `Vec<u8>` in `Option<Vec<u8>>`, we need to capture a `>` token which was 'unglued' from a `>>` token. Unglueing a token for parsing purposes bypasses the usual capturing infrastructure, so we currently lose the trailing `>`. As a result, we fall back to the reparsed `TokenStream`, causing us to lose spans.

This commit makes token capturing keep track of a trailing 'unglued' token. Note that we only need to care about unglueing at the end of the captured tokens - if we capture both the first and second unglued tokens, then we end up capturing the full 'glued' token, which already works correctly.

Commit 803c60218f
3 changed files with 106 additions and 9 deletions
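
As an illustration of the problem, here is a standalone sketch of the ungluing idea, using hypothetical stand-in types rather than rustc's actual `Token`/`TokenKind`: the lexer produces `>>` as a single glued token, the parser splits it in place when it needs just one `>`, and the resulting halves never appear in the original `TokenStream`.

// Minimal self-contained sketch; `Token` and `TokenKind` here are
// hypothetical stand-ins, not rustc's types.
#[derive(Clone, Debug, PartialEq)]
enum TokenKind {
    Gt,  // '>'
    Shr, // '>>', glued by the lexer
}

#[derive(Clone, Debug, PartialEq)]
struct Token {
    kind: TokenKind,
    lo: usize, // span start, as a byte offset
    hi: usize, // span end
}

// Split a glued `>>` into two `>` tokens, dividing the span so each
// half points at its own character. This mirrors the idea behind the
// parser's `break_and_eat` below: the first half is consumed
// immediately, and `append_unglued_token` records it so that a
// capture ending at that point can still include it.
fn unglue(tok: &Token) -> Option<(Token, Token)> {
    match tok.kind {
        TokenKind::Shr => {
            let mid = tok.lo + 1;
            Some((
                Token { kind: TokenKind::Gt, lo: tok.lo, hi: mid },
                Token { kind: TokenKind::Gt, lo: mid, hi: tok.hi },
            ))
        }
        _ => None,
    }
}

fn main() {
    // In `Option<Vec<u8>>`, the trailing `>>` sits at byte offsets 13..15.
    let shr = Token { kind: TokenKind::Shr, lo: 13, hi: 15 };
    let (first, second) = unglue(&shr).unwrap();
    // Capturing `Vec<u8>` must include `first`, but `first` never came
    // from the underlying token stream, so without extra bookkeeping it
    // would be lost along with its span.
    assert_eq!(first, Token { kind: TokenKind::Gt, lo: 13, hi: 14 });
    assert_eq!(second, Token { kind: TokenKind::Gt, lo: 14, hi: 15 });
}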

compiler/rustc_parse/src/parser/mod.rs

@@ -17,7 +17,7 @@ pub use path::PathStyle;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
 use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
-use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
 use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
@@ -132,6 +132,28 @@ struct TokenCursor {
     // Counts the number of calls to `next` or `next_desugared`,
     // depending on whether `desugar_doc_comments` is set.
     num_next_calls: usize,
+    // During parsing, we may sometimes need to 'unglue' a
+    // glued token into two component tokens
+    // (e.g. '>>' into '>' and '>'), so that the parser
+    // can consume them one at a time. This process
+    // bypasses the normal capturing mechanism
+    // (e.g. `num_next_calls` will not be incremented),
+    // since the 'unglued' tokens do not exist in
+    // the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens,
+    // then this is not an issue - we'll end up
+    // capturing the single 'glued' token.
+    //
+    // However, in certain circumstances, we may
+    // want to capture just the first 'unglued' token.
+    // For example, capturing the `Vec<u8>`
+    // in `Option<Vec<u8>>` requires us to unglue
+    // the trailing `>>` token. The `append_unglued_token`
+    // field is used to track this token - it gets
+    // appended to the captured stream when
+    // we evaluate a `LazyTokenStream`.
+    append_unglued_token: Option<TreeAndSpacing>,
 }

 #[derive(Clone)]
@@ -336,6 +358,7 @@ impl<'a> Parser<'a> {
                 stack: Vec::new(),
                 num_next_calls: 0,
                 desugar_doc_comments,
+                append_unglued_token: None,
             },
             desugar_doc_comments,
             unmatched_angle_bracket_count: 0,
@@ -359,6 +382,10 @@ impl<'a> Parser<'a> {
                 self.token_cursor.next()
             };
             self.token_cursor.num_next_calls += 1;
+            // We've retrieved a token from the underlying
+            // cursor, so we no longer need to worry about
+            // an unglued token. See `break_and_eat` for more details.
+            self.token_cursor.append_unglued_token = None;
             if next.span.is_dummy() {
                 // Tweak the location for better diagnostics, but keep syntactic context intact.
                 next.span = fallback_span.with_ctxt(next.span.ctxt());
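
To see why clearing the field here is correct, consider this toy model of the bookkeeping lifecycle (hypothetical types, not rustc's): once the parser advances past the split via a real cursor call, the capture will cover the whole glued token, so the pending record is stale.

struct Cursor {
    num_next_calls: usize,
    // Stand-in for rustc's `Option<TreeAndSpacing>`.
    append_unglued_token: Option<&'static str>,
}

impl Cursor {
    // Mirrors `next_tok` above: every real advance both counts the
    // call and invalidates any pending unglued-token record.
    fn next_tok(&mut self) {
        self.num_next_calls += 1;
        self.append_unglued_token = None;
    }
}

fn main() {
    let mut c = Cursor { num_next_calls: 0, append_unglued_token: None };
    c.append_unglued_token = Some(">"); // `break_and_eat` split a '>>'
    assert!(c.append_unglued_token.is_some()); // a capture ending now appends '>'
    c.next_tok(); // the parser consumed another real token
    assert!(c.append_unglued_token.is_none()); // the full '>>' gets captured instead
}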
@@ -555,6 +582,14 @@ impl<'a> Parser<'a> {
                 let first_span = self.sess.source_map().start_point(self.token.span);
                 let second_span = self.token.span.with_lo(first_span.hi());
                 self.token = Token::new(first, first_span);
+                // Keep track of this token - if we end token capturing now,
+                // we'll want to append this token to the captured stream.
+                //
+                // If we consume any additional tokens, then this token
+                // is not needed (we'll capture the entire 'glued' token),
+                // and `next_tok` will set this field to `None`.
+                self.token_cursor.append_unglued_token =
+                    Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
                 // Use the spacing of the glued token as the spacing
                 // of the unglued second token.
                 self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1230,6 +1265,7 @@ impl<'a> Parser<'a> {
             num_calls: usize,
             desugar_doc_comments: bool,
             trailing_semi: bool,
+            append_unglued_token: Option<TreeAndSpacing>,
         }
         impl CreateTokenStream for LazyTokenStreamImpl {
             fn create_token_stream(&self) -> TokenStream {
@@ -1253,12 +1289,18 @@ impl<'a> Parser<'a> {
                     }))
                     .take(num_calls);

-                make_token_stream(tokens)
+                make_token_stream(tokens, self.append_unglued_token.clone())
             }
             fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
                 if self.trailing_semi {
                     panic!("Called `add_trailing_semi` twice!");
                 }
+                if self.append_unglued_token.is_some() {
+                    panic!(
+                        "Cannot call `add_trailing_semi` when we have an unglued token {:?}",
+                        self.append_unglued_token
+                    );
+                }
                 let mut new = self.clone();
                 new.trailing_semi = true;
                 Box::new(new)
@@ -1271,6 +1313,7 @@ impl<'a> Parser<'a> {
             cursor_snapshot,
             desugar_doc_comments: self.desugar_doc_comments,
             trailing_semi: false,
+            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
         };
         Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
     }
@@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
 /// of open and close delims.
-fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
+fn make_token_stream(
+    tokens: impl Iterator<Item = (Token, Spacing)>,
+    append_unglued_token: Option<TreeAndSpacing>,
+) -> TokenStream {
     #[derive(Debug)]
     struct FrameData {
         open: Span,
@@ -1348,14 +1394,17 @@ fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStr
                     .inner
                     .push((delimited, Spacing::Alone));
             }
-            token => stack
-                .last_mut()
-                .expect("Bottom token frame is missing!")
-                .inner
-                .push((TokenTree::Token(token), spacing)),
+            token => {
+                stack
+                    .last_mut()
+                    .expect("Bottom token frame is missing!")
+                    .inner
+                    .push((TokenTree::Token(token), spacing));
+            }
         }
     }
-    let final_buf = stack.pop().expect("Missing final buf!");
+    let mut final_buf = stack.pop().expect("Missing final buf!");
+    final_buf.inner.extend(append_unglued_token);
     assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
     TokenStream::new(final_buf.inner)
 }
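
A small point about `final_buf.inner.extend(append_unglued_token)`: `Option<T>` implements `IntoIterator`, yielding zero or one items, so the trailing token is appended only when one was recorded and no explicit branch is needed. A sketch of the idiom, with strings standing in for `TreeAndSpacing`:

fn main() {
    // Tokens collected for `Vec<u8>` from the original stream.
    let mut buf = vec!["Vec", "<", "u8"];
    // The trailing unglued '>' recorded by `break_and_eat`;
    // `None` when no ungluing happened at the end of the capture.
    let trailing: Option<&str> = Some(">");
    // Extending with an Option appends at most one element.
    buf.extend(trailing);
    assert_eq!(buf, ["Vec", "<", "u8", ">"]);
}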

src/test/ui/proc-macro/capture-unglued-token.rs (new file, 20 lines)

@@ -0,0 +1,20 @@
+// aux-build:test-macros.rs
+// compile-flags: -Z span-debug
+// check-pass
+
+// Tests that we properly handle parsing a nonterminal
+// where we have two consecutive angle brackets (one inside
+// the nonterminal, and one outside)
+
+#![no_std] // Don't load unnecessary hygiene information from std
+extern crate std;
+extern crate test_macros;
+
+macro_rules! trailing_angle {
+    (Option<$field:ty>) => {
+        test_macros::print_bang_consume!($field);
+    }
+}
+
+trailing_angle!(Option<Vec<u8>>);
+fn main() {}

src/test/ui/proc-macro/capture-unglued-token.stdout (new file, 28 lines)

@@ -0,0 +1,28 @@
+PRINT-BANG INPUT (DISPLAY): Vec<u8>
+PRINT-BANG RE-COLLECTED (DISPLAY): Vec < u8 >
+PRINT-BANG INPUT (DEBUG): TokenStream [
+    Group {
+        delimiter: None,
+        stream: TokenStream [
+            Ident {
+                ident: "Vec",
+                span: $DIR/capture-unglued-token.rs:19:24: 19:27 (#0),
+            },
+            Punct {
+                ch: '<',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:27: 19:28 (#0),
+            },
+            Ident {
+                ident: "u8",
+                span: $DIR/capture-unglued-token.rs:19:28: 19:30 (#0),
+            },
+            Punct {
+                ch: '>',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:30: 19:31 (#0),
+            },
+        ],
+        span: $DIR/capture-unglued-token.rs:15:42: 15:48 (#4),
+    },
+]