1
Fork 0

Rollup merge of #107544 - nnethercote:improve-TokenCursor, r=petrochenkov

Improve `TokenCursor`.

Some small improvements, for things that were bugging me.

Best reviewed one commit at a time.

r? ``@petrochenkov``
This commit is contained in:
Dylan DPC 2023-02-03 23:04:51 +05:30 committed by GitHub
commit 815dc9c480
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 98 additions and 91 deletions

View file

@ -41,7 +41,8 @@ use std::{fmt, iter};
/// Nothing special happens to misnamed or misplaced `SubstNt`s. /// Nothing special happens to misnamed or misplaced `SubstNt`s.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum TokenTree { pub enum TokenTree {
/// A single token. /// A single token. Should never be `OpenDelim` or `CloseDelim`, because
/// delimiters are implicitly represented by `Delimited`.
Token(Token, Spacing), Token(Token, Spacing),
/// A delimited sequence of token trees. /// A delimited sequence of token trees.
Delimited(DelimSpan, Delimiter, TokenStream), Delimited(DelimSpan, Delimiter, TokenStream),
@ -388,12 +389,12 @@ impl TokenStream {
self.0.len() self.0.len()
} }
pub fn trees(&self) -> CursorRef<'_> { pub fn trees(&self) -> RefTokenTreeCursor<'_> {
CursorRef::new(self) RefTokenTreeCursor::new(self)
} }
pub fn into_trees(self) -> Cursor { pub fn into_trees(self) -> TokenTreeCursor {
Cursor::new(self) TokenTreeCursor::new(self)
} }
/// Compares two `TokenStream`s, checking equality without regarding span information. /// Compares two `TokenStream`s, checking equality without regarding span information.
@ -551,16 +552,17 @@ impl TokenStream {
} }
} }
/// By-reference iterator over a [`TokenStream`]. /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
/// items.
#[derive(Clone)] #[derive(Clone)]
pub struct CursorRef<'t> { pub struct RefTokenTreeCursor<'t> {
stream: &'t TokenStream, stream: &'t TokenStream,
index: usize, index: usize,
} }
impl<'t> CursorRef<'t> { impl<'t> RefTokenTreeCursor<'t> {
fn new(stream: &'t TokenStream) -> Self { fn new(stream: &'t TokenStream) -> Self {
CursorRef { stream, index: 0 } RefTokenTreeCursor { stream, index: 0 }
} }
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
@ -568,7 +570,7 @@ impl<'t> CursorRef<'t> {
} }
} }
impl<'t> Iterator for CursorRef<'t> { impl<'t> Iterator for RefTokenTreeCursor<'t> {
type Item = &'t TokenTree; type Item = &'t TokenTree;
fn next(&mut self) -> Option<&'t TokenTree> { fn next(&mut self) -> Option<&'t TokenTree> {
@ -579,15 +581,16 @@ impl<'t> Iterator for CursorRef<'t> {
} }
} }
/// Owning by-value iterator over a [`TokenStream`]. /// Owning by-value iterator over a [`TokenStream`], that produces `TokenTree`
/// items.
// FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones. // FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones.
#[derive(Clone)] #[derive(Clone)]
pub struct Cursor { pub struct TokenTreeCursor {
pub stream: TokenStream, pub stream: TokenStream,
index: usize, index: usize,
} }
impl Iterator for Cursor { impl Iterator for TokenTreeCursor {
type Item = TokenTree; type Item = TokenTree;
fn next(&mut self) -> Option<TokenTree> { fn next(&mut self) -> Option<TokenTree> {
@ -598,9 +601,9 @@ impl Iterator for Cursor {
} }
} }
impl Cursor { impl TokenTreeCursor {
fn new(stream: TokenStream) -> Self { fn new(stream: TokenStream) -> Self {
Cursor { stream, index: 0 } TokenTreeCursor { stream, index: 0 }
} }
#[inline] #[inline]
@ -614,6 +617,15 @@ impl Cursor {
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.0.get(self.index + n) self.stream.0.get(self.index + n)
} }
// Replace the previously obtained token tree with `tts`, and rewind to
// just before them.
pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
assert!(self.index > 0);
self.index -= 1;
let stream = Lrc::make_mut(&mut self.stream.0);
stream.splice(self.index..self.index + 1, tts);
}
} }
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]

View file

@ -1,5 +1,5 @@
use rustc_ast::token::{self, Delimiter}; use rustc_ast::token::{self, Delimiter};
use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree}; use rustc_ast::tokenstream::{RefTokenTreeCursor, TokenStream, TokenTree};
use rustc_ast::{LitIntType, LitKind}; use rustc_ast::{LitIntType, LitKind};
use rustc_ast_pretty::pprust; use rustc_ast_pretty::pprust;
use rustc_errors::{Applicability, PResult}; use rustc_errors::{Applicability, PResult};
@ -72,7 +72,7 @@ impl MetaVarExpr {
// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}` // Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
fn check_trailing_token<'sess>( fn check_trailing_token<'sess>(
iter: &mut CursorRef<'_>, iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess, sess: &'sess ParseSess,
) -> PResult<'sess, ()> { ) -> PResult<'sess, ()> {
if let Some(tt) = iter.next() { if let Some(tt) = iter.next() {
@ -88,7 +88,7 @@ fn check_trailing_token<'sess>(
/// Parse a meta-variable `count` expression: `count(ident[, depth])` /// Parse a meta-variable `count` expression: `count(ident[, depth])`
fn parse_count<'sess>( fn parse_count<'sess>(
iter: &mut CursorRef<'_>, iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess, sess: &'sess ParseSess,
span: Span, span: Span,
) -> PResult<'sess, MetaVarExpr> { ) -> PResult<'sess, MetaVarExpr> {
@ -99,7 +99,7 @@ fn parse_count<'sess>(
/// Parses the depth used by index(depth) and length(depth). /// Parses the depth used by index(depth) and length(depth).
fn parse_depth<'sess>( fn parse_depth<'sess>(
iter: &mut CursorRef<'_>, iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess, sess: &'sess ParseSess,
span: Span, span: Span,
) -> PResult<'sess, usize> { ) -> PResult<'sess, usize> {
@ -126,7 +126,7 @@ fn parse_depth<'sess>(
/// Parses a generic ident /// Parses a generic ident
fn parse_ident<'sess>( fn parse_ident<'sess>(
iter: &mut CursorRef<'_>, iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess, sess: &'sess ParseSess,
span: Span, span: Span,
) -> PResult<'sess, Ident> { ) -> PResult<'sess, Ident> {
@ -152,7 +152,7 @@ fn parse_ident<'sess>(
/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the /// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
/// iterator is not modified and the result is `false`. /// iterator is not modified and the result is `false`.
fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool { fn try_eat_comma(iter: &mut RefTokenTreeCursor<'_>) -> bool {
if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) { if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) {
let _ = iter.next(); let _ = iter.next();
return true; return true;

View file

@ -469,6 +469,6 @@ mod size_asserts {
use rustc_data_structures::static_assert_size; use rustc_data_structures::static_assert_size;
// tidy-alphabetical-start // tidy-alphabetical-start
static_assert_size!(AttrWrapper, 16); static_assert_size!(AttrWrapper, 16);
static_assert_size!(LazyAttrTokenStreamImpl, 144); static_assert_size!(LazyAttrTokenStreamImpl, 120);
// tidy-alphabetical-end // tidy-alphabetical-end
} }

View file

@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> {
} }
if self.token.kind == TokenKind::Semi if self.token.kind == TokenKind::Semi
&& matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _))) && matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _)))
&& self.may_recover() && self.may_recover()
{ {
// It is likely that the closure body is a block but where the // It is likely that the closure body is a block but where the

View file

@ -19,9 +19,8 @@ pub use path::PathStyle;
use rustc_ast::ptr::P; use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind}; use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::AttributesData; use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing};
use rustc_ast::tokenstream::{self, DelimSpan, Spacing}; use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::util::case::Case; use rustc_ast::util::case::Case;
use rustc_ast::AttrId; use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID; use rustc_ast::DUMMY_NODE_ID;
@ -168,7 +167,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger. // it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(Parser<'_>, 336); rustc_data_structures::static_assert_size!(Parser<'_>, 312);
/// Stores span information about a closure. /// Stores span information about a closure.
#[derive(Clone)] #[derive(Clone)]
@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> {
} }
} }
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part.
#[derive(Clone)] #[derive(Clone)]
struct TokenCursor { struct TokenCursor {
// The current (innermost) frame. `frame` and `stack` could be combined, // Cursor for the current (innermost) token stream. The delimiters for this
// but it's faster to have them separately to access `frame` directly // token stream are found in `self.stack.last()`; when that is `None` then
// rather than via something like `stack.last().unwrap()` or // we are in the outermost token stream which never has delimiters.
// `stack[stack.len() - 1]`. tree_cursor: TokenTreeCursor,
frame: TokenCursorFrame,
// Additional frames that enclose `frame`. // Token streams surrounding the current one. The delimiters for stack[n]'s
stack: Vec<TokenCursorFrame>, // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
// because it's the outermost token stream which never has delimiters.
stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
desugar_doc_comments: bool, desugar_doc_comments: bool,
// Counts the number of calls to `{,inlined_}next`. // Counts the number of calls to `{,inlined_}next`.
num_next_calls: usize, num_next_calls: usize,
// During parsing, we may sometimes need to 'unglue' a // During parsing, we may sometimes need to 'unglue' a
// glued token into two component tokens // glued token into two component tokens
// (e.g. '>>' into '>' and '>'), so that the parser // (e.g. '>>' into '>' and '>'), so that the parser
@ -257,18 +265,6 @@ struct TokenCursor {
break_last_token: bool, break_last_token: bool,
} }
#[derive(Clone)]
struct TokenCursorFrame {
delim_sp: Option<(Delimiter, DelimSpan)>,
tree_cursor: tokenstream::Cursor,
}
impl TokenCursorFrame {
fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
}
}
impl TokenCursor { impl TokenCursor {
fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
self.inlined_next(desugar_doc_comments) self.inlined_next(desugar_doc_comments)
@ -281,38 +277,47 @@ impl TokenCursor {
// FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
// need to, whereupon the `delim != Delimiter::Invisible` conditions below can be // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
// removed. // removed.
if let Some(tree) = self.frame.tree_cursor.next_ref() { if let Some(tree) = self.tree_cursor.next_ref() {
match tree { match tree {
&TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) { &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
(true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
return self.desugar(attr_style, data, span); let desugared = self.desugar(attr_style, data, span);
self.tree_cursor.replace_prev_and_rewind(desugared);
// Continue to get the first token of the desugared doc comment.
}
_ => {
debug_assert!(!matches!(
token.kind,
token::OpenDelim(_) | token::CloseDelim(_)
));
return (token.clone(), spacing);
} }
_ => return (token.clone(), spacing),
}, },
&TokenTree::Delimited(sp, delim, ref tts) => { &TokenTree::Delimited(sp, delim, ref tts) => {
// Set `open_delim` to true here because we deal with it immediately. let trees = tts.clone().into_trees();
let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone()); self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
self.stack.push(mem::replace(&mut self.frame, frame));
if delim != Delimiter::Invisible { if delim != Delimiter::Invisible {
return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
} }
// No open delimiter to return; continue on to the next iteration. // No open delimiter to return; continue on to the next iteration.
} }
}; };
} else if let Some(frame) = self.stack.pop() { } else if let Some((tree_cursor, delim, span)) = self.stack.pop() {
if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible { // We have exhausted this token stream. Move back to its parent token stream.
self.frame = frame; self.tree_cursor = tree_cursor;
if delim != Delimiter::Invisible {
return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
} }
self.frame = frame;
// No close delimiter to return; continue on to the next iteration. // No close delimiter to return; continue on to the next iteration.
} else { } else {
// We have exhausted the outermost token stream.
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
} }
} }
} }
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { // Desugar a doc comment into something like `#[doc = r"foo"]`.
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text. E.g. // required to wrap the text. E.g.
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
@ -346,27 +351,15 @@ impl TokenCursor {
.collect::<TokenStream>(), .collect::<TokenStream>(),
); );
self.stack.push(mem::replace(
&mut self.frame,
TokenCursorFrame::new(
None,
if attr_style == AttrStyle::Inner { if attr_style == AttrStyle::Inner {
[ vec![
TokenTree::token_alone(token::Pound, span), TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span), TokenTree::token_alone(token::Not, span),
body, body,
] ]
.into_iter()
.collect::<TokenStream>()
} else { } else {
[TokenTree::token_alone(token::Pound, span), body] vec![TokenTree::token_alone(token::Pound, span), body]
.into_iter() }
.collect::<TokenStream>()
},
),
));
self.next(/* desugar_doc_comments */ false)
} }
} }
@ -475,7 +468,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(), restrictions: Restrictions::empty(),
expected_tokens: Vec::new(), expected_tokens: Vec::new(),
token_cursor: TokenCursor { token_cursor: TokenCursor {
frame: TokenCursorFrame::new(None, tokens), tree_cursor: tokens.into_trees(),
stack: Vec::new(), stack: Vec::new(),
num_next_calls: 0, num_next_calls: 0,
desugar_doc_comments, desugar_doc_comments,
@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> {
return looker(&self.token); return looker(&self.token);
} }
let frame = &self.token_cursor.frame; let tree_cursor = &self.token_cursor.tree_cursor;
if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible { if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
&& delim != Delimiter::Invisible
{
let all_normal = (0..dist).all(|i| { let all_normal = (0..dist).all(|i| {
let token = frame.tree_cursor.look_ahead(i); let token = tree_cursor.look_ahead(i);
!matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _))) !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
}); });
if all_normal { if all_normal {
return match frame.tree_cursor.look_ahead(dist - 1) { return match tree_cursor.look_ahead(dist - 1) {
Some(tree) => match tree { Some(tree) => match tree {
TokenTree::Token(token, _) => looker(token), TokenTree::Token(token, _) => looker(token),
TokenTree::Delimited(dspan, delim, _) => { TokenTree::Delimited(dspan, delim, _) => {
@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> {
pub(crate) fn parse_token_tree(&mut self) -> TokenTree { pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind { match self.token.kind {
token::OpenDelim(..) => { token::OpenDelim(..) => {
// Grab the tokens from this frame. // Grab the tokens within the delimiters.
let frame = &self.token_cursor.frame; let tree_cursor = &self.token_cursor.tree_cursor;
let stream = frame.tree_cursor.stream.clone(); let stream = tree_cursor.stream.clone();
let (delim, span) = frame.delim_sp.unwrap(); let (_, delim, span) = *self.token_cursor.stack.last().unwrap();
// Advance the token cursor through the entire delimited // Advance the token cursor through the entire delimited
// sequence. After getting the `OpenDelim` we are *within* the // sequence. After getting the `OpenDelim` we are *within* the

View file

@ -13,7 +13,7 @@ use std::collections::HashMap;
use std::panic::{catch_unwind, AssertUnwindSafe}; use std::panic::{catch_unwind, AssertUnwindSafe};
use rustc_ast::token::{BinOpToken, Delimiter, Token, TokenKind}; use rustc_ast::token::{BinOpToken, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::{Cursor, TokenStream, TokenTree}; use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::{ast, ptr}; use rustc_ast::{ast, ptr};
use rustc_ast_pretty::pprust; use rustc_ast_pretty::pprust;
use rustc_span::{ use rustc_span::{
@ -736,7 +736,7 @@ impl MacroArgParser {
self.buf.clear(); self.buf.clear();
} }
fn add_meta_variable(&mut self, iter: &mut Cursor) -> Option<()> { fn add_meta_variable(&mut self, iter: &mut TokenTreeCursor) -> Option<()> {
match iter.next() { match iter.next() {
Some(TokenTree::Token( Some(TokenTree::Token(
Token { Token {
@ -768,7 +768,7 @@ impl MacroArgParser {
&mut self, &mut self,
inner: Vec<ParsedMacroArg>, inner: Vec<ParsedMacroArg>,
delim: Delimiter, delim: Delimiter,
iter: &mut Cursor, iter: &mut TokenTreeCursor,
) -> Option<()> { ) -> Option<()> {
let mut buffer = String::new(); let mut buffer = String::new();
let mut first = true; let mut first = true;
@ -1121,7 +1121,7 @@ pub(crate) fn macro_style(mac: &ast::MacCall, context: &RewriteContext<'_>) -> D
// Currently we do not attempt to parse any further than that. // Currently we do not attempt to parse any further than that.
#[derive(new)] #[derive(new)]
struct MacroParser { struct MacroParser {
toks: Cursor, toks: TokenTreeCursor,
} }
impl MacroParser { impl MacroParser {