Auto merge of #50855 - nnethercote:fewer-macro_parser-allocs, r=petrochenkov
Speed up the macro parser These three commits reduce the number of allocations done by the macro parser, in some cases dramatically. For example, for a clean check builds of html5ever, the number of allocations is reduced by 40%. Here are the rustc-benchmarks that are sped up by at least 1%. ``` html5ever-check avg: -6.6% min: -10.3% max: -4.1% html5ever avg: -5.2% min: -9.5% max: -2.8% html5ever-opt avg: -4.3% min: -9.3% max: -1.6% crates.io-check avg: -1.8% min: -2.9% max: -0.6% crates.io-opt avg: -1.0% min: -2.2% max: -0.1% crates.io avg: -1.1% min: -2.2% max: -0.2% ```
This commit is contained in:
commit
4c26e2e3fb
5 changed files with 87 additions and 35 deletions
|
@ -82,7 +82,7 @@
|
|||
|
||||
pub use self::NamedMatch::*;
|
||||
pub use self::ParseResult::*;
|
||||
use self::TokenTreeOrTokenTreeVec::*;
|
||||
use self::TokenTreeOrTokenTreeSlice::*;
|
||||
|
||||
use ast::Ident;
|
||||
use syntax_pos::{self, BytePos, Span};
|
||||
|
@ -97,6 +97,7 @@ use tokenstream::TokenStream;
|
|||
use util::small_vector::SmallVector;
|
||||
|
||||
use std::mem;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::rc::Rc;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
||||
|
@ -106,12 +107,12 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
|
|||
/// Either a sequence of token trees or a single one. This is used as the representation of the
|
||||
/// sequence of tokens that make up a matcher.
|
||||
#[derive(Clone)]
|
||||
enum TokenTreeOrTokenTreeVec {
|
||||
enum TokenTreeOrTokenTreeSlice<'a> {
|
||||
Tt(TokenTree),
|
||||
TtSeq(Vec<TokenTree>),
|
||||
TtSeq(&'a [TokenTree]),
|
||||
}
|
||||
|
||||
impl TokenTreeOrTokenTreeVec {
|
||||
impl<'a> TokenTreeOrTokenTreeSlice<'a> {
|
||||
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
|
||||
/// will not recursively descend into subtrees).
|
||||
fn len(&self) -> usize {
|
||||
|
@ -135,9 +136,9 @@ impl TokenTreeOrTokenTreeVec {
|
|||
/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
|
||||
/// descended into.
|
||||
#[derive(Clone)]
|
||||
struct MatcherTtFrame {
|
||||
struct MatcherTtFrame<'a> {
|
||||
/// The "parent" matcher that we are descending into.
|
||||
elts: TokenTreeOrTokenTreeVec,
|
||||
elts: TokenTreeOrTokenTreeSlice<'a>,
|
||||
/// The position of the "dot" in `elts` at the time we descended.
|
||||
idx: usize,
|
||||
}
|
||||
|
@ -145,9 +146,9 @@ struct MatcherTtFrame {
|
|||
/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module
|
||||
/// documentation.
|
||||
#[derive(Clone)]
|
||||
struct MatcherPos {
|
||||
struct MatcherPos<'a> {
|
||||
/// The token or sequence of tokens that make up the matcher
|
||||
top_elts: TokenTreeOrTokenTreeVec,
|
||||
top_elts: TokenTreeOrTokenTreeSlice<'a>,
|
||||
/// The position of the "dot" in this matcher
|
||||
idx: usize,
|
||||
/// The beginning position in the source that the beginning of this matcher corresponds to. In
|
||||
|
@ -186,7 +187,7 @@ struct MatcherPos {
|
|||
sep: Option<Token>,
|
||||
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
|
||||
/// before we enter the sequence.
|
||||
up: Option<Box<MatcherPos>>,
|
||||
up: Option<MatcherPosHandle<'a>>,
|
||||
|
||||
// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
|
||||
// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc
|
||||
|
@ -195,10 +196,10 @@ struct MatcherPos {
|
|||
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
|
||||
/// that where the bottom of the stack is the outermost matcher.
|
||||
// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
|
||||
stack: Vec<MatcherTtFrame>,
|
||||
stack: Vec<MatcherTtFrame<'a>>,
|
||||
}
|
||||
|
||||
impl MatcherPos {
|
||||
impl<'a> MatcherPos<'a> {
|
||||
/// Add `m` as a named match for the `idx`-th metavar.
|
||||
fn push_match(&mut self, idx: usize, m: NamedMatch) {
|
||||
let matches = Rc::make_mut(&mut self.matches[idx]);
|
||||
|
@ -206,6 +207,49 @@ impl MatcherPos {
|
|||
}
|
||||
}
|
||||
|
||||
// Lots of MatcherPos instances are created at runtime. Allocating them on the
|
||||
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
|
||||
// on the stack is also slow, because MatcherPos is quite a large type and
|
||||
// instances get moved around a lot between vectors, which requires lots of
|
||||
// slow memcpy calls.
|
||||
//
|
||||
// Therefore, the initial MatcherPos is always allocated on the stack,
|
||||
// subsequent ones (of which there aren't that many) are allocated on the heap,
|
||||
// and this type is used to encapsulate both cases.
|
||||
enum MatcherPosHandle<'a> {
|
||||
Ref(&'a mut MatcherPos<'a>),
|
||||
Box(Box<MatcherPos<'a>>),
|
||||
}
|
||||
|
||||
impl<'a> Clone for MatcherPosHandle<'a> {
|
||||
// This always produces a new Box.
|
||||
fn clone(&self) -> Self {
|
||||
MatcherPosHandle::Box(match *self {
|
||||
MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
|
||||
MatcherPosHandle::Box(ref b) => b.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Deref for MatcherPosHandle<'a> {
|
||||
type Target = MatcherPos<'a>;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match *self {
|
||||
MatcherPosHandle::Ref(ref r) => r,
|
||||
MatcherPosHandle::Box(ref b) => b,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> DerefMut for MatcherPosHandle<'a> {
|
||||
fn deref_mut(&mut self) -> &mut MatcherPos<'a> {
|
||||
match *self {
|
||||
MatcherPosHandle::Ref(ref mut r) => r,
|
||||
MatcherPosHandle::Box(ref mut b) => b,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the possible results of an attempted parse.
|
||||
pub enum ParseResult<T> {
|
||||
/// Parsed successfully.
|
||||
|
@ -241,10 +285,10 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {
|
|||
|
||||
/// Generate the top-level matcher position in which the "dot" is before the first token of the
|
||||
/// matcher `ms` and we are going to start matching at position `lo` in the source.
|
||||
fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
|
||||
let match_idx_hi = count_names(&ms[..]);
|
||||
fn initial_matcher_pos(ms: &[TokenTree], lo: BytePos) -> MatcherPos {
|
||||
let match_idx_hi = count_names(ms);
|
||||
let matches = create_matches(match_idx_hi);
|
||||
Box::new(MatcherPos {
|
||||
MatcherPos {
|
||||
// Start with the top level matcher given to us
|
||||
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
|
||||
// The "dot" is before the first token of the matcher
|
||||
|
@ -267,7 +311,7 @@ fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
|
|||
seq_op: None,
|
||||
sep: None,
|
||||
up: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
|
||||
|
@ -394,12 +438,12 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
|
|||
/// # Returns
|
||||
///
|
||||
/// A `ParseResult`. Note that matches are kept track of through the items generated.
|
||||
fn inner_parse_loop(
|
||||
fn inner_parse_loop<'a>(
|
||||
sess: &ParseSess,
|
||||
cur_items: &mut SmallVector<Box<MatcherPos>>,
|
||||
next_items: &mut Vec<Box<MatcherPos>>,
|
||||
eof_items: &mut SmallVector<Box<MatcherPos>>,
|
||||
bb_items: &mut SmallVector<Box<MatcherPos>>,
|
||||
cur_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||
next_items: &mut Vec<MatcherPosHandle<'a>>,
|
||||
eof_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||
bb_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||
token: &Token,
|
||||
span: syntax_pos::Span,
|
||||
) -> ParseResult<()> {
|
||||
|
@ -502,7 +546,7 @@ fn inner_parse_loop(
|
|||
}
|
||||
|
||||
let matches = create_matches(item.matches.len());
|
||||
cur_items.push(Box::new(MatcherPos {
|
||||
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos {
|
||||
stack: vec![],
|
||||
sep: seq.separator.clone(),
|
||||
seq_op: Some(seq.op),
|
||||
|
@ -514,7 +558,7 @@ fn inner_parse_loop(
|
|||
up: Some(item),
|
||||
sp_lo: sp.lo(),
|
||||
top_elts: Tt(TokenTree::Sequence(sp, seq)),
|
||||
}));
|
||||
})));
|
||||
}
|
||||
|
||||
// We need to match a metavar (but the identifier is invalid)... this is an error
|
||||
|
@ -596,7 +640,11 @@ pub fn parse(
|
|||
// processes all of these possible matcher positions and produces posible next positions into
|
||||
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
|
||||
// and we start over again.
|
||||
let mut cur_items = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo()));
|
||||
//
|
||||
// This MatcherPos instance is allocated on the stack. All others -- and
|
||||
// there are frequently *no* others! -- are allocated on the heap.
|
||||
let mut initial = initial_matcher_pos(ms, parser.span.lo());
|
||||
let mut cur_items = SmallVector::one(MatcherPosHandle::Ref(&mut initial));
|
||||
let mut next_items = Vec::new();
|
||||
|
||||
loop {
|
||||
|
|
|
@ -27,6 +27,7 @@ use parse::token::Token::*;
|
|||
use symbol::Symbol;
|
||||
use tokenstream::{TokenStream, TokenTree};
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
||||
|
@ -142,7 +143,7 @@ fn generic_extension<'cx>(cx: &'cx mut ExtCtxt,
|
|||
}
|
||||
|
||||
let directory = Directory {
|
||||
path: cx.current_expansion.module.directory.clone(),
|
||||
path: Cow::from(cx.current_expansion.module.directory.as_path()),
|
||||
ownership: cx.current_expansion.directory_ownership,
|
||||
};
|
||||
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false);
|
||||
|
|
|
@ -23,6 +23,7 @@ use symbol::Symbol;
|
|||
use tokenstream::{TokenStream, TokenTree};
|
||||
use diagnostics::plugin::ErrorMap;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::iter;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
@ -89,8 +90,8 @@ impl ParseSess {
|
|||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Directory {
|
||||
pub path: PathBuf,
|
||||
pub struct Directory<'a> {
|
||||
pub path: Cow<'a, Path>,
|
||||
pub ownership: DirectoryOwnership,
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ use tokenstream::{self, Delimited, ThinTokenStream, TokenTree, TokenStream};
|
|||
use symbol::{Symbol, keywords};
|
||||
use util::ThinVec;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cmp;
|
||||
use std::mem;
|
||||
use std::path::{self, Path, PathBuf};
|
||||
|
@ -228,7 +229,7 @@ pub struct Parser<'a> {
|
|||
prev_token_kind: PrevTokenKind,
|
||||
pub restrictions: Restrictions,
|
||||
/// Used to determine the path to externally loaded source files
|
||||
pub directory: Directory,
|
||||
pub directory: Directory<'a>,
|
||||
/// Whether to parse sub-modules in other files.
|
||||
pub recurse_into_file_modules: bool,
|
||||
/// Name of the root module this parser originated from. If `None`, then the
|
||||
|
@ -535,7 +536,7 @@ enum TokenExpectType {
|
|||
impl<'a> Parser<'a> {
|
||||
pub fn new(sess: &'a ParseSess,
|
||||
tokens: TokenStream,
|
||||
directory: Option<Directory>,
|
||||
directory: Option<Directory<'a>>,
|
||||
recurse_into_file_modules: bool,
|
||||
desugar_doc_comments: bool)
|
||||
-> Self {
|
||||
|
@ -549,7 +550,7 @@ impl<'a> Parser<'a> {
|
|||
restrictions: Restrictions::empty(),
|
||||
recurse_into_file_modules,
|
||||
directory: Directory {
|
||||
path: PathBuf::new(),
|
||||
path: Cow::from(PathBuf::new()),
|
||||
ownership: DirectoryOwnership::Owned { relative: None }
|
||||
},
|
||||
root_module_name: None,
|
||||
|
@ -572,9 +573,9 @@ impl<'a> Parser<'a> {
|
|||
if let Some(directory) = directory {
|
||||
parser.directory = directory;
|
||||
} else if !parser.span.source_equal(&DUMMY_SP) {
|
||||
if let FileName::Real(path) = sess.codemap().span_to_unmapped_path(parser.span) {
|
||||
parser.directory.path = path;
|
||||
parser.directory.path.pop();
|
||||
if let FileName::Real(mut path) = sess.codemap().span_to_unmapped_path(parser.span) {
|
||||
path.pop();
|
||||
parser.directory.path = Cow::from(path);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6008,10 +6009,10 @@ impl<'a> Parser<'a> {
|
|||
|
||||
fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
|
||||
if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") {
|
||||
self.directory.path.push(&path.as_str());
|
||||
self.directory.path.to_mut().push(&path.as_str());
|
||||
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
|
||||
} else {
|
||||
self.directory.path.push(&id.name.as_str());
|
||||
self.directory.path.to_mut().push(&id.name.as_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ use print::pprust;
|
|||
use serialize::{Decoder, Decodable, Encoder, Encodable};
|
||||
use util::RcSlice;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::{fmt, iter, mem};
|
||||
use std::hash::{self, Hash};
|
||||
|
||||
|
@ -106,7 +107,7 @@ impl TokenTree {
|
|||
-> macro_parser::NamedParseResult {
|
||||
// `None` is because we're not interpolating
|
||||
let directory = Directory {
|
||||
path: cx.current_expansion.module.directory.clone(),
|
||||
path: Cow::from(cx.current_expansion.module.directory.as_path()),
|
||||
ownership: cx.current_expansion.directory_ownership,
|
||||
};
|
||||
macro_parser::parse(cx.parse_sess(), tts, mtch, Some(directory), true)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue