Auto merge of #50855 - nnethercote:fewer-macro_parser-allocs, r=petrochenkov
Speed up the macro parser

These three commits reduce the number of allocations done by the macro parser, in some cases dramatically. For example, for clean check builds of html5ever, the number of allocations is reduced by 40%.

Here are the rustc-benchmarks that are sped up by at least 1%.

```
html5ever-check   avg: -6.6%   min: -10.3%   max: -4.1%
html5ever         avg: -5.2%   min: -9.5%    max: -2.8%
html5ever-opt     avg: -4.3%   min: -9.3%    max: -1.6%
crates.io-check   avg: -1.8%   min: -2.9%    max: -0.6%
crates.io-opt     avg: -1.0%   min: -2.2%    max: -0.1%
crates.io         avg: -1.1%   min: -2.2%    max: -0.2%
```
This commit is contained in:
commit
4c26e2e3fb
5 changed files with 87 additions and 35 deletions
|
@ -82,7 +82,7 @@
|
||||||
|
|
||||||
pub use self::NamedMatch::*;
|
pub use self::NamedMatch::*;
|
||||||
pub use self::ParseResult::*;
|
pub use self::ParseResult::*;
|
||||||
use self::TokenTreeOrTokenTreeVec::*;
|
use self::TokenTreeOrTokenTreeSlice::*;
|
||||||
|
|
||||||
use ast::Ident;
|
use ast::Ident;
|
||||||
use syntax_pos::{self, BytePos, Span};
|
use syntax_pos::{self, BytePos, Span};
|
||||||
|
@ -97,6 +97,7 @@ use tokenstream::TokenStream;
|
||||||
use util::small_vector::SmallVector;
|
use util::small_vector::SmallVector;
|
||||||
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
use std::ops::{Deref, DerefMut};
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
use std::collections::hash_map::Entry::{Occupied, Vacant};
|
||||||
|
@ -106,12 +107,12 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
|
||||||
/// Either a sequence of token trees or a single one. This is used as the representation of the
|
/// Either a sequence of token trees or a single one. This is used as the representation of the
|
||||||
/// sequence of tokens that make up a matcher.
|
/// sequence of tokens that make up a matcher.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
enum TokenTreeOrTokenTreeVec {
|
enum TokenTreeOrTokenTreeSlice<'a> {
|
||||||
Tt(TokenTree),
|
Tt(TokenTree),
|
||||||
TtSeq(Vec<TokenTree>),
|
TtSeq(&'a [TokenTree]),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TokenTreeOrTokenTreeVec {
|
impl<'a> TokenTreeOrTokenTreeSlice<'a> {
|
||||||
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
|
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
|
||||||
/// will not recursively descend into subtrees).
|
/// will not recursively descend into subtrees).
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
|
@ -135,9 +136,9 @@ impl TokenTreeOrTokenTreeVec {
|
||||||
/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
|
/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
|
||||||
/// descended into.
|
/// descended into.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct MatcherTtFrame {
|
struct MatcherTtFrame<'a> {
|
||||||
/// The "parent" matcher that we are descending into.
|
/// The "parent" matcher that we are descending into.
|
||||||
elts: TokenTreeOrTokenTreeVec,
|
elts: TokenTreeOrTokenTreeSlice<'a>,
|
||||||
/// The position of the "dot" in `elts` at the time we descended.
|
/// The position of the "dot" in `elts` at the time we descended.
|
||||||
idx: usize,
|
idx: usize,
|
||||||
}
|
}
|
||||||
|
@ -145,9 +146,9 @@ struct MatcherTtFrame {
|
||||||
/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module
|
/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module
|
||||||
/// documentation.
|
/// documentation.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct MatcherPos {
|
struct MatcherPos<'a> {
|
||||||
/// The token or sequence of tokens that make up the matcher
|
/// The token or sequence of tokens that make up the matcher
|
||||||
top_elts: TokenTreeOrTokenTreeVec,
|
top_elts: TokenTreeOrTokenTreeSlice<'a>,
|
||||||
/// The position of the "dot" in this matcher
|
/// The position of the "dot" in this matcher
|
||||||
idx: usize,
|
idx: usize,
|
||||||
/// The beginning position in the source that the beginning of this matcher corresponds to. In
|
/// The beginning position in the source that the beginning of this matcher corresponds to. In
|
||||||
|
@ -186,7 +187,7 @@ struct MatcherPos {
|
||||||
sep: Option<Token>,
|
sep: Option<Token>,
|
||||||
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
|
/// The "parent" matcher position if we are in a repetition. That is, the matcher position just
|
||||||
/// before we enter the sequence.
|
/// before we enter the sequence.
|
||||||
up: Option<Box<MatcherPos>>,
|
up: Option<MatcherPosHandle<'a>>,
|
||||||
|
|
||||||
// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
|
// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
|
||||||
// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc
|
// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc
|
||||||
|
@ -195,10 +196,10 @@ struct MatcherPos {
|
||||||
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
|
/// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
|
||||||
/// that where the bottom of the stack is the outermost matcher.
|
/// that where the bottom of the stack is the outermost matcher.
|
||||||
// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
|
// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
|
||||||
stack: Vec<MatcherTtFrame>,
|
stack: Vec<MatcherTtFrame<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MatcherPos {
|
impl<'a> MatcherPos<'a> {
|
||||||
/// Add `m` as a named match for the `idx`-th metavar.
|
/// Add `m` as a named match for the `idx`-th metavar.
|
||||||
fn push_match(&mut self, idx: usize, m: NamedMatch) {
|
fn push_match(&mut self, idx: usize, m: NamedMatch) {
|
||||||
let matches = Rc::make_mut(&mut self.matches[idx]);
|
let matches = Rc::make_mut(&mut self.matches[idx]);
|
||||||
|
@ -206,6 +207,49 @@ impl MatcherPos {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lots of MatcherPos instances are created at runtime. Allocating them on the
|
||||||
|
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
|
||||||
|
// on the stack is also slow, because MatcherPos is quite a large type and
|
||||||
|
// instances get moved around a lot between vectors, which requires lots of
|
||||||
|
// slow memcpy calls.
|
||||||
|
//
|
||||||
|
// Therefore, the initial MatcherPos is always allocated on the stack,
|
||||||
|
// subsequent ones (of which there aren't that many) are allocated on the heap,
|
||||||
|
// and this type is used to encapsulate both cases.
|
||||||
|
enum MatcherPosHandle<'a> {
|
||||||
|
Ref(&'a mut MatcherPos<'a>),
|
||||||
|
Box(Box<MatcherPos<'a>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Clone for MatcherPosHandle<'a> {
|
||||||
|
// This always produces a new Box.
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
MatcherPosHandle::Box(match *self {
|
||||||
|
MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
|
||||||
|
MatcherPosHandle::Box(ref b) => b.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Deref for MatcherPosHandle<'a> {
|
||||||
|
type Target = MatcherPos<'a>;
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
match *self {
|
||||||
|
MatcherPosHandle::Ref(ref r) => r,
|
||||||
|
MatcherPosHandle::Box(ref b) => b,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DerefMut for MatcherPosHandle<'a> {
|
||||||
|
fn deref_mut(&mut self) -> &mut MatcherPos<'a> {
|
||||||
|
match *self {
|
||||||
|
MatcherPosHandle::Ref(ref mut r) => r,
|
||||||
|
MatcherPosHandle::Box(ref mut b) => b,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Represents the possible results of an attempted parse.
|
/// Represents the possible results of an attempted parse.
|
||||||
pub enum ParseResult<T> {
|
pub enum ParseResult<T> {
|
||||||
/// Parsed successfully.
|
/// Parsed successfully.
|
||||||
|
@ -241,10 +285,10 @@ fn create_matches(len: usize) -> Vec<Rc<Vec<NamedMatch>>> {
|
||||||
|
|
||||||
/// Generate the top-level matcher position in which the "dot" is before the first token of the
|
/// Generate the top-level matcher position in which the "dot" is before the first token of the
|
||||||
/// matcher `ms` and we are going to start matching at position `lo` in the source.
|
/// matcher `ms` and we are going to start matching at position `lo` in the source.
|
||||||
fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
|
fn initial_matcher_pos(ms: &[TokenTree], lo: BytePos) -> MatcherPos {
|
||||||
let match_idx_hi = count_names(&ms[..]);
|
let match_idx_hi = count_names(ms);
|
||||||
let matches = create_matches(match_idx_hi);
|
let matches = create_matches(match_idx_hi);
|
||||||
Box::new(MatcherPos {
|
MatcherPos {
|
||||||
// Start with the top level matcher given to us
|
// Start with the top level matcher given to us
|
||||||
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
|
top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
|
||||||
// The "dot" is before the first token of the matcher
|
// The "dot" is before the first token of the matcher
|
||||||
|
@ -267,7 +311,7 @@ fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
|
||||||
seq_op: None,
|
seq_op: None,
|
||||||
sep: None,
|
sep: None,
|
||||||
up: None,
|
up: None,
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
|
/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
|
||||||
|
@ -394,12 +438,12 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// A `ParseResult`. Note that matches are kept track of through the items generated.
|
/// A `ParseResult`. Note that matches are kept track of through the items generated.
|
||||||
fn inner_parse_loop(
|
fn inner_parse_loop<'a>(
|
||||||
sess: &ParseSess,
|
sess: &ParseSess,
|
||||||
cur_items: &mut SmallVector<Box<MatcherPos>>,
|
cur_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||||
next_items: &mut Vec<Box<MatcherPos>>,
|
next_items: &mut Vec<MatcherPosHandle<'a>>,
|
||||||
eof_items: &mut SmallVector<Box<MatcherPos>>,
|
eof_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||||
bb_items: &mut SmallVector<Box<MatcherPos>>,
|
bb_items: &mut SmallVector<MatcherPosHandle<'a>>,
|
||||||
token: &Token,
|
token: &Token,
|
||||||
span: syntax_pos::Span,
|
span: syntax_pos::Span,
|
||||||
) -> ParseResult<()> {
|
) -> ParseResult<()> {
|
||||||
|
@ -502,7 +546,7 @@ fn inner_parse_loop(
|
||||||
}
|
}
|
||||||
|
|
||||||
let matches = create_matches(item.matches.len());
|
let matches = create_matches(item.matches.len());
|
||||||
cur_items.push(Box::new(MatcherPos {
|
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos {
|
||||||
stack: vec![],
|
stack: vec![],
|
||||||
sep: seq.separator.clone(),
|
sep: seq.separator.clone(),
|
||||||
seq_op: Some(seq.op),
|
seq_op: Some(seq.op),
|
||||||
|
@ -514,7 +558,7 @@ fn inner_parse_loop(
|
||||||
up: Some(item),
|
up: Some(item),
|
||||||
sp_lo: sp.lo(),
|
sp_lo: sp.lo(),
|
||||||
top_elts: Tt(TokenTree::Sequence(sp, seq)),
|
top_elts: Tt(TokenTree::Sequence(sp, seq)),
|
||||||
}));
|
})));
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to match a metavar (but the identifier is invalid)... this is an error
|
// We need to match a metavar (but the identifier is invalid)... this is an error
|
||||||
|
@ -596,7 +640,11 @@ pub fn parse(
|
||||||
// processes all of these possible matcher positions and produces possible next positions into
|
// processes all of these possible matcher positions and produces possible next positions into
|
||||||
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
|
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
|
||||||
// and we start over again.
|
// and we start over again.
|
||||||
let mut cur_items = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo()));
|
//
|
||||||
|
// This MatcherPos instance is allocated on the stack. All others -- and
|
||||||
|
// there are frequently *no* others! -- are allocated on the heap.
|
||||||
|
let mut initial = initial_matcher_pos(ms, parser.span.lo());
|
||||||
|
let mut cur_items = SmallVector::one(MatcherPosHandle::Ref(&mut initial));
|
||||||
let mut next_items = Vec::new();
|
let mut next_items = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
|
|
@ -27,6 +27,7 @@ use parse::token::Token::*;
|
||||||
use symbol::Symbol;
|
use symbol::Symbol;
|
||||||
use tokenstream::{TokenStream, TokenTree};
|
use tokenstream::{TokenStream, TokenTree};
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
|
|
||||||
|
@ -142,7 +143,7 @@ fn generic_extension<'cx>(cx: &'cx mut ExtCtxt,
|
||||||
}
|
}
|
||||||
|
|
||||||
let directory = Directory {
|
let directory = Directory {
|
||||||
path: cx.current_expansion.module.directory.clone(),
|
path: Cow::from(cx.current_expansion.module.directory.as_path()),
|
||||||
ownership: cx.current_expansion.directory_ownership,
|
ownership: cx.current_expansion.directory_ownership,
|
||||||
};
|
};
|
||||||
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false);
|
let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false);
|
||||||
|
|
|
@ -23,6 +23,7 @@ use symbol::Symbol;
|
||||||
use tokenstream::{TokenStream, TokenTree};
|
use tokenstream::{TokenStream, TokenTree};
|
||||||
use diagnostics::plugin::ErrorMap;
|
use diagnostics::plugin::ErrorMap;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::iter;
|
use std::iter;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
@ -89,8 +90,8 @@ impl ParseSess {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Directory {
|
pub struct Directory<'a> {
|
||||||
pub path: PathBuf,
|
pub path: Cow<'a, Path>,
|
||||||
pub ownership: DirectoryOwnership,
|
pub ownership: DirectoryOwnership,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@ use tokenstream::{self, Delimited, ThinTokenStream, TokenTree, TokenStream};
|
||||||
use symbol::{Symbol, keywords};
|
use symbol::{Symbol, keywords};
|
||||||
use util::ThinVec;
|
use util::ThinVec;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::path::{self, Path, PathBuf};
|
use std::path::{self, Path, PathBuf};
|
||||||
|
@ -228,7 +229,7 @@ pub struct Parser<'a> {
|
||||||
prev_token_kind: PrevTokenKind,
|
prev_token_kind: PrevTokenKind,
|
||||||
pub restrictions: Restrictions,
|
pub restrictions: Restrictions,
|
||||||
/// Used to determine the path to externally loaded source files
|
/// Used to determine the path to externally loaded source files
|
||||||
pub directory: Directory,
|
pub directory: Directory<'a>,
|
||||||
/// Whether to parse sub-modules in other files.
|
/// Whether to parse sub-modules in other files.
|
||||||
pub recurse_into_file_modules: bool,
|
pub recurse_into_file_modules: bool,
|
||||||
/// Name of the root module this parser originated from. If `None`, then the
|
/// Name of the root module this parser originated from. If `None`, then the
|
||||||
|
@ -535,7 +536,7 @@ enum TokenExpectType {
|
||||||
impl<'a> Parser<'a> {
|
impl<'a> Parser<'a> {
|
||||||
pub fn new(sess: &'a ParseSess,
|
pub fn new(sess: &'a ParseSess,
|
||||||
tokens: TokenStream,
|
tokens: TokenStream,
|
||||||
directory: Option<Directory>,
|
directory: Option<Directory<'a>>,
|
||||||
recurse_into_file_modules: bool,
|
recurse_into_file_modules: bool,
|
||||||
desugar_doc_comments: bool)
|
desugar_doc_comments: bool)
|
||||||
-> Self {
|
-> Self {
|
||||||
|
@ -549,7 +550,7 @@ impl<'a> Parser<'a> {
|
||||||
restrictions: Restrictions::empty(),
|
restrictions: Restrictions::empty(),
|
||||||
recurse_into_file_modules,
|
recurse_into_file_modules,
|
||||||
directory: Directory {
|
directory: Directory {
|
||||||
path: PathBuf::new(),
|
path: Cow::from(PathBuf::new()),
|
||||||
ownership: DirectoryOwnership::Owned { relative: None }
|
ownership: DirectoryOwnership::Owned { relative: None }
|
||||||
},
|
},
|
||||||
root_module_name: None,
|
root_module_name: None,
|
||||||
|
@ -572,9 +573,9 @@ impl<'a> Parser<'a> {
|
||||||
if let Some(directory) = directory {
|
if let Some(directory) = directory {
|
||||||
parser.directory = directory;
|
parser.directory = directory;
|
||||||
} else if !parser.span.source_equal(&DUMMY_SP) {
|
} else if !parser.span.source_equal(&DUMMY_SP) {
|
||||||
if let FileName::Real(path) = sess.codemap().span_to_unmapped_path(parser.span) {
|
if let FileName::Real(mut path) = sess.codemap().span_to_unmapped_path(parser.span) {
|
||||||
parser.directory.path = path;
|
path.pop();
|
||||||
parser.directory.path.pop();
|
parser.directory.path = Cow::from(path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6008,10 +6009,10 @@ impl<'a> Parser<'a> {
|
||||||
|
|
||||||
fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
|
fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
|
||||||
if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") {
|
if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") {
|
||||||
self.directory.path.push(&path.as_str());
|
self.directory.path.to_mut().push(&path.as_str());
|
||||||
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
|
self.directory.ownership = DirectoryOwnership::Owned { relative: None };
|
||||||
} else {
|
} else {
|
||||||
self.directory.path.push(&id.name.as_str());
|
self.directory.path.to_mut().push(&id.name.as_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ use print::pprust;
|
||||||
use serialize::{Decoder, Decodable, Encoder, Encodable};
|
use serialize::{Decoder, Decodable, Encoder, Encodable};
|
||||||
use util::RcSlice;
|
use util::RcSlice;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::{fmt, iter, mem};
|
use std::{fmt, iter, mem};
|
||||||
use std::hash::{self, Hash};
|
use std::hash::{self, Hash};
|
||||||
|
|
||||||
|
@ -106,7 +107,7 @@ impl TokenTree {
|
||||||
-> macro_parser::NamedParseResult {
|
-> macro_parser::NamedParseResult {
|
||||||
// `None` is because we're not interpolating
|
// `None` is because we're not interpolating
|
||||||
let directory = Directory {
|
let directory = Directory {
|
||||||
path: cx.current_expansion.module.directory.clone(),
|
path: Cow::from(cx.current_expansion.module.directory.as_path()),
|
||||||
ownership: cx.current_expansion.directory_ownership,
|
ownership: cx.current_expansion.directory_ownership,
|
||||||
};
|
};
|
||||||
macro_parser::parse(cx.parse_sess(), tts, mtch, Some(directory), true)
|
macro_parser::parse(cx.parse_sess(), tts, mtch, Some(directory), true)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue