auto merge of #5559 : jbclements/rust/change-to-tt-based-parsing, r=jbclements
Changes the parser to parse all streams into token-trees before hitting the parser proper, in preparation for hygiene. As an added bonus, it appears to speed up the parser (albeit by a totally imperceptible 1%). It also adds many comments to the parser and renames fields in the token-tree reader (readme->forest, cur->stack).
This commit is contained in:
commit
6153aae809
6 changed files with 124 additions and 35 deletions
|
@ -151,7 +151,7 @@ pub fn parse_input(sess: Session, +cfg: ast::crate_cfg, input: input)
|
|||
-> @ast::crate {
|
||||
match input {
|
||||
file_input(ref file) => {
|
||||
parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
|
||||
parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
|
||||
}
|
||||
str_input(ref src) => {
|
||||
// FIXME (#2319): Don't really want to box the source string
|
||||
|
|
|
@ -26,7 +26,7 @@ use core::vec;
|
|||
`~` */
|
||||
///an unzipping of `token_tree`s
|
||||
struct TtFrame {
|
||||
readme: @mut ~[ast::token_tree],
|
||||
forest: @mut ~[ast::token_tree],
|
||||
idx: uint,
|
||||
dotdotdoted: bool,
|
||||
sep: Option<Token>,
|
||||
|
@ -37,7 +37,7 @@ pub struct TtReader {
|
|||
sp_diag: @span_handler,
|
||||
interner: @ident_interner,
|
||||
// the unzipped tree:
|
||||
cur: @mut TtFrame,
|
||||
stack: @mut TtFrame,
|
||||
/* for MBE-style macro transcription */
|
||||
interpolations: LinearMap<ident, @named_match>,
|
||||
repeat_idx: ~[uint],
|
||||
|
@ -58,8 +58,8 @@ pub fn new_tt_reader(sp_diag: @span_handler,
|
|||
let r = @mut TtReader {
|
||||
sp_diag: sp_diag,
|
||||
interner: itr,
|
||||
cur: @mut TtFrame {
|
||||
readme: @mut src,
|
||||
stack: @mut TtFrame {
|
||||
forest: @mut src,
|
||||
idx: 0u,
|
||||
dotdotdoted: false,
|
||||
sep: None,
|
||||
|
@ -81,7 +81,7 @@ pub fn new_tt_reader(sp_diag: @span_handler,
|
|||
|
||||
fn dup_tt_frame(f: @mut TtFrame) -> @mut TtFrame {
|
||||
@mut TtFrame {
|
||||
readme: @mut (copy *f.readme),
|
||||
forest: @mut (copy *f.forest),
|
||||
idx: f.idx,
|
||||
dotdotdoted: f.dotdotdoted,
|
||||
sep: copy f.sep,
|
||||
|
@ -96,7 +96,7 @@ pub fn dup_tt_reader(r: @mut TtReader) -> @mut TtReader {
|
|||
@mut TtReader {
|
||||
sp_diag: r.sp_diag,
|
||||
interner: r.interner,
|
||||
cur: dup_tt_frame(r.cur),
|
||||
stack: dup_tt_frame(r.stack),
|
||||
interpolations: r.interpolations,
|
||||
repeat_idx: copy r.repeat_idx,
|
||||
repeat_len: copy r.repeat_len,
|
||||
|
@ -167,7 +167,8 @@ fn lockstep_iter_size(t: token_tree, r: &mut TtReader) -> lis {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// return the next token from the TtReader.
|
||||
// EFFECT: advances the reader's token field
|
||||
pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
|
||||
let ret_val = TokenAndSpan {
|
||||
tok: copy r.cur_tok,
|
||||
|
@ -175,37 +176,37 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
|
|||
};
|
||||
loop {
|
||||
{
|
||||
let cur = &mut *r.cur;
|
||||
let readme = &mut *cur.readme;
|
||||
if cur.idx < readme.len() {
|
||||
let stack = &mut *r.stack;
|
||||
let forest = &mut *stack.forest;
|
||||
if stack.idx < forest.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* done with this set; pop or repeat? */
|
||||
if ! r.cur.dotdotdoted
|
||||
if ! r.stack.dotdotdoted
|
||||
|| { *r.repeat_idx.last() == *r.repeat_len.last() - 1 } {
|
||||
|
||||
match r.cur.up {
|
||||
match r.stack.up {
|
||||
None => {
|
||||
r.cur_tok = EOF;
|
||||
return ret_val;
|
||||
}
|
||||
Some(tt_f) => {
|
||||
if r.cur.dotdotdoted {
|
||||
if r.stack.dotdotdoted {
|
||||
r.repeat_idx.pop();
|
||||
r.repeat_len.pop();
|
||||
}
|
||||
|
||||
r.cur = tt_f;
|
||||
r.cur.idx += 1u;
|
||||
r.stack = tt_f;
|
||||
r.stack.idx += 1u;
|
||||
}
|
||||
}
|
||||
|
||||
} else { /* repeat */
|
||||
r.cur.idx = 0u;
|
||||
r.stack.idx = 0u;
|
||||
r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
|
||||
match r.cur.sep {
|
||||
match r.stack.sep {
|
||||
Some(copy tk) => {
|
||||
r.cur_tok = tk; /* repeat same span, I guess */
|
||||
return ret_val;
|
||||
|
@ -216,21 +217,21 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
|
|||
}
|
||||
loop { /* because it's easiest, this handles `tt_delim` not starting
|
||||
with a `tt_tok`, even though it won't happen */
|
||||
match r.cur.readme[r.cur.idx] {
|
||||
match r.stack.forest[r.stack.idx] {
|
||||
tt_delim(copy tts) => {
|
||||
r.cur = @mut TtFrame {
|
||||
readme: @mut tts,
|
||||
r.stack = @mut TtFrame {
|
||||
forest: @mut tts,
|
||||
idx: 0u,
|
||||
dotdotdoted: false,
|
||||
sep: None,
|
||||
up: option::Some(r.cur)
|
||||
up: option::Some(r.stack)
|
||||
};
|
||||
// if this could be 0-length, we'd need to potentially recur here
|
||||
}
|
||||
tt_tok(sp, copy tok) => {
|
||||
r.cur_span = sp;
|
||||
r.cur_tok = tok;
|
||||
r.cur.idx += 1u;
|
||||
r.stack.idx += 1u;
|
||||
return ret_val;
|
||||
}
|
||||
tt_seq(sp, copy tts, copy sep, zerok) => {
|
||||
|
@ -256,17 +257,17 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
|
|||
once");
|
||||
}
|
||||
|
||||
r.cur.idx += 1u;
|
||||
r.stack.idx += 1u;
|
||||
return tt_next_token(r);
|
||||
} else {
|
||||
r.repeat_len.push(len);
|
||||
r.repeat_idx.push(0u);
|
||||
r.cur = @mut TtFrame {
|
||||
readme: @mut tts,
|
||||
r.stack = @mut TtFrame {
|
||||
forest: @mut tts,
|
||||
idx: 0u,
|
||||
dotdotdoted: true,
|
||||
sep: sep,
|
||||
up: Some(r.cur)
|
||||
up: Some(r.stack)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -280,13 +281,13 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
|
|||
(b) we actually can, since it's a token. */
|
||||
matched_nonterminal(nt_ident(sn,b)) => {
|
||||
r.cur_span = sp; r.cur_tok = IDENT(sn,b);
|
||||
r.cur.idx += 1u;
|
||||
r.stack.idx += 1u;
|
||||
return ret_val;
|
||||
}
|
||||
matched_nonterminal(ref other_whole_nt) => {
|
||||
r.cur_span = sp;
|
||||
r.cur_tok = INTERPOLATED(copy *other_whole_nt);
|
||||
r.cur.idx += 1u;
|
||||
r.stack.idx += 1u;
|
||||
return ret_val;
|
||||
}
|
||||
matched_seq(*) => {
|
||||
|
|
|
@ -159,6 +159,9 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// if the given word is not a keyword, signal an error.
|
||||
// if the next token is the given keyword, eat it and return
|
||||
// true. Otherwise, return false.
|
||||
fn eat_keyword(&self, word: &~str) -> bool {
|
||||
self.require_keyword(word);
|
||||
let is_kw = match *self.token {
|
||||
|
@ -169,6 +172,9 @@ pub impl Parser {
|
|||
is_kw
|
||||
}
|
||||
|
||||
// if the given word is not a keyword, signal an error.
|
||||
// if the next token is not the given word, signal an error.
|
||||
// otherwise, eat it.
|
||||
fn expect_keyword(&self, word: &~str) {
|
||||
self.require_keyword(word);
|
||||
if !self.eat_keyword(word) {
|
||||
|
@ -182,10 +188,12 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// return true if the given string is a strict keyword
|
||||
fn is_strict_keyword(&self, word: &~str) -> bool {
|
||||
self.strict_keywords.contains(word)
|
||||
}
|
||||
|
||||
// signal an error if the current token is a strict keyword
|
||||
fn check_strict_keywords(&self) {
|
||||
match *self.token {
|
||||
token::IDENT(_, false) => {
|
||||
|
@ -196,16 +204,19 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// signal an error if the given string is a strict keyword
|
||||
fn check_strict_keywords_(&self, w: &~str) {
|
||||
if self.is_strict_keyword(w) {
|
||||
self.fatal(fmt!("found `%s` in ident position", *w));
|
||||
}
|
||||
}
|
||||
|
||||
// return true if this is a reserved keyword
|
||||
fn is_reserved_keyword(&self, word: &~str) -> bool {
|
||||
self.reserved_keywords.contains(word)
|
||||
}
|
||||
|
||||
// signal an error if the current token is a reserved keyword
|
||||
fn check_reserved_keywords(&self) {
|
||||
match *self.token {
|
||||
token::IDENT(_, false) => {
|
||||
|
@ -216,6 +227,7 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// signal an error if the given string is a reserved keyword
|
||||
fn check_reserved_keywords_(&self, w: &~str) {
|
||||
if self.is_reserved_keyword(w) {
|
||||
self.fatal(fmt!("`%s` is a reserved keyword", *w));
|
||||
|
@ -223,7 +235,8 @@ pub impl Parser {
|
|||
}
|
||||
|
||||
// expect and consume a GT. if a >> is seen, replace it
|
||||
// with a single > and continue.
|
||||
// with a single > and continue. If a GT is not seen,
|
||||
// signal an error.
|
||||
fn expect_gt(&self) {
|
||||
if *self.token == token::GT {
|
||||
self.bump();
|
||||
|
|
|
@ -80,7 +80,8 @@ pub fn new_low_level_string_reader(span_diagnostic: @span_handler,
|
|||
last_pos: filemap.start_pos,
|
||||
col: CharPos(0),
|
||||
curr: initial_char,
|
||||
filemap: filemap, interner: itr,
|
||||
filemap: filemap,
|
||||
interner: itr,
|
||||
/* dummy values; not read */
|
||||
peek_tok: token::EOF,
|
||||
peek_span: codemap::dummy_sp()
|
||||
|
@ -150,6 +151,7 @@ impl reader for TtReader {
|
|||
}
|
||||
|
||||
// EFFECT: advance peek_tok and peek_span to refer to the next token.
|
||||
// EFFECT: update the interner, maybe.
|
||||
fn string_advance_token(r: @mut StringReader) {
|
||||
match (consume_whitespace_and_comments(r)) {
|
||||
Some(comment) => {
|
||||
|
@ -539,6 +541,9 @@ fn ident_continue(c: char) -> bool {
|
|||
|| (c > 'z' && char::is_XID_continue(c))
|
||||
}
|
||||
|
||||
// return the next token from the string
|
||||
// EFFECT: advances the input past that token
|
||||
// EFFECT: updates the interner
|
||||
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
|
||||
let mut accum_str = ~"";
|
||||
let mut c = rdr.curr;
|
||||
|
|
|
@ -45,10 +45,14 @@ pub mod classify;
|
|||
/// Reporting obsolete syntax
|
||||
pub mod obsolete;
|
||||
|
||||
// info about a parsing session.
|
||||
// This structure and the reader both have
|
||||
// an interner associated with them. If they're
|
||||
// not the same, bad things can happen.
|
||||
pub struct ParseSess {
|
||||
cm: @codemap::CodeMap,
|
||||
cm: @codemap::CodeMap, // better be the same as the one in the reader!
|
||||
next_id: node_id,
|
||||
span_diagnostic: @span_handler,
|
||||
span_diagnostic: @span_handler, // better be the same as the one in the reader!
|
||||
interner: @ident_interner,
|
||||
}
|
||||
|
||||
|
@ -90,6 +94,19 @@ pub fn parse_crate_from_file(
|
|||
// why is there no p.abort_if_errors here?
|
||||
}
|
||||
|
||||
pub fn parse_crate_from_file_using_tts(
|
||||
input: &Path,
|
||||
cfg: ast::crate_cfg,
|
||||
sess: @mut ParseSess
|
||||
) -> @ast::crate {
|
||||
let p = new_parser_from_file(sess, /*bad*/ copy cfg, input);
|
||||
let tts = p.parse_all_token_trees();
|
||||
new_parser_from_tts(sess,cfg,tts).parse_crate_mod(/*bad*/ copy cfg)
|
||||
// why is there no p.abort_if_errors here?
|
||||
}
|
||||
|
||||
|
||||
|
||||
pub fn parse_crate_from_source_str(
|
||||
name: ~str,
|
||||
source: @~str,
|
||||
|
@ -313,6 +330,7 @@ mod test {
|
|||
use std;
|
||||
use core::io;
|
||||
use core::option::None;
|
||||
use ast;
|
||||
|
||||
#[test] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
|
||||
do io::with_str_writer |writer| {
|
||||
|
@ -320,10 +338,38 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[test] fn alltts () {
|
||||
fn string_to_crate (source_str : @~str) -> @ast::crate {
|
||||
parse_crate_from_source_str(
|
||||
~"bogofile",
|
||||
source_str,
|
||||
~[],
|
||||
new_parse_sess(None))
|
||||
}
|
||||
|
||||
fn string_to_tt_to_crate (source_str : @~str) -> @ast::crate {
|
||||
let tts = parse_tts_from_source_str(
|
||||
~"bogofile",
|
||||
@~"fn foo (x : int) { x; }",
|
||||
source_str,
|
||||
~[],
|
||||
new_parse_sess(None));
|
||||
new_parser_from_tts(new_parse_sess(None),~[],tts)
|
||||
.parse_crate_mod(~[])
|
||||
}
|
||||
|
||||
// make sure that parsing from TTs produces the same result
|
||||
// as parsing from strings
|
||||
#[test] fn tts_produce_the_same_result () {
|
||||
let source_str = @~"fn foo (x : int) { x; }";
|
||||
assert_eq!(string_to_tt_to_crate(source_str),
|
||||
string_to_crate(source_str));
|
||||
}
|
||||
|
||||
// check the contents of the tt manually:
|
||||
#[test] fn alltts () {
|
||||
let source_str = @~"fn foo (x : int) { x; }";
|
||||
let tts = parse_tts_from_source_str(
|
||||
~"bogofile",
|
||||
source_str,
|
||||
~[],
|
||||
new_parse_sess(None));
|
||||
assert_eq!(
|
||||
|
|
|
@ -248,6 +248,7 @@ pub fn Parser(sess: @mut ParseSess,
|
|||
}
|
||||
}
|
||||
|
||||
// ooh, nasty mutable fields everywhere....
|
||||
pub struct Parser {
|
||||
sess: @mut ParseSess,
|
||||
cfg: crate_cfg,
|
||||
|
@ -340,6 +341,7 @@ pub impl Parser {
|
|||
self.sess.interner.get(id)
|
||||
}
|
||||
|
||||
// is this one of the keywords that signals a closure type?
|
||||
fn token_is_closure_keyword(&self, tok: &token::Token) -> bool {
|
||||
self.token_is_keyword(&~"pure", tok) ||
|
||||
self.token_is_keyword(&~"unsafe", tok) ||
|
||||
|
@ -347,6 +349,7 @@ pub impl Parser {
|
|||
self.token_is_keyword(&~"fn", tok)
|
||||
}
|
||||
|
||||
// parse a ty_bare_fn type:
|
||||
fn parse_ty_bare_fn(&self) -> ty_
|
||||
{
|
||||
/*
|
||||
|
@ -376,6 +379,7 @@ pub impl Parser {
|
|||
});
|
||||
}
|
||||
|
||||
// parse a ty_closure type
|
||||
fn parse_ty_closure(&self,
|
||||
sigil: ast::Sigil,
|
||||
region: Option<@ast::Lifetime>) -> ty_
|
||||
|
@ -434,6 +438,7 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// parse a function type (following the 'fn')
|
||||
fn parse_ty_fn_decl(&self) -> (fn_decl, OptVec<ast::Lifetime>) {
|
||||
/*
|
||||
|
||||
|
@ -545,12 +550,14 @@ pub impl Parser {
|
|||
}
|
||||
|
||||
|
||||
// parse a possibly mutable type
|
||||
fn parse_mt(&self) -> mt {
|
||||
let mutbl = self.parse_mutability();
|
||||
let t = self.parse_ty(false);
|
||||
mt { ty: t, mutbl: mutbl }
|
||||
}
|
||||
|
||||
// parse [mut/const/imm] ID : TY
|
||||
fn parse_ty_field(&self) -> ty_field {
|
||||
let lo = self.span.lo;
|
||||
let mutbl = self.parse_mutability();
|
||||
|
@ -567,6 +574,7 @@ pub impl Parser {
|
|||
)
|
||||
}
|
||||
|
||||
// parse optional return type [ -> TY ] in function decl
|
||||
fn parse_ret_ty(&self) -> (ret_style, @Ty) {
|
||||
return if self.eat(&token::RARROW) {
|
||||
let lo = self.span.lo;
|
||||
|
@ -595,6 +603,7 @@ pub impl Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// parse a type.
|
||||
// Useless second parameter for compatibility with quasiquote macros.
|
||||
// Bleh!
|
||||
fn parse_ty(&self, _: bool) -> @Ty {
|
||||
|
@ -631,15 +640,19 @@ pub impl Parser {
|
|||
t
|
||||
}
|
||||
} else if *self.token == token::AT {
|
||||
// MANAGED POINTER
|
||||
self.bump();
|
||||
self.parse_box_or_uniq_pointee(ManagedSigil, ty_box)
|
||||
} else if *self.token == token::TILDE {
|
||||
// OWNED POINTER
|
||||
self.bump();
|
||||
self.parse_box_or_uniq_pointee(OwnedSigil, ty_uniq)
|
||||
} else if *self.token == token::BINOP(token::STAR) {
|
||||
// STAR POINTER (bare pointer?)
|
||||
self.bump();
|
||||
ty_ptr(self.parse_mt())
|
||||
} else if *self.token == token::LBRACE {
|
||||
// STRUCTURAL RECORD (remove?)
|
||||
let elems = self.parse_unspanned_seq(
|
||||
&token::LBRACE,
|
||||
&token::RBRACE,
|
||||
|
@ -652,6 +665,7 @@ pub impl Parser {
|
|||
self.obsolete(*self.last_span, ObsoleteRecordType);
|
||||
ty_nil
|
||||
} else if *self.token == token::LBRACKET {
|
||||
// VECTOR
|
||||
self.expect(&token::LBRACKET);
|
||||
let mt = self.parse_mt();
|
||||
if mt.mutbl == m_mutbl { // `m_const` too after snapshot
|
||||
|
@ -667,16 +681,20 @@ pub impl Parser {
|
|||
self.expect(&token::RBRACKET);
|
||||
t
|
||||
} else if *self.token == token::BINOP(token::AND) {
|
||||
// BORROWED POINTER
|
||||
self.bump();
|
||||
self.parse_borrowed_pointee()
|
||||
} else if self.eat_keyword(&~"extern") {
|
||||
// EXTERN FUNCTION
|
||||
self.parse_ty_bare_fn()
|
||||
} else if self.token_is_closure_keyword(&copy *self.token) {
|
||||
// CLOSURE
|
||||
let result = self.parse_ty_closure(ast::BorrowedSigil, None);
|
||||
self.obsolete(*self.last_span, ObsoleteBareFnType);
|
||||
result
|
||||
} else if *self.token == token::MOD_SEP
|
||||
|| is_ident_or_path(&*self.token) {
|
||||
// NAMED TYPE
|
||||
let path = self.parse_path_with_tps(false);
|
||||
ty_path(path, self.get_id())
|
||||
} else {
|
||||
|
@ -885,6 +903,8 @@ pub impl Parser {
|
|||
let global = self.eat(&token::MOD_SEP);
|
||||
let mut ids = ~[];
|
||||
loop {
|
||||
// if there's a ::< coming, stop processing
|
||||
// the path.
|
||||
let is_not_last =
|
||||
self.look_ahead(2u) != token::LT
|
||||
&& self.look_ahead(1u) == token::MOD_SEP;
|
||||
|
@ -904,6 +924,9 @@ pub impl Parser {
|
|||
types: ~[] }
|
||||
}
|
||||
|
||||
// parse a path optionally with type parameters. If 'colons'
|
||||
// is true, then type parameters must be preceded by colons,
|
||||
// as in a::t::<t1,t2>
|
||||
fn parse_path_with_tps(&self, colons: bool) -> @ast::path {
|
||||
debug!("parse_path_with_tps(colons=%b)", colons);
|
||||
|
||||
|
@ -1071,6 +1094,7 @@ pub impl Parser {
|
|||
self.token_is_keyword(&~"const", tok)
|
||||
}
|
||||
|
||||
// parse mutability declaration (mut/const/imm)
|
||||
fn parse_mutability(&self) -> mutability {
|
||||
if self.eat_keyword(&~"mut") {
|
||||
m_mutbl
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue