1
Fork 0

syntax: methodify the lexer

This commit is contained in:
Corey Richardson 2014-05-21 16:57:31 -07:00
parent 5343eb7e0c
commit 46d1af28b5
8 changed files with 1195 additions and 1187 deletions

View file

@ -34,7 +34,7 @@ pub fn highlight(src: &str, class: Option<&str>) -> String {
     let mut out = io::MemWriter::new();
     doit(&sess,
-         lexer::new_string_reader(&sess.span_diagnostic, fm),
+         lexer::StringReader::new(&sess.span_diagnostic, fm),
          class,
          &mut out).unwrap();
     str::from_utf8_lossy(out.unwrap().as_slice()).to_string()

View file

@ -15,7 +15,7 @@ use ast::{AttrId, Attribute, Attribute_, MetaItem, MetaWord, MetaNameValue, Meta
 use codemap::{Span, Spanned, spanned, dummy_spanned};
 use codemap::BytePos;
 use diagnostic::SpanHandler;
-use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
+use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
 use parse::token::InternedString;
 use parse::token;
 use crateid::CrateId;

File diff suppressed because it is too large Load diff

View file

@ -11,8 +11,8 @@
 use ast;
 use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
-use parse::lexer::{is_whitespace, with_str_from, Reader};
-use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
+use parse::lexer::{is_whitespace, Reader};
+use parse::lexer::{StringReader, TokenAndSpan};
 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
 use parse::lexer;
 use parse::token;
@ -141,31 +141,6 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
     fail!("not a doc-comment: {}", comment);
 }
-fn read_to_eol(rdr: &mut StringReader) -> String {
-    let mut val = String::new();
-    while !rdr.curr_is('\n') && !is_eof(rdr) {
-        val.push_char(rdr.curr.unwrap());
-        bump(rdr);
-    }
-    if rdr.curr_is('\n') { bump(rdr); }
-    return val
-}
-fn read_one_line_comment(rdr: &mut StringReader) -> String {
-    let val = read_to_eol(rdr);
-    assert!((val.as_slice()[0] == '/' as u8 &&
-             val.as_slice()[1] == '/' as u8) ||
-            (val.as_slice()[0] == '#' as u8 &&
-             val.as_slice()[1] == '!' as u8));
-    return val;
-}
-fn consume_non_eol_whitespace(rdr: &mut StringReader) {
-    while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
-        bump(rdr);
-    }
-}
 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
     debug!(">>> blank-line comment");
     comments.push(Comment {
@@ -177,11 +152,11 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
                                            comments: &mut Vec<Comment>) {
-    while is_whitespace(rdr.curr) && !is_eof(rdr) {
+    while is_whitespace(rdr.curr) && !rdr.is_eof() {
         if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
             push_blank_line_comment(rdr, &mut *comments);
         }
-        bump(rdr);
+        rdr.bump();
     }
 }
@ -193,7 +168,7 @@ fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
     debug!("<<< shebang comment");
     comments.push(Comment {
         style: if code_to_the_left { Trailing } else { Isolated },
-        lines: vec!(read_one_line_comment(rdr)),
+        lines: vec!(rdr.read_one_line_comment()),
         pos: p
     });
 }
@ -203,15 +178,15 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
     debug!(">>> line comments");
     let p = rdr.last_pos;
     let mut lines: Vec<String> = Vec::new();
-    while rdr.curr_is('/') && nextch_is(rdr, '/') {
-        let line = read_one_line_comment(rdr);
+    while rdr.curr_is('/') && rdr.nextch_is('/') {
+        let line = rdr.read_one_line_comment();
         debug!("{}", line);
         // Doc comments are not put in comments.
         if is_doc_comment(line.as_slice()) {
             break;
         }
         lines.push(line);
-        consume_non_eol_whitespace(rdr);
+        rdr.consume_non_eol_whitespace();
     }
     debug!("<<< line comments");
     if !lines.is_empty() {
@ -265,21 +240,21 @@ fn read_block_comment(rdr: &mut StringReader,
     let p = rdr.last_pos;
     let mut lines: Vec<String> = Vec::new();
     let col = rdr.col;
-    bump(rdr);
-    bump(rdr);
+    rdr.bump();
+    rdr.bump();
     let mut curr_line = String::from_str("/*");
     // doc-comments are not really comments, they are attributes
-    if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
-        while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
+    if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
+        while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
             curr_line.push_char(rdr.curr.unwrap());
-            bump(rdr);
+            rdr.bump();
         }
-        if !is_eof(rdr) {
+        if !rdr.is_eof() {
             curr_line.push_str("*/");
-            bump(rdr);
-            bump(rdr);
+            rdr.bump();
+            rdr.bump();
         }
         if !is_block_non_doc_comment(curr_line.as_slice()) {
             return
@ -290,7 +265,7 @@ fn read_block_comment(rdr: &mut StringReader,
     let mut level: int = 1;
     while level > 0 {
         debug!("=== block comment level {}", level);
-        if is_eof(rdr) {
+        if rdr.is_eof() {
             rdr.fatal("unterminated block comment");
         }
         if rdr.curr_is('\n') {
@ -298,21 +273,21 @@ fn read_block_comment(rdr: &mut StringReader,
                              curr_line,
                              col);
             curr_line = String::new();
-            bump(rdr);
+            rdr.bump();
         } else {
             curr_line.push_char(rdr.curr.unwrap());
-            if rdr.curr_is('/') && nextch_is(rdr, '*') {
-                bump(rdr);
-                bump(rdr);
+            if rdr.curr_is('/') && rdr.nextch_is('*') {
+                rdr.bump();
+                rdr.bump();
                 curr_line.push_char('*');
                 level += 1;
             } else {
-                if rdr.curr_is('*') && nextch_is(rdr, '/') {
-                    bump(rdr);
-                    bump(rdr);
+                if rdr.curr_is('*') && rdr.nextch_is('/') {
+                    rdr.bump();
+                    rdr.bump();
                     curr_line.push_char('/');
                     level -= 1;
-                } else { bump(rdr); }
+                } else { rdr.bump(); }
             }
         }
     }
@ -324,31 +299,24 @@ fn read_block_comment(rdr: &mut StringReader,
     }
     let mut style = if code_to_the_left { Trailing } else { Isolated };
-    consume_non_eol_whitespace(rdr);
-    if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
+    rdr.consume_non_eol_whitespace();
+    if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
         style = Mixed;
     }
     debug!("<<< block comment");
     comments.push(Comment {style: style, lines: lines, pos: p});
 }
-fn peeking_at_comment(rdr: &StringReader) -> bool {
-    return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
-           (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
-           // consider shebangs comments, but not inner attributes
-           (rdr.curr_is('#') && nextch_is(rdr, '!') &&
-            !lexer::nextnextch_is(rdr, '['));
-}
 fn consume_comment(rdr: &mut StringReader,
                    code_to_the_left: bool,
                    comments: &mut Vec<Comment> ) {
     debug!(">>> consume comment");
-    if rdr.curr_is('/') && nextch_is(rdr, '/') {
+    if rdr.curr_is('/') && rdr.nextch_is('/') {
         read_line_comments(rdr, code_to_the_left, comments);
-    } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
+    } else if rdr.curr_is('/') && rdr.nextch_is('*') {
         read_block_comment(rdr, code_to_the_left, comments);
-    } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
+    } else if rdr.curr_is('#') && rdr.nextch_is('!') {
         read_shebang_comment(rdr, code_to_the_left, comments);
     } else { fail!(); }
     debug!("<<< consume comment");
@ -362,8 +330,7 @@ pub struct Literal {
 // it appears this function is called only from pprust... that's
 // probably not a good thing.
-pub fn gather_comments_and_literals(span_diagnostic:
-                                        &diagnostic::SpanHandler,
+pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
                                     path: String,
                                     srdr: &mut io::Reader)
                                  -> (Vec<Comment>, Vec<Literal>) {
@ -371,20 +338,20 @@ pub fn gather_comments_and_literals(span_diagnostic:
     let src = str::from_utf8(src.as_slice()).unwrap().to_string();
     let cm = CodeMap::new();
     let filemap = cm.new_filemap(path, src);
-    let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
+    let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
     let mut comments: Vec<Comment> = Vec::new();
     let mut literals: Vec<Literal> = Vec::new();
     let mut first_read: bool = true;
-    while !is_eof(&rdr) {
+    while !rdr.is_eof() {
         loop {
             let mut code_to_the_left = !first_read;
-            consume_non_eol_whitespace(&mut rdr);
+            rdr.consume_non_eol_whitespace();
             if rdr.curr_is('\n') {
                 code_to_the_left = false;
                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
             }
-            while peeking_at_comment(&rdr) {
+            while rdr.peeking_at_comment() {
                 consume_comment(&mut rdr, code_to_the_left, &mut comments);
                 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
             }
@ -397,7 +364,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
             //discard, and look ahead; we're working with internal state
             let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
             if token::is_lit(&tok) {
-                with_str_from(&rdr, bstart, |s| {
+                rdr.with_str_from(bstart, |s| {
                     debug!("tok lit: {}", s);
                     literals.push(Literal {lit: s.to_string(), pos: sp.lo});
                 })

File diff suppressed because it is too large Load diff

View file

@ -25,7 +25,6 @@ use std::str;
 pub mod lexer;
 pub mod parser;
 pub mod token;
-pub mod comments;
 pub mod attr;
 pub mod common;
@ -255,7 +254,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
     // it appears to me that the cfg doesn't matter here... indeed,
     // parsing tt's probably shouldn't require a parser at all.
     let cfg = Vec::new();
-    let srdr = lexer::new_string_reader(&sess.span_diagnostic, filemap);
+    let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
     let mut p1 = Parser::new(sess, cfg, box srdr);
     p1.parse_all_token_trees()
 }

View file

@ -166,7 +166,7 @@ pub fn to_str(t: &Token) -> String {
         ANDAND => "&&".to_string(),
         BINOP(op) => binop_to_str(op).to_string(),
         BINOPEQ(op) => {
-            let mut s = binop_to_str(op).to_strbuf();
+            let mut s = binop_to_str(op).to_string();
             s.push_str("=");
             s
         }

View file

@ -20,7 +20,8 @@ use codemap;
 use diagnostic;
 use parse::classify::expr_is_simple_block;
 use parse::token::IdentInterner;
-use parse::{comments, token};
+use parse::token;
+use parse::lexer::comments;
 use parse;
 use print::pp::{break_offset, word, space, zerobreak, hardbreak};
 use print::pp::{Breaks, Consistent, Inconsistent, eof};