lexer: lex WS/COMMENT/SHEBANG rather than skipping
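The lexer now categorizes every byte of its input according to the grammar, emitting whitespace, comments, and the shebang line as tokens (WS, COMMENT, SHEBANG) instead of silently skipping them. The parser skips over these tokens while parsing, so they never appear in the input passed to syntax extensions.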
This commit is contained in:
parent
cc4213418e
commit
f512779554
6 changed files with 133 additions and 86 deletions
|
@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool {
|
|||
is_plain_ident(t) || *t == token::UNDERSCORE
|
||||
}
|
||||
|
||||
/// Get a token the parser cares about
|
||||
fn real_token(rdr: &mut Reader) -> TokenAndSpan {
|
||||
let mut t = rdr.next_token();
|
||||
loop {
|
||||
match t.tok {
|
||||
token::WS | token::COMMENT | token::SHEBANG(_) => {
|
||||
t = rdr.next_token();
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
t
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig,
|
||||
mut rdr: Box<Reader>) -> Parser<'a> {
|
||||
let tok0 = rdr.next_token();
|
||||
let tok0 = real_token(rdr);
|
||||
let span = tok0.sp;
|
||||
let placeholder = TokenAndSpan {
|
||||
tok: token::UNDERSCORE,
|
||||
|
@ -864,7 +878,7 @@ impl<'a> Parser<'a> {
|
|||
None
|
||||
};
|
||||
let next = if self.buffer_start == self.buffer_end {
|
||||
self.reader.next_token()
|
||||
real_token(self.reader)
|
||||
} else {
|
||||
// Avoid token copies with `replace`.
|
||||
let buffer_start = self.buffer_start as uint;
|
||||
|
@ -908,7 +922,7 @@ impl<'a> Parser<'a> {
|
|||
-> R {
|
||||
let dist = distance as int;
|
||||
while self.buffer_length() < dist {
|
||||
self.buffer[self.buffer_end as uint] = self.reader.next_token();
|
||||
self.buffer[self.buffer_end as uint] = real_token(self.reader);
|
||||
self.buffer_end = (self.buffer_end + 1) & 3;
|
||||
}
|
||||
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue