1
Fork 0

Add block-comment support, various additional operators. Rustc can lex itself now.

This commit is contained in:
Graydon Hoare 2010-08-31 13:29:00 -07:00
parent 47e86a05ae
commit cfca901cfe
2 changed files with 165 additions and 61 deletions

View file

@ -238,16 +238,48 @@ fn consume_any_whitespace(reader rdr) {
fn consume_any_line_comment(reader rdr) { fn consume_any_line_comment(reader rdr) {
if (rdr.curr() == '/') { if (rdr.curr() == '/') {
if (rdr.next() == '/') { alt (rdr.next()) {
while (rdr.curr() != '\n') { case ('/') {
rdr.bump(); while (rdr.curr() != '\n') {
rdr.bump();
}
// Restart whitespace munch.
be consume_any_whitespace(rdr);
}
case ('*') {
rdr.bump();
rdr.bump();
be consume_block_comment(rdr);
}
case (_) {
ret;
} }
// Restart whitespace munch.
be consume_any_whitespace(rdr);
} }
} }
} }
fn consume_block_comment(reader rdr) {
let int level = 1;
while (level > 0) {
if (rdr.curr() == '/' && rdr.next() == '*') {
rdr.bump();
rdr.bump();
level += 1;
} else {
if (rdr.curr() == '*' && rdr.next() == '/') {
rdr.bump();
rdr.bump();
level -= 1;
} else {
rdr.bump();
}
}
}
// restart whitespace munch.
be consume_any_whitespace(rdr);
}
fn next_token(reader rdr) -> token.token { fn next_token(reader rdr) -> token.token {
auto accum_str = ""; auto accum_str = "";
auto accum_int = 0; auto accum_int = 0;
@ -310,18 +342,19 @@ fn next_token(reader rdr) -> token.token {
} }
// Lexes a binary operator that may be followed by '=' to form a
// compound-assignment token.
//
// On entry the reader's current character is the final character of the
// operator denoted by `op`. That character is consumed. If an '='
// immediately follows, it is consumed too and token.BINOPEQ(op) is
// returned; otherwise token.BINOP(op) is returned and the following
// character is left in place.
fn binop(reader rdr, token.binop op) -> token.token {
    // Step past the operator character itself.
    rdr.bump();
    // After the bump, the character directly following the operator is
    // curr(), not next(). Testing next() would look one character too far,
    // so "+=" would lex as BINOP followed by EQ, while "+ =" would be
    // mistaken for a compound assignment.
    if (rdr.curr() == '=') {
        rdr.bump();
        ret token.BINOPEQ(op);
    } else {
        ret token.BINOP(op);
    }
}
alt (c) { alt (c) {
// One-byte tokens. // One-byte tokens.
case (':') { rdr.bump(); ret token.COLON(); }
case (';') { rdr.bump(); ret token.SEMI(); } case (';') { rdr.bump(); ret token.SEMI(); }
case (',') { rdr.bump(); ret token.COMMA(); } case (',') { rdr.bump(); ret token.COMMA(); }
case ('.') { rdr.bump(); ret token.DOT(); } case ('.') { rdr.bump(); ret token.DOT(); }
@ -334,16 +367,74 @@ fn next_token(reader rdr) -> token.token {
case ('@') { rdr.bump(); ret token.AT(); } case ('@') { rdr.bump(); ret token.AT(); }
case ('#') { rdr.bump(); ret token.POUND(); } case ('#') { rdr.bump(); ret token.POUND(); }
case ('_') { rdr.bump(); ret token.UNDERSCORE(); } case ('_') { rdr.bump(); ret token.UNDERSCORE(); }
case ('~') { rdr.bump(); ret token.TILDE(); }
// Multi-byte tokens. // Multi-byte tokens.
case ('=') { case ('=') {
if (rdr.next() == '=') { rdr.bump();
if (rdr.curr() == '=') {
rdr.bump(); rdr.bump();
rdr.bump(); ret token.EQEQ();
ret token.OP(token.EQEQ());
} else { } else {
ret token.EQ();
}
}
case ('!') {
rdr.bump();
if (rdr.curr() == '=') {
rdr.bump(); rdr.bump();
ret token.OP(token.EQ()); ret token.NE();
} else {
ret token.NOT();
}
}
case ('<') {
rdr.bump();
alt (rdr.curr()) {
case ('=') {
rdr.bump();
ret token.LE();
}
case ('<') {
ret binop(rdr, token.LSL());
}
case ('-') {
rdr.bump();
ret token.LARROW();
}
case ('|') {
rdr.bump();
ret token.SEND();
}
case (_) {
ret token.LT();
}
}
}
case ('>') {
rdr.bump();
alt (rdr.curr()) {
case ('=') {
rdr.bump();
ret token.GE();
}
case ('>') {
if (rdr.next() == '>') {
rdr.bump();
ret binop(rdr, token.ASR());
} else {
ret binop(rdr, token.LSR());
}
}
case (_) {
ret token.GT();
}
} }
} }
@ -426,7 +517,7 @@ fn next_token(reader rdr) -> token.token {
rdr.bump(); rdr.bump();
ret token.RARROW(); ret token.RARROW();
} else { } else {
ret op_or_opeq(rdr, token.MINUS()); ret binop(rdr, token.MINUS());
} }
} }
@ -434,34 +525,40 @@ fn next_token(reader rdr) -> token.token {
if (rdr.next() == '&') { if (rdr.next() == '&') {
rdr.bump(); rdr.bump();
rdr.bump(); rdr.bump();
ret token.OP(token.ANDAND()); ret token.ANDAND();
} else { } else {
ret op_or_opeq(rdr, token.AND()); ret binop(rdr, token.AND());
}
}
case ('|') {
if (rdr.next() == '|') {
rdr.bump();
rdr.bump();
ret token.OROR();
} else {
ret binop(rdr, token.OR());
} }
} }
case ('+') { case ('+') {
ret op_or_opeq(rdr, token.PLUS()); ret binop(rdr, token.PLUS());
} }
case ('*') { case ('*') {
ret op_or_opeq(rdr, token.STAR()); ret binop(rdr, token.STAR());
} }
case ('/') {
    // Division lexes as SLASH (optionally followed by '=').
    // Returning STAR here would make '/' indistinguishable from '*'.
    ret binop(rdr, token.SLASH());
}
case ('^') { case ('^') {
ret op_or_opeq(rdr, token.CARET()); ret binop(rdr, token.CARET());
} }
case ('%') { case ('%') {
ret op_or_opeq(rdr, token.PERCENT()); ret binop(rdr, token.PERCENT());
} }
} }

View file

@ -3,12 +3,21 @@ import util.common.ty_mach_to_str;
import std._int; import std._int;
import std._uint; import std._uint;
type op = tag type binop = tag
(PLUS(), (PLUS(),
MINUS(), MINUS(),
STAR(), STAR(),
SLASH(), SLASH(),
PERCENT(), PERCENT(),
CARET(),
AND(),
OR(),
LSL(),
LSR(),
ASR());
type token = tag
(/* Expression-operator symbols. */
EQ(), EQ(),
LT(), LT(),
LE(), LE(),
@ -16,20 +25,14 @@ type op = tag
NE(), NE(),
GE(), GE(),
GT(), GT(),
ANDAND(),
OROR(),
NOT(), NOT(),
TILDE(), TILDE(),
CARET(),
AND(),
ANDAND(),
OR(),
OROR(),
LSL(),
LSR(),
ASR());
type token = tag BINOP(binop),
(OP(op), BINOPEQ(binop),
OPEQ(op),
AS(), AS(),
WITH(), WITH(),
@ -152,40 +155,44 @@ type token = tag
BRACEQUOTE(str), BRACEQUOTE(str),
EOF()); EOF());
fn op_to_str(op o) -> str { fn binop_to_str(binop o) -> str {
alt (o) { alt (o) {
case (PLUS()) { ret "+"; } case (PLUS()) { ret "+"; }
case (MINUS()) { ret "-"; } case (MINUS()) { ret "-"; }
case (STAR()) { ret "*"; } case (STAR()) { ret "*"; }
case (SLASH()) { ret "/"; } case (SLASH()) { ret "/"; }
case (PERCENT()) { ret "%"; } case (PERCENT()) { ret "%"; }
case (EQ()) { ret "="; } case (CARET()) { ret "^"; }
case (LT()) { ret "<"; } case (AND()) { ret "&"; }
case (LE()) { ret "<="; } case (OR()) { ret "|"; }
case (EQEQ()) { ret "=="; } case (LSL()) { ret "<<"; }
case (NE()) { ret "!="; } case (LSR()) { ret ">>"; }
case (GE()) { ret ">="; } case (ASR()) { ret ">>>"; }
case (GT()) { ret ">"; }
case (NOT()) { ret "!"; }
case (TILDE()) { ret "~"; }
case (CARET()) { ret "^"; }
case (AND()) { ret "&"; }
case (ANDAND()) { ret "&&"; }
case (OR()) { ret "|"; }
case (OROR()) { ret "||"; }
case (LSL()) { ret "<<"; }
case (LSR()) { ret ">>"; }
case (ASR()) { ret ">>>"; }
} }
} }
fn to_str(token t) -> str { fn to_str(token t) -> str {
alt (t) { alt (t) {
case (OP(op)) { ret op_to_str(op); }
case (OPEQ(op)) { ret op_to_str(op) + "="; } case (EQ()) { ret "="; }
case (LT()) { ret "<"; }
case (LE()) { ret "<="; }
case (EQEQ()) { ret "=="; }
case (NE()) { ret "!="; }
case (GE()) { ret ">="; }
case (GT()) { ret ">"; }
case (NOT()) { ret "!"; }
case (TILDE()) { ret "~"; }
case (OROR()) { ret "||"; }
case (ANDAND()) { ret "&&"; }
case (BINOP(op)) { ret binop_to_str(op); }
case (BINOPEQ(op)) { ret binop_to_str(op) + "="; }
case (AS()) { ret "as"; } case (AS()) { ret "as"; }
case (WITH()) { ret "with"; } case (WITH()) { ret "with"; }
/* Structural symbols */ /* Structural symbols */
case (AT()) { ret "@"; } case (AT()) { ret "@"; }
case (DOT()) { ret "."; } case (DOT()) { ret "."; }