1
Fork 0

Make clear the differentiation between char pos and byte pos in filemaps. Fix up error printing for files with multi-byte characters.

This commit is contained in:
Josh Matthews 2011-07-16 02:01:10 -04:00 committed by Brian Anderson
parent 3ce43f37d2
commit a411c865b2
6 changed files with 64 additions and 34 deletions

View file

@ -103,7 +103,7 @@ obj session(@config targ_cfg,
fn unimpl(str msg) -> ! { self.bug("unimplemented " + msg); } fn unimpl(str msg) -> ! { self.bug("unimplemented " + msg); }
fn get_codemap() -> codemap::codemap { ret parse_sess.cm; } fn get_codemap() -> codemap::codemap { ret parse_sess.cm; }
fn lookup_pos(uint pos) -> codemap::loc { fn lookup_pos(uint pos) -> codemap::loc {
ret codemap::lookup_pos(parse_sess.cm, pos); ret codemap::lookup_char_pos(parse_sess.cm, pos);
} }
fn get_parse_sess() -> parse_sess { ret parse_sess; } fn get_parse_sess() -> parse_sess { ret parse_sess; }
fn next_node_id() -> ast::node_id { fn next_node_id() -> ast::node_id {

View file

@ -9,50 +9,69 @@ import std::option::none;
type filename = str; type filename = str;
type file_pos = rec(uint ch, uint byte);
/* A codemap is a thing that maps uints to file/line/column positions /* A codemap is a thing that maps uints to file/line/column positions
* in a crate. This is to make it possible to represent the positions * in a crate. This is to make it possible to represent the positions
* with single-word things, rather than passing records all over the * with single-word things, rather than passing records all over the
* compiler. * compiler.
*/ */
type filemap = @rec(filename name, uint start_pos, mutable uint[] lines); type filemap = @rec(filename name, file_pos start_pos,
mutable file_pos[] lines);
type codemap = @rec(mutable filemap[] files); type codemap = @rec(mutable filemap[] files);
type loc = rec(filename filename, uint line, uint col); type loc = rec(filename filename, uint line, uint col);
fn new_codemap() -> codemap { fn new_codemap() -> codemap {
let filemap[] files = ~[]; ret @rec(mutable files=~[]);
ret @rec(mutable files=files);
} }
fn new_filemap(filename filename, uint start_pos) -> filemap { fn new_filemap(filename filename, uint start_pos_ch, uint start_pos_byte)
ret @rec(name=filename, start_pos=start_pos, mutable lines=[start_pos]); -> filemap {
ret @rec(name=filename, start_pos=rec(ch=start_pos_ch,
byte=start_pos_byte),
mutable lines=~[rec(ch=start_pos_ch, byte=start_pos_byte)]);
} }
fn next_line(filemap file, uint pos) { file.lines += ~[pos]; } fn next_line(filemap file, uint chpos, uint byte_pos) {
file.lines += ~[rec(ch=chpos, byte=byte_pos)];
}
fn lookup_pos(codemap map, uint pos) -> loc { type lookup_fn = fn (file_pos pos) -> uint;
fn lookup_pos(codemap map, uint pos, lookup_fn lookup) -> loc {
auto a = 0u; auto a = 0u;
auto b = ivec::len[filemap](map.files); auto b = ivec::len(map.files);
while (b - a > 1u) { while (b - a > 1u) {
auto m = (a + b) / 2u; auto m = (a + b) / 2u;
if (map.files.(m).start_pos > pos) { b = m; } else { a = m; } if (lookup(map.files.(m).start_pos) > pos) { b = m; } else { a = m; }
} }
auto f = map.files.(a); auto f = map.files.(a);
a = 0u; a = 0u;
b = ivec::len[uint](f.lines); b = ivec::len(f.lines);
while (b - a > 1u) { while (b - a > 1u) {
auto m = (a + b) / 2u; auto m = (a + b) / 2u;
if (f.lines.(m) > pos) { b = m; } else { a = m; } if (lookup(f.lines.(m)) > pos) { b = m; } else { a = m; }
} }
ret rec(filename=f.name, line=a + 1u, col=pos - f.lines.(a)); ret rec(filename=f.name, line=a + 1u, col=pos - lookup(f.lines.(a)));
}
fn lookup_char_pos(codemap map, uint pos) -> loc {
fn lookup(file_pos pos) -> uint { ret pos.ch; }
ret lookup_pos(map, pos, lookup);
}
fn lookup_byte_pos(codemap map, uint pos) -> loc {
fn lookup(file_pos pos) -> uint { ret pos.byte; }
ret lookup_pos(map, pos, lookup);
} }
type span = rec(uint lo, uint hi); type span = rec(uint lo, uint hi);
fn span_to_str(&span sp, &codemap cm) -> str { fn span_to_str(&span sp, &codemap cm) -> str {
auto lo = lookup_pos(cm, sp.lo); auto lo = lookup_char_pos(cm, sp.lo);
auto hi = lookup_pos(cm, sp.hi); auto hi = lookup_char_pos(cm, sp.hi);
ret #fmt("%s:%u:%u:%u:%u", lo.filename, lo.line, lo.col, hi.line, hi.col); ret #fmt("%s:%u:%u:%u:%u", lo.filename, lo.line, lo.col, hi.line, hi.col);
} }
@ -115,8 +134,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color,
// If there's one line at fault we can easily point to the problem // If there's one line at fault we can easily point to the problem
if (ivec::len(lines.lines) == 1u) { if (ivec::len(lines.lines) == 1u) {
auto lo = codemap::lookup_pos(cm, option::get(sp).lo); auto lo = lookup_char_pos(cm, option::get(sp).lo);
auto lo = lookup_pos(cm, option::get(sp).lo);
auto digits = 0u; auto digits = 0u;
auto num = lines.lines.(0) / 10u; auto num = lines.lines.(0) / 10u;
@ -129,7 +147,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color,
while (left > 0u) { str::push_char(s, ' '); left -= 1u; } while (left > 0u) { str::push_char(s, ' '); left -= 1u; }
s += "^"; s += "^";
auto hi = lookup_pos(cm, option::get(sp).hi); auto hi = lookup_char_pos(cm, option::get(sp).hi);
if (hi.col != lo.col) { if (hi.col != lo.col) {
// the ^ already takes up one space // the ^ already takes up one space
auto width = hi.col - lo.col - 1u; auto width = hi.col - lo.col - 1u;
@ -158,8 +176,8 @@ fn emit_note(&option::t[span] sp, &str msg, &codemap cm) {
type file_lines = rec(str name, uint[] lines); type file_lines = rec(str name, uint[] lines);
fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines { fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines {
auto lo = codemap::lookup_pos(cm, sp.lo); auto lo = lookup_char_pos(cm, sp.lo);
auto hi = codemap::lookup_pos(cm, sp.hi); auto hi = lookup_char_pos(cm, sp.hi);
auto lines = ~[]; auto lines = ~[];
for each (uint i in uint::range(lo.line - 1u, hi.line as uint)) { for each (uint i in uint::range(lo.line - 1u, hi.line as uint)) {
lines += ~[i]; lines += ~[i];
@ -168,10 +186,10 @@ fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines {
} }
fn get_line(filemap fm, int line, &str file) -> str { fn get_line(filemap fm, int line, &str file) -> str {
let uint begin = fm.lines.(line) - fm.start_pos; let uint begin = fm.lines.(line).byte - fm.start_pos.byte;
let uint end; let uint end;
if (line as uint < ivec::len(fm.lines) - 1u) { if (line as uint < ivec::len(fm.lines) - 1u) {
end = fm.lines.(line + 1) - fm.start_pos; end = fm.lines.(line + 1).byte - fm.start_pos.byte;
} else { } else {
// If we're not done parsing the file, we're at the limit of what's // If we're not done parsing the file, we're at the limit of what's
// parsed. If we just slice the rest of the string, we'll print out // parsed. If we just slice the rest of the string, we'll print out

View file

@ -21,6 +21,7 @@ type ctx =
mutable str[] deps, mutable str[] deps,
parser::parse_sess sess, parser::parse_sess sess,
mutable uint chpos, mutable uint chpos,
mutable uint byte_pos,
ast::crate_cfg cfg); ast::crate_cfg cfg);
fn eval_crate_directives(ctx cx, &(@ast::crate_directive)[] cdirs, fn eval_crate_directives(ctx cx, &(@ast::crate_directive)[] cdirs,
@ -56,7 +57,8 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix,
}; };
if (cx.mode == mode_depend) { cx.deps += ~[full_path]; ret; } if (cx.mode == mode_depend) { cx.deps += ~[full_path]; ret; }
auto p0 = auto p0 =
new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos); new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos,
cx.byte_pos);
auto inner_attrs = parse_inner_attrs_and_next(p0); auto inner_attrs = parse_inner_attrs_and_next(p0);
auto mod_attrs = attrs + inner_attrs._0; auto mod_attrs = attrs + inner_attrs._0;
auto first_item_outer_attrs = inner_attrs._1; auto first_item_outer_attrs = inner_attrs._1;
@ -65,8 +67,9 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix,
auto i = syntax::parse::parser::mk_item auto i = syntax::parse::parser::mk_item
(p0, cdir.span.lo, cdir.span.hi, id, ast::item_mod(m0), (p0, cdir.span.lo, cdir.span.hi, id, ast::item_mod(m0),
mod_attrs); mod_attrs);
// Thread defids and chpos through the parsers // Thread defids, chpos and byte_pos through the parsers
cx.chpos = p0.get_chpos(); cx.chpos = p0.get_chpos();
cx.byte_pos = p0.get_byte_pos();
items += ~[i]; items += ~[i];
} }
case (ast::cdir_dir_mod(?id, ?dir_opt, ?cdirs, ?attrs)) { case (ast::cdir_dir_mod(?id, ?dir_opt, ?cdirs, ?attrs)) {

View file

@ -24,6 +24,7 @@ type reader =
fn get_mark_str() -> str ; fn get_mark_str() -> str ;
fn get_interner() -> @interner::interner[str] ; fn get_interner() -> @interner::interner[str] ;
fn get_chpos() -> uint ; fn get_chpos() -> uint ;
fn get_byte_pos() -> uint ;
fn get_col() -> uint ; fn get_col() -> uint ;
fn get_filemap() -> codemap::filemap ; fn get_filemap() -> codemap::filemap ;
fn err(str) ; fn err(str) ;
@ -53,6 +54,7 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
} }
fn get_mark_chpos() -> uint { ret mark_chpos; } fn get_mark_chpos() -> uint { ret mark_chpos; }
fn get_chpos() -> uint { ret chpos; } fn get_chpos() -> uint { ret chpos; }
fn get_byte_pos() -> uint { ret pos; }
fn curr() -> char { ret ch; } fn curr() -> char { ret ch; }
fn next() -> char { fn next() -> char {
if (pos < len) { if (pos < len) {
@ -70,7 +72,10 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
if (pos < len) { if (pos < len) {
col += 1u; col += 1u;
chpos += 1u; chpos += 1u;
if (ch == '\n') { codemap::next_line(fm, chpos); col = 0u; } if (ch == '\n') {
codemap::next_line(fm, chpos, pos + fm.start_pos.byte);
col = 0u;
}
auto next = str::char_range_at(src, pos); auto next = str::char_range_at(src, pos);
pos = next._1; pos = next._1;
ch = next._0; ch = next._0;
@ -86,7 +91,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
let str[] strs = ~[]; let str[] strs = ~[];
auto rd = auto rd =
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u, reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
filemap.start_pos, filemap.start_pos, strs, filemap, itr); filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
itr);
rd.init(); rd.init();
ret rd; ret rd;
} }
@ -737,7 +743,7 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path)
auto srdr = ioivec::file_reader(path); auto srdr = ioivec::file_reader(path);
auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream()); auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream());
auto itr = @interner::mk[str](str::hash, str::eq); auto itr = @interner::mk[str](str::hash, str::eq);
auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u), itr); auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr);
let cmnt[] comments = ~[]; let cmnt[] comments = ~[];
let lit[] literals = ~[]; let lit[] literals = ~[];
let bool first_read = true; let bool first_read = true;

View file

@ -50,17 +50,18 @@ type parser =
fn get_filemap() -> codemap::filemap ; fn get_filemap() -> codemap::filemap ;
fn get_bad_expr_words() -> hashmap[str, ()] ; fn get_bad_expr_words() -> hashmap[str, ()] ;
fn get_chpos() -> uint ; fn get_chpos() -> uint ;
fn get_byte_pos() -> uint ;
fn get_id() -> ast::node_id ; fn get_id() -> ast::node_id ;
fn get_sess() -> parse_sess; fn get_sess() -> parse_sess;
}; };
fn new_parser_from_file(parse_sess sess, ast::crate_cfg cfg, fn new_parser_from_file(parse_sess sess, ast::crate_cfg cfg,
str path, uint pos) -> parser { str path, uint chpos, uint byte_pos) -> parser {
auto ftype = SOURCE_FILE; auto ftype = SOURCE_FILE;
if (str::ends_with(path, ".rc")) { ftype = CRATE_FILE; } if (str::ends_with(path, ".rc")) { ftype = CRATE_FILE; }
auto srdr = ioivec::file_reader(path); auto srdr = ioivec::file_reader(path);
auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream()); auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream());
auto filemap = codemap::new_filemap(path, pos); auto filemap = codemap::new_filemap(path, chpos, byte_pos);
sess.cm.files += ~[filemap]; sess.cm.files += ~[filemap];
auto itr = @interner::mk(str::hash, str::eq); auto itr = @interner::mk(str::hash, str::eq);
auto rdr = lexer::new_reader(sess.cm, src, filemap, itr); auto rdr = lexer::new_reader(sess.cm, src, filemap, itr);
@ -114,6 +115,7 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); } fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); }
fn get_bad_expr_words() -> hashmap[str, ()] { ret bad_words; } fn get_bad_expr_words() -> hashmap[str, ()] { ret bad_words; }
fn get_chpos() -> uint { ret rdr.get_chpos(); } fn get_chpos() -> uint { ret rdr.get_chpos(); }
fn get_byte_pos() -> uint { ret rdr.get_byte_pos(); }
fn get_id() -> ast::node_id { ret next_node_id(sess); } fn get_id() -> ast::node_id { ret next_node_id(sess); }
fn get_sess() -> parse_sess { ret sess; } fn get_sess() -> parse_sess { ret sess; }
} }
@ -2378,7 +2380,7 @@ fn parse_native_view(&parser p) -> (@ast::view_item)[] {
fn parse_crate_from_source_file(&str input, &ast::crate_cfg cfg, fn parse_crate_from_source_file(&str input, &ast::crate_cfg cfg,
&parse_sess sess) -> @ast::crate { &parse_sess sess) -> @ast::crate {
auto p = new_parser_from_file(sess, cfg, input, 0u); auto p = new_parser_from_file(sess, cfg, input, 0u, 0u);
ret parse_crate_mod(p, cfg, sess); ret parse_crate_mod(p, cfg, sess);
} }
@ -2386,7 +2388,7 @@ fn parse_crate_from_source_str(&str name, &str source, &ast::crate_cfg cfg,
&codemap::codemap cm) -> @ast::crate { &codemap::codemap cm) -> @ast::crate {
auto sess = @rec(cm=cm, mutable next_id=0); auto sess = @rec(cm=cm, mutable next_id=0);
auto ftype = SOURCE_FILE; auto ftype = SOURCE_FILE;
auto filemap = codemap::new_filemap(name, 0u); auto filemap = codemap::new_filemap(name, 0u, 0u);
sess.cm.files += ~[filemap]; sess.cm.files += ~[filemap];
auto itr = @interner::mk(str::hash, str::eq); auto itr = @interner::mk(str::hash, str::eq);
auto rdr = lexer::new_reader(sess.cm, source, filemap, itr); auto rdr = lexer::new_reader(sess.cm, source, filemap, itr);
@ -2504,7 +2506,7 @@ fn parse_crate_directives(&parser p, token::token term,
fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg, fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg,
&parse_sess sess) -> @ast::crate { &parse_sess sess) -> @ast::crate {
auto p = new_parser_from_file(sess, cfg, input, 0u); auto p = new_parser_from_file(sess, cfg, input, 0u, 0u);
auto lo = p.get_lo_pos(); auto lo = p.get_lo_pos();
auto prefix = std::fs::dirname(p.get_filemap().name); auto prefix = std::fs::dirname(p.get_filemap().name);
auto leading_attrs = parse_inner_attrs_and_next(p); auto leading_attrs = parse_inner_attrs_and_next(p);
@ -2517,6 +2519,7 @@ fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg,
mutable deps=deps, mutable deps=deps,
sess=sess, sess=sess,
mutable chpos=p.get_chpos(), mutable chpos=p.get_chpos(),
mutable byte_pos=p.get_byte_pos(),
cfg = p.get_cfg()); cfg = p.get_cfg());
auto m = eval::eval_crate_directives_to_mod(cx, cdirs, prefix); auto m = eval::eval_crate_directives_to_mod(cx, cdirs, prefix);
auto hi = p.get_hi_pos(); auto hi = p.get_hi_pos();

View file

@ -1345,8 +1345,8 @@ fn maybe_print_trailing_comment(&ps s, codemap::span span,
alt (next_comment(s)) { alt (next_comment(s)) {
case (some(?cmnt)) { case (some(?cmnt)) {
if (cmnt.style != lexer::trailing) { ret; } if (cmnt.style != lexer::trailing) { ret; }
auto span_line = codemap::lookup_pos(cm, span.hi); auto span_line = codemap::lookup_char_pos(cm, span.hi);
auto comment_line = codemap::lookup_pos(cm, cmnt.pos); auto comment_line = codemap::lookup_char_pos(cm, cmnt.pos);
auto next = cmnt.pos + 1u; auto next = cmnt.pos + 1u;
alt (next_pos) { case (none) { } case (some(?p)) { next = p; } } alt (next_pos) { case (none) { } case (some(?p)) { next = p; } }
if (span.hi < cmnt.pos && cmnt.pos < next && if (span.hi < cmnt.pos && cmnt.pos < next &&