Change spans to use byte offsets instead of char offsets

This commit is contained in:
Brian Anderson 2012-11-15 19:37:29 -08:00
parent 8cba337cce
commit 81d20156cd
12 changed files with 161 additions and 89 deletions

View file

@ -112,7 +112,7 @@ type compile_unit_md = {name: ~str};
type subprogram_md = {id: ast::node_id}; type subprogram_md = {id: ast::node_id};
type local_var_md = {id: ast::node_id}; type local_var_md = {id: ast::node_id};
type tydesc_md = {hash: uint}; type tydesc_md = {hash: uint};
type block_md = {start: codemap::Loc<CharPos>, end: codemap::Loc<CharPos>}; type block_md = {start: codemap::Loc, end: codemap::Loc};
type argument_md = {id: ast::node_id}; type argument_md = {id: ast::node_id};
type retval_md = {id: ast::node_id}; type retval_md = {id: ast::node_id};

View file

@ -1,7 +1,7 @@
use codemap::{span, CharPos}; use codemap::{span, BytePos};
use ast::*; use ast::*;
pure fn spanned<T>(+lo: CharPos, +hi: CharPos, +t: T) -> spanned<T> { pure fn spanned<T>(+lo: BytePos, +hi: BytePos, +t: T) -> spanned<T> {
respan(mk_sp(lo, hi), move t) respan(mk_sp(lo, hi), move t)
} }
@ -14,12 +14,12 @@ pure fn dummy_spanned<T>(+t: T) -> spanned<T> {
} }
/* assuming that we're not in macro expansion */ /* assuming that we're not in macro expansion */
pure fn mk_sp(+lo: CharPos, +hi: CharPos) -> span { pure fn mk_sp(+lo: BytePos, +hi: BytePos) -> span {
span {lo: lo, hi: hi, expn_info: None} span {lo: lo, hi: hi, expn_info: None}
} }
// make this a const, once the compiler supports it // make this a const, once the compiler supports it
pure fn dummy_sp() -> span { return mk_sp(CharPos(0), CharPos(0)); } pure fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }

View file

@ -6,7 +6,7 @@ use either::Either;
use diagnostic::span_handler; use diagnostic::span_handler;
use ast_util::{spanned, dummy_spanned}; use ast_util::{spanned, dummy_spanned};
use parse::comments::{doc_comment_style, strip_doc_comment_decoration}; use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
use codemap::CharPos; use codemap::BytePos;
// Constructors // Constructors
export mk_name_value_item_str; export mk_name_value_item_str;
@ -76,7 +76,7 @@ fn mk_attr(item: @ast::meta_item) -> ast::attribute {
} }
fn mk_sugared_doc_attr(text: ~str, fn mk_sugared_doc_attr(text: ~str,
+lo: CharPos, +hi: CharPos) -> ast::attribute { +lo: BytePos, +hi: BytePos) -> ast::attribute {
let lit = spanned(lo, hi, ast::lit_str(@text)); let lit = spanned(lo, hi, ast::lit_str(@text));
let attr = { let attr = {
style: doc_comment_style(text), style: doc_comment_style(text),

View file

@ -118,8 +118,8 @@ impl CharPos: to_bytes::IterBytes {
} }
pub struct span { pub struct span {
lo: CharPos, lo: BytePos,
hi: CharPos, hi: BytePos,
expn_info: Option<@ExpnInfo> expn_info: Option<@ExpnInfo>
} }
@ -141,8 +141,10 @@ impl<D: Deserializer> span: Deserializable<D> {
} }
} }
pub struct Loc<A: Pos> { // XXX col shouldn't be CharPos because col is not an absolute location in the
file: @FileMap, line: uint, col: A // codemap, and BytePos and CharPos always represent absolute positions
pub struct Loc {
file: @FileMap, line: uint, col: CharPos
} }
/// An absolute offset within the CodeMap (not a relative offset within a /// An absolute offset within the CodeMap (not a relative offset within a
@ -178,12 +180,24 @@ pub enum FileSubstr {
pub FssExternal({filename: ~str, line: uint, col: CharPos}) pub FssExternal({filename: ~str, line: uint, col: CharPos})
} }
/// Identifies an offset of a multi-byte character in a FileMap
///
/// One entry is appended per character by `FileMap::record_multibyte_char`.
pub struct MultiByteChar {
/// The absolute offset of the character in the CodeMap
pos: BytePos,
/// The number of bytes, >=2 (record_multibyte_char asserts 2 <= bytes <= 4)
bytes: uint,
/// Running total of `bytes` over the entries recorded for this FileMap, up
/// to and including this one.
/// NOTE(review): record_multibyte_char adds the full byte length of each
/// character, not the length minus one, so this is larger than the number
/// of bytes in excess of one-per-character -- confirm which is intended.
sum: uint
}
pub struct FileMap { pub struct FileMap {
name: FileName, name: FileName,
substr: FileSubstr, substr: FileSubstr,
src: @~str, src: @~str,
start_pos: FilePos, start_pos: FilePos,
mut lines: ~[FilePos] mut lines: ~[FilePos],
multibyte_chars: DVec<MultiByteChar>
} }
pub impl FileMap { pub impl FileMap {
@ -194,7 +208,8 @@ pub impl FileMap {
return FileMap { return FileMap {
name: filename, substr: substr, src: src, name: filename, substr: substr, src: src,
start_pos: start_pos, start_pos: start_pos,
mut lines: ~[] mut lines: ~[],
multibyte_chars: DVec()
}; };
} }
@ -219,6 +234,21 @@ pub impl FileMap {
str::slice(*self.src, begin, end) str::slice(*self.src, begin, end)
} }
// Appends an entry describing one multibyte character to `multibyte_chars`.
//
// `pos` is the byte position of the character and `bytes` is its encoded
// length, which must lie between 2 and 4 inclusive (asserted). The new
// entry's `sum` is the previous entry's `sum` plus `bytes`, or just `bytes`
// when this is the first entry.
pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
    assert bytes >= 2 && bytes <= 4;
    // Running total carried over from the most recently recorded character
    let prior_sum = if self.multibyte_chars.len() == 0 {
        0
    } else {
        self.multibyte_chars.last().sum
    };
    self.multibyte_chars.push(MultiByteChar {
        pos: pos,
        bytes: bytes,
        sum: prior_sum + bytes
    });
}
} }
pub struct CodeMap { pub struct CodeMap {
@ -254,12 +284,11 @@ pub impl CodeMap {
pos.line, pos.col.to_uint()); pos.line, pos.col.to_uint());
} }
pub fn lookup_char_pos(&self, +pos: CharPos) -> Loc<CharPos> { pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); } return self.lookup_pos(pos);
return self.lookup_pos(pos, lookup);
} }
pub fn lookup_char_pos_adj(&self, +pos: CharPos) pub fn lookup_char_pos_adj(&self, +pos: BytePos)
-> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>} -> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>}
{ {
let loc = self.lookup_char_pos(pos); let loc = self.lookup_char_pos(pos);
@ -272,7 +301,7 @@ pub impl CodeMap {
} }
FssInternal(sp) => { FssInternal(sp) => {
self.lookup_char_pos_adj( self.lookup_char_pos_adj(
sp.lo + (pos - loc.file.start_pos.ch)) sp.lo + (pos - loc.file.start_pos.byte))
} }
FssExternal(eloc) => { FssExternal(eloc) => {
{filename: /* FIXME (#2543) */ copy eloc.filename, {filename: /* FIXME (#2543) */ copy eloc.filename,
@ -284,14 +313,13 @@ pub impl CodeMap {
} }
pub fn adjust_span(&self, sp: span) -> span { pub fn adjust_span(&self, sp: span) -> span {
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); } let line = self.lookup_line(sp.lo);
let line = self.lookup_line(sp.lo, lookup);
match (line.fm.substr) { match (line.fm.substr) {
FssNone => sp, FssNone => sp,
FssInternal(s) => { FssInternal(s) => {
self.adjust_span(span { self.adjust_span(span {
lo: s.lo + (sp.lo - line.fm.start_pos.ch), lo: s.lo + (sp.lo - line.fm.start_pos.byte),
hi: s.lo + (sp.hi - line.fm.start_pos.ch), hi: s.lo + (sp.hi - line.fm.start_pos.byte),
expn_info: sp.expn_info expn_info: sp.expn_info
}) })
} }
@ -321,18 +349,6 @@ pub impl CodeMap {
return @FileLines {file: lo.file, lines: lines}; return @FileLines {file: lo.file, lines: lines};
} }
// Maps an absolute character position to the FileMap that contains it and
// a byte offset measured from the start of that file.
fn lookup_byte_offset(&self, +chpos: CharPos)
-> {fm: @FileMap, pos: BytePos} {
// Key lookup_line's search on the character component of each FilePos
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
let {fm, line} = self.lookup_line(chpos, lookup);
// Byte offset of the start of the line, relative to the start of the file
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
// Number of characters between the start of the line and chpos
let col = chpos - fm.lines[line].ch;
// presumably the number of bytes spanned by `col` characters starting at
// byte `line_offset` of the source -- confirm against str::count_bytes
let col_offset = str::count_bytes(*fm.src,
line_offset.to_uint(),
col.to_uint());
{fm: fm, pos: line_offset + BytePos(col_offset)}
}
pub fn span_to_snippet(&self, sp: span) -> ~str { pub fn span_to_snippet(&self, sp: span) -> ~str {
let begin = self.lookup_byte_offset(sp.lo); let begin = self.lookup_byte_offset(sp.lo);
let end = self.lookup_byte_offset(sp.hi); let end = self.lookup_byte_offset(sp.hi);
@ -351,15 +367,14 @@ pub impl CodeMap {
} }
priv impl CodeMap { priv impl CodeMap {
fn lookup_line<A: Pos>(&self, pos: A, lookup: LookupFn)
-> {fm: @FileMap, line: uint} fn lookup_filemap_idx(&self, +pos: BytePos) -> uint {
{
let len = self.files.len(); let len = self.files.len();
let mut a = 0u; let mut a = 0u;
let mut b = len; let mut b = len;
while b - a > 1u { while b - a > 1u {
let m = (a + b) / 2u; let m = (a + b) / 2u;
if lookup(self.files[m].start_pos) > pos.to_uint() { if self.files[m].start_pos.byte > pos {
b = m; b = m;
} else { } else {
a = m; a = m;
@ -369,22 +384,40 @@ priv impl CodeMap {
fail fmt!("position %u does not resolve to a source location", fail fmt!("position %u does not resolve to a source location",
pos.to_uint()) pos.to_uint())
} }
let f = self.files[a];
a = 0u; return a;
b = vec::len(f.lines); }
fn lookup_line(&self, +pos: BytePos)
-> {fm: @FileMap, line: uint}
{
let idx = self.lookup_filemap_idx(pos);
let f = self.files[idx];
let mut a = 0u;
let mut b = vec::len(f.lines);
while b - a > 1u { while b - a > 1u {
let m = (a + b) / 2u; let m = (a + b) / 2u;
if lookup(f.lines[m]) > pos.to_uint() { b = m; } else { a = m; } if f.lines[m].byte > pos { b = m; } else { a = m; }
} }
return {fm: f, line: a}; return {fm: f, line: a};
} }
fn lookup_pos<A: Pos Num>(&self, pos: A, lookup: LookupFn) -> Loc<A> { fn lookup_pos(&self, +pos: BytePos) -> Loc {
let {fm: f, line: a} = self.lookup_line(pos, lookup); let {fm: f, line: a} = self.lookup_line(pos);
let line = a + 1u; // Line numbers start at 1
let chpos = self.bytepos_to_local_charpos(pos);
let linebpos = f.lines[a].byte;
let linechpos = self.bytepos_to_local_charpos(linebpos);
debug!("codemap: byte pos %? is on the line at byte pos %?",
pos, linebpos);
debug!("codemap: char pos %? is on the line at char pos %?",
chpos, linechpos);
debug!("codemap: byte is on line: %?", line);
assert chpos >= linechpos;
return Loc { return Loc {
file: f, file: f,
line: a + 1u, line: line,
col: pos - from_uint(lookup(f.lines[a])) col: chpos - linechpos
}; };
} }
@ -394,6 +427,40 @@ priv impl CodeMap {
return fmt!("%s:%u:%u: %u:%u", lo.file.name, return fmt!("%s:%u:%u: %u:%u", lo.file.name,
lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint()) lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
} }
// Resolves an absolute byte position to the FileMap that contains it,
// paired with the position's byte offset from the start of that file
// (`bpos` minus the file's starting byte position).
fn lookup_byte_offset(&self, +bpos: BytePos)
    -> {fm: @FileMap, pos: BytePos} {
    let filemap = self.files[self.lookup_filemap_idx(bpos)];
    {fm: filemap, pos: bpos - filemap.start_pos.byte}
}
// Converts an absolute BytePos into a character position by discounting the
// extra bytes of every multibyte character recorded before it in the
// FileMap that contains `bpos`.
//
// The result is not rebased to the start of the file: it is `bpos` minus
// the extra bytes found in this file only. It is meant to be subtracted
// from another result for the same file, as lookup_pos does to obtain a
// column.
//
// Fails (via the assertion below) if `bpos` points into the middle of a
// recorded multibyte character.
fn bytepos_to_local_charpos(&self, +bpos: BytePos) -> CharPos {
    debug!("codemap: converting %? to char pos", bpos);
    let idx = self.lookup_filemap_idx(bpos);
    let map = self.files[idx];

    // The number of extra bytes due to multibyte chars in the FileMap
    let mut total_extra_bytes = 0;

    // The scan stops at the first entry that is not before `bpos`;
    // presumably entries are in ascending position order -- confirm
    // against the callers of record_multibyte_char.
    for map.multibyte_chars.each |mbc| {
        debug!("codemap: %?-byte char at %?", mbc.bytes, mbc.pos);
        if mbc.pos < bpos {
            // Every character occupies one character position of its own,
            // so only the bytes beyond the first are extra. Subtracting the
            // full length would make the result one too small for each
            // multibyte character. `bytes` is at least 2 (asserted in
            // record_multibyte_char), so this cannot underflow.
            total_extra_bytes += mbc.bytes - 1;
            // We should never see a byte position in the middle of a
            // character. (`bpos == mbc.pos` cannot hold in this branch,
            // so only the end-of-character bound needs checking.)
            assert bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes;
        } else {
            break;
        }
    }

    CharPos(bpos.to_uint() - total_extra_bytes)
}
} }
// //

View file

@ -4,7 +4,7 @@ use parse::parser;
use parse::parser::{Parser, parse_from_source_str}; use parse::parser::{Parser, parse_from_source_str};
use dvec::DVec; use dvec::DVec;
use parse::token::ident_interner; use parse::token::ident_interner;
use codemap::CharPos; use codemap::{CharPos, BytePos};
use fold::*; use fold::*;
use visit::*; use visit::*;
@ -16,13 +16,13 @@ use io::*;
use codemap::span; use codemap::span;
struct gather_item { struct gather_item {
lo: CharPos, lo: BytePos,
hi: CharPos, hi: BytePos,
e: @ast::expr, e: @ast::expr,
constr: ~str constr: ~str
} }
type aq_ctxt = @{lo: CharPos, gather: DVec<gather_item>}; type aq_ctxt = @{lo: BytePos, gather: DVec<gather_item>};
enum fragment { enum fragment {
from_expr(@ast::expr), from_expr(@ast::expr),
from_ty(@ast::Ty) from_ty(@ast::Ty)
@ -115,7 +115,7 @@ impl @ast::pat: qq_helper {
fn get_fold_fn() -> ~str {~"fold_pat"} fn get_fold_fn() -> ~str {~"fold_pat"}
} }
fn gather_anti_quotes<N: qq_helper>(lo: CharPos, node: N) -> aq_ctxt fn gather_anti_quotes<N: qq_helper>(lo: BytePos, node: N) -> aq_ctxt
{ {
let v = @{visit_expr: |node, &&cx, v| visit_aq(node, ~"from_expr", cx, v), let v = @{visit_expr: |node, &&cx, v| visit_aq(node, ~"from_expr", cx, v),
visit_ty: |node, &&cx, v| visit_aq(node, ~"from_ty", cx, v), visit_ty: |node, &&cx, v| visit_aq(node, ~"from_ty", cx, v),
@ -227,7 +227,7 @@ fn finish<T: qq_helper>
let mut str2 = ~""; let mut str2 = ~"";
enum state {active, skip(uint), blank}; enum state {active, skip(uint), blank};
let mut state = active; let mut state = active;
let mut i = CharPos(0u); let mut i = BytePos(0u);
let mut j = 0u; let mut j = 0u;
let g_len = cx.gather.len(); let g_len = cx.gather.len();
for str::chars_each(*str) |ch| { for str::chars_each(*str) |ch| {
@ -244,7 +244,7 @@ fn finish<T: qq_helper>
blank if is_space(ch) => str::push_char(&mut str2, ch), blank if is_space(ch) => str::push_char(&mut str2, ch),
blank => str::push_char(&mut str2, ' ') blank => str::push_char(&mut str2, ' ')
} }
i += CharPos(1u); i += BytePos(1u);
if (j < g_len && i == cx.gather[j].hi) { if (j < g_len && i == cx.gather[j].hi) {
assert ch == ')'; assert ch == ')';
state = active; state = active;

View file

@ -11,7 +11,7 @@ use dvec::DVec;
use ast::{matcher, match_tok, match_seq, match_nonterminal, ident}; use ast::{matcher, match_tok, match_seq, match_nonterminal, ident};
use ast_util::mk_sp; use ast_util::mk_sp;
use std::map::HashMap; use std::map::HashMap;
use codemap::CharPos; use codemap::BytePos;
/* This is an Earley-like parser, without support for in-grammar nonterminals, /* This is an Earley-like parser, without support for in-grammar nonterminals,
only by calling out to the main rust parser for named nonterminals (which it only by calling out to the main rust parser for named nonterminals (which it
@ -103,7 +103,7 @@ type matcher_pos = ~{
mut up: matcher_pos_up, // mutable for swapping only mut up: matcher_pos_up, // mutable for swapping only
matches: ~[DVec<@named_match>], matches: ~[DVec<@named_match>],
match_lo: uint, match_hi: uint, match_lo: uint, match_hi: uint,
sp_lo: CharPos, sp_lo: BytePos,
}; };
fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos { fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
@ -123,7 +123,7 @@ fn count_names(ms: &[matcher]) -> uint {
} }
#[allow(non_implicitly_copyable_typarams)] #[allow(non_implicitly_copyable_typarams)]
fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: CharPos) fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: BytePos)
-> matcher_pos { -> matcher_pos {
let mut match_idx_hi = 0u; let mut match_idx_hi = 0u;
for ms.each() |elt| { for ms.each() |elt| {

View file

@ -14,7 +14,7 @@ trait parser_attr {
-> attr_or_ext; -> attr_or_ext;
fn parse_outer_attributes() -> ~[ast::attribute]; fn parse_outer_attributes() -> ~[ast::attribute];
fn parse_attribute(style: ast::attr_style) -> ast::attribute; fn parse_attribute(style: ast::attr_style) -> ast::attribute;
fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) -> fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) ->
ast::attribute; ast::attribute;
fn parse_inner_attrs_and_next() -> fn parse_inner_attrs_and_next() ->
{inner: ~[ast::attribute], next: ~[ast::attribute]}; {inner: ~[ast::attribute], next: ~[ast::attribute]};
@ -85,7 +85,7 @@ impl Parser: parser_attr {
return self.parse_attribute_naked(style, lo); return self.parse_attribute_naked(style, lo);
} }
fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) -> fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) ->
ast::attribute { ast::attribute {
self.expect(token::LBRACKET); self.expect(token::LBRACKET);
let meta_item = self.parse_meta_item(); let meta_item = self.parse_meta_item();

View file

@ -28,7 +28,7 @@ impl cmnt_style : cmp::Eq {
} }
} }
type cmnt = {style: cmnt_style, lines: ~[~str], pos: CharPos}; type cmnt = {style: cmnt_style, lines: ~[~str], pos: BytePos};
fn is_doc_comment(s: ~str) -> bool { fn is_doc_comment(s: ~str) -> bool {
s.starts_with(~"///") || s.starts_with(~"///") ||
@ -131,7 +131,7 @@ fn consume_non_eol_whitespace(rdr: string_reader) {
fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) { fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
debug!(">>> blank-line comment"); debug!(">>> blank-line comment");
let v: ~[~str] = ~[]; let v: ~[~str] = ~[];
comments.push({style: blank_line, lines: v, pos: rdr.last_pos.ch}); comments.push({style: blank_line, lines: v, pos: rdr.last_pos.byte});
} }
fn consume_whitespace_counting_blank_lines(rdr: string_reader, fn consume_whitespace_counting_blank_lines(rdr: string_reader,
@ -148,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool, fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) { comments: &mut ~[cmnt]) {
debug!(">>> shebang comment"); debug!(">>> shebang comment");
let p = rdr.last_pos.ch; let p = rdr.last_pos.byte;
debug!("<<< shebang comment"); debug!("<<< shebang comment");
comments.push({ comments.push({
style: if code_to_the_left { trailing } else { isolated }, style: if code_to_the_left { trailing } else { isolated },
@ -160,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
fn read_line_comments(rdr: string_reader, code_to_the_left: bool, fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) { comments: &mut ~[cmnt]) {
debug!(">>> line comments"); debug!(">>> line comments");
let p = rdr.last_pos.ch; let p = rdr.last_pos.byte;
let mut lines: ~[~str] = ~[]; let mut lines: ~[~str] = ~[];
while rdr.curr == '/' && nextch(rdr) == '/' { while rdr.curr == '/' && nextch(rdr) == '/' {
let line = read_one_line_comment(rdr); let line = read_one_line_comment(rdr);
@ -209,7 +209,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
fn read_block_comment(rdr: string_reader, code_to_the_left: bool, fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) { comments: &mut ~[cmnt]) {
debug!(">>> block comment"); debug!(">>> block comment");
let p = rdr.last_pos.ch; let p = rdr.last_pos.byte;
let mut lines: ~[~str] = ~[]; let mut lines: ~[~str] = ~[];
let mut col: CharPos = rdr.col; let mut col: CharPos = rdr.col;
bump(rdr); bump(rdr);
@ -284,7 +284,7 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool,
debug!("<<< consume comment"); debug!("<<< consume comment");
} }
type lit = {lit: ~str, pos: CharPos}; type lit = {lit: ~str, pos: BytePos};
fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
path: ~str, path: ~str,

View file

@ -205,7 +205,7 @@ impl Parser: parser_common {
if self.token == token::GT { if self.token == token::GT {
self.bump(); self.bump();
} else if self.token == token::BINOP(token::SHR) { } else if self.token == token::BINOP(token::SHR) {
self.swap(token::GT, self.span.lo + CharPos(1u), self.span.hi); self.swap(token::GT, self.span.lo + BytePos(1u), self.span.hi);
} else { } else {
let mut s: ~str = ~"expected `"; let mut s: ~str = ~"expected `";
s += token_to_str(self.reader, token::GT); s += token_to_str(self.reader, token::GT);

View file

@ -23,7 +23,7 @@ type string_reader = @{
src: @~str, src: @~str,
// The absolute offset within the codemap of the next character to read // The absolute offset within the codemap of the next character to read
mut pos: FilePos, mut pos: FilePos,
// The absolute offset within the codemap of the last character to be read (curr) // The absolute offset within the codemap of the last character read(curr)
mut last_pos: FilePos, mut last_pos: FilePos,
// The column of the next character to read // The column of the next character to read
mut col: CharPos, mut col: CharPos,
@ -123,9 +123,9 @@ fn string_advance_token(&&r: string_reader) {
if is_eof(r) { if is_eof(r) {
r.peek_tok = token::EOF; r.peek_tok = token::EOF;
} else { } else {
let start_chpos = r.last_pos.ch; let start_bytepos = r.last_pos.byte;
r.peek_tok = next_token_inner(r); r.peek_tok = next_token_inner(r);
r.peek_span = ast_util::mk_sp(start_chpos, r.last_pos.ch); r.peek_span = ast_util::mk_sp(start_bytepos, r.last_pos.byte);
}; };
} }
@ -158,6 +158,11 @@ fn bump(rdr: string_reader) {
rdr.filemap.next_line(rdr.last_pos); rdr.filemap.next_line(rdr.last_pos);
rdr.col = CharPos(0u); rdr.col = CharPos(0u);
} }
if byte_offset_diff > 1 {
rdr.filemap.record_multibyte_char(
BytePos(current_byte_offset), byte_offset_diff);
}
} else { } else {
// XXX: What does this accomplish? // XXX: What does this accomplish?
if (rdr.curr != -1 as char) { if (rdr.curr != -1 as char) {
@ -233,7 +238,7 @@ fn consume_any_line_comment(rdr: string_reader)
bump(rdr); bump(rdr);
// line comments starting with "///" or "//!" are doc-comments // line comments starting with "///" or "//!" are doc-comments
if rdr.curr == '/' || rdr.curr == '!' { if rdr.curr == '/' || rdr.curr == '!' {
let start_chpos = rdr.pos.ch - CharPos(2u); let start_bpos = rdr.pos.byte - BytePos(2u);
let mut acc = ~"//"; let mut acc = ~"//";
while rdr.curr != '\n' && !is_eof(rdr) { while rdr.curr != '\n' && !is_eof(rdr) {
str::push_char(&mut acc, rdr.curr); str::push_char(&mut acc, rdr.curr);
@ -241,7 +246,7 @@ fn consume_any_line_comment(rdr: string_reader)
} }
return Some({ return Some({
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)), tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch) sp: ast_util::mk_sp(start_bpos, rdr.pos.byte)
}); });
} else { } else {
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@ -256,7 +261,7 @@ fn consume_any_line_comment(rdr: string_reader)
if nextch(rdr) == '!' { if nextch(rdr) == '!' {
let cmap = @CodeMap::new(); let cmap = @CodeMap::new();
(*cmap).files.push(rdr.filemap); (*cmap).files.push(rdr.filemap);
let loc = cmap.lookup_char_pos_adj(rdr.last_pos.ch); let loc = cmap.lookup_char_pos_adj(rdr.last_pos.byte);
if loc.line == 1u && loc.col == CharPos(0u) { if loc.line == 1u && loc.col == CharPos(0u) {
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
return consume_whitespace_and_comments(rdr); return consume_whitespace_and_comments(rdr);
@ -272,7 +277,7 @@ fn consume_block_comment(rdr: string_reader)
// block comments starting with "/**" or "/*!" are doc-comments // block comments starting with "/**" or "/*!" are doc-comments
if rdr.curr == '*' || rdr.curr == '!' { if rdr.curr == '*' || rdr.curr == '!' {
let start_chpos = rdr.pos.ch - CharPos(2u); let start_bpos = rdr.pos.byte - BytePos(2u);
let mut acc = ~"/*"; let mut acc = ~"/*";
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
str::push_char(&mut acc, rdr.curr); str::push_char(&mut acc, rdr.curr);
@ -286,7 +291,7 @@ fn consume_block_comment(rdr: string_reader)
bump(rdr); bump(rdr);
return Some({ return Some({
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)), tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch) sp: ast_util::mk_sp(start_bpos, rdr.pos.byte)
}); });
} }
} else { } else {

View file

@ -5,7 +5,7 @@ use either::{Either, Left, Right};
use std::map::HashMap; use std::map::HashMap;
use token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident, use token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident,
INTERPOLATED, special_idents}; INTERPOLATED, special_idents};
use codemap::{span,FssNone, CharPos}; use codemap::{span,FssNone, BytePos};
use util::interner::Interner; use util::interner::Interner;
use ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec}; use ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
use lexer::reader; use lexer::reader;
@ -244,7 +244,7 @@ impl Parser {
self.token = next.tok; self.token = next.tok;
self.span = next.sp; self.span = next.sp;
} }
fn swap(next: token::Token, +lo: CharPos, +hi: CharPos) { fn swap(next: token::Token, +lo: BytePos, +hi: BytePos) {
self.token = next; self.token = next;
self.span = mk_sp(lo, hi); self.span = mk_sp(lo, hi);
} }
@ -904,12 +904,12 @@ impl Parser {
return spanned(lo, e.span.hi, {mutbl: m, ident: i, expr: e}); return spanned(lo, e.span.hi, {mutbl: m, ident: i, expr: e});
} }
fn mk_expr(+lo: CharPos, +hi: CharPos, +node: expr_) -> @expr { fn mk_expr(+lo: BytePos, +hi: BytePos, +node: expr_) -> @expr {
return @{id: self.get_id(), callee_id: self.get_id(), return @{id: self.get_id(), callee_id: self.get_id(),
node: node, span: mk_sp(lo, hi)}; node: node, span: mk_sp(lo, hi)};
} }
fn mk_mac_expr(+lo: CharPos, +hi: CharPos, m: mac_) -> @expr { fn mk_mac_expr(+lo: BytePos, +hi: BytePos, m: mac_) -> @expr {
return @{id: self.get_id(), return @{id: self.get_id(),
callee_id: self.get_id(), callee_id: self.get_id(),
node: expr_mac({node: m, span: mk_sp(lo, hi)}), node: expr_mac({node: m, span: mk_sp(lo, hi)}),
@ -1134,7 +1134,7 @@ impl Parser {
return self.mk_expr(lo, hi, ex); return self.mk_expr(lo, hi, ex);
} }
fn parse_block_expr(lo: CharPos, blk_mode: blk_check_mode) -> @expr { fn parse_block_expr(lo: BytePos, blk_mode: blk_check_mode) -> @expr {
self.expect(token::LBRACE); self.expect(token::LBRACE);
let blk = self.parse_block_tail(lo, blk_mode); let blk = self.parse_block_tail(lo, blk_mode);
return self.mk_expr(blk.span.lo, blk.span.hi, expr_block(blk)); return self.mk_expr(blk.span.lo, blk.span.hi, expr_block(blk));
@ -1146,7 +1146,7 @@ impl Parser {
return self.parse_syntax_ext_naked(lo); return self.parse_syntax_ext_naked(lo);
} }
fn parse_syntax_ext_naked(lo: CharPos) -> @expr { fn parse_syntax_ext_naked(lo: BytePos) -> @expr {
match self.token { match self.token {
token::IDENT(_, _) => (), token::IDENT(_, _) => (),
_ => self.fatal(~"expected a syntax expander name") _ => self.fatal(~"expected a syntax expander name")
@ -2279,11 +2279,11 @@ impl Parser {
// I guess that also means "already parsed the 'impure'" if // I guess that also means "already parsed the 'impure'" if
// necessary, and this should take a qualifier. // necessary, and this should take a qualifier.
// some blocks start with "#{"... // some blocks start with "#{"...
fn parse_block_tail(lo: CharPos, s: blk_check_mode) -> blk { fn parse_block_tail(lo: BytePos, s: blk_check_mode) -> blk {
self.parse_block_tail_(lo, s, ~[]) self.parse_block_tail_(lo, s, ~[])
} }
fn parse_block_tail_(lo: CharPos, s: blk_check_mode, fn parse_block_tail_(lo: BytePos, s: blk_check_mode,
+first_item_attrs: ~[attribute]) -> blk { +first_item_attrs: ~[attribute]) -> blk {
let mut stmts = ~[]; let mut stmts = ~[];
let mut expr = None; let mut expr = None;
@ -2581,7 +2581,7 @@ impl Parser {
return {ident: id, tps: ty_params}; return {ident: id, tps: ty_params};
} }
fn mk_item(+lo: CharPos, +hi: CharPos, +ident: ident, fn mk_item(+lo: BytePos, +hi: BytePos, +ident: ident,
+node: item_, vis: visibility, +node: item_, vis: visibility,
+attrs: ~[attribute]) -> @item { +attrs: ~[attribute]) -> @item {
return @{ident: ident, return @{ident: ident,
@ -3037,7 +3037,7 @@ impl Parser {
items: items}; items: items};
} }
fn parse_item_foreign_mod(lo: CharPos, fn parse_item_foreign_mod(lo: BytePos,
visibility: visibility, visibility: visibility,
attrs: ~[attribute], attrs: ~[attribute],
items_allowed: bool) items_allowed: bool)
@ -3092,7 +3092,7 @@ impl Parser {
}); });
} }
fn parse_type_decl() -> {lo: CharPos, ident: ident} { fn parse_type_decl() -> {lo: BytePos, ident: ident} {
let lo = self.last_span.lo; let lo = self.last_span.lo;
let id = self.parse_ident(); let id = self.parse_ident();
return {lo: lo, ident: id}; return {lo: lo, ident: id};

View file

@ -1,5 +1,5 @@
use parse::{comments, lexer, token}; use parse::{comments, lexer, token};
use codemap::{CodeMap, CharPos}; use codemap::{CodeMap, BytePos};
use pp::{break_offset, word, printer, space, zerobreak, hardbreak, breaks}; use pp::{break_offset, word, printer, space, zerobreak, hardbreak, breaks};
use pp::{consistent, inconsistent, eof}; use pp::{consistent, inconsistent, eof};
use ast::{required, provided}; use ast::{required, provided};
@ -1898,7 +1898,7 @@ fn print_ty_fn(s: ps,
} }
fn maybe_print_trailing_comment(s: ps, span: codemap::span, fn maybe_print_trailing_comment(s: ps, span: codemap::span,
next_pos: Option<CharPos>) { next_pos: Option<BytePos>) {
let mut cm; let mut cm;
match s.cm { Some(ccm) => cm = ccm, _ => return } match s.cm { Some(ccm) => cm = ccm, _ => return }
match next_comment(s) { match next_comment(s) {
@ -1906,7 +1906,7 @@ fn maybe_print_trailing_comment(s: ps, span: codemap::span,
if cmnt.style != comments::trailing { return; } if cmnt.style != comments::trailing { return; }
let span_line = cm.lookup_char_pos(span.hi); let span_line = cm.lookup_char_pos(span.hi);
let comment_line = cm.lookup_char_pos(cmnt.pos); let comment_line = cm.lookup_char_pos(cmnt.pos);
let mut next = cmnt.pos + CharPos(1u); let mut next = cmnt.pos + BytePos(1u);
match next_pos { None => (), Some(p) => next = p } match next_pos { None => (), Some(p) => next = p }
if span.hi < cmnt.pos && cmnt.pos < next && if span.hi < cmnt.pos && cmnt.pos < next &&
span_line.line == comment_line.line { span_line.line == comment_line.line {
@ -1981,7 +1981,7 @@ fn lit_to_str(l: @ast::lit) -> ~str {
return to_str(l, print_literal, parse::token::mk_fake_ident_interner()); return to_str(l, print_literal, parse::token::mk_fake_ident_interner());
} }
fn next_lit(s: ps, pos: CharPos) -> Option<comments::lit> { fn next_lit(s: ps, pos: BytePos) -> Option<comments::lit> {
match s.literals { match s.literals {
Some(lits) => { Some(lits) => {
while s.cur_lit < vec::len(lits) { while s.cur_lit < vec::len(lits) {
@ -1996,7 +1996,7 @@ fn next_lit(s: ps, pos: CharPos) -> Option<comments::lit> {
} }
} }
fn maybe_print_comment(s: ps, pos: CharPos) { fn maybe_print_comment(s: ps, pos: BytePos) {
loop { loop {
match next_comment(s) { match next_comment(s) {
Some(cmnt) => { Some(cmnt) => {