Use relative positions inside a SourceFile.
This commit is contained in:
parent
585bb5e68d
commit
258ace613d
20 changed files with 217 additions and 281 deletions
|
@ -1107,27 +1107,27 @@ impl fmt::Debug for SpanData {
|
|||
}
|
||||
|
||||
/// Identifies an offset of a multi-byte character in a `SourceFile`.
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
|
||||
pub struct MultiByteChar {
|
||||
/// The relative offset of the character in the `SourceFile`.
|
||||
pub pos: BytePos,
|
||||
pub pos: RelativeBytePos,
|
||||
/// The number of bytes, `>= 2`.
|
||||
pub bytes: u8,
|
||||
}
|
||||
|
||||
/// Identifies an offset of a non-narrow character in a `SourceFile`.
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
|
||||
pub enum NonNarrowChar {
|
||||
/// Represents a zero-width character.
|
||||
ZeroWidth(BytePos),
|
||||
ZeroWidth(RelativeBytePos),
|
||||
/// Represents a wide (full-width) character.
|
||||
Wide(BytePos),
|
||||
Wide(RelativeBytePos),
|
||||
/// Represents a tab character, represented visually with a width of 4 characters.
|
||||
Tab(BytePos),
|
||||
Tab(RelativeBytePos),
|
||||
}
|
||||
|
||||
impl NonNarrowChar {
|
||||
fn new(pos: BytePos, width: usize) -> Self {
|
||||
fn new(pos: RelativeBytePos, width: usize) -> Self {
|
||||
match width {
|
||||
0 => NonNarrowChar::ZeroWidth(pos),
|
||||
2 => NonNarrowChar::Wide(pos),
|
||||
|
@ -1137,7 +1137,7 @@ impl NonNarrowChar {
|
|||
}
|
||||
|
||||
/// Returns the relative offset of the character in the `SourceFile`.
|
||||
pub fn pos(&self) -> BytePos {
|
||||
pub fn pos(&self) -> RelativeBytePos {
|
||||
match *self {
|
||||
NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
|
||||
}
|
||||
|
@ -1153,10 +1153,10 @@ impl NonNarrowChar {
|
|||
}
|
||||
}
|
||||
|
||||
impl Add<BytePos> for NonNarrowChar {
|
||||
impl Add<RelativeBytePos> for NonNarrowChar {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, rhs: BytePos) -> Self {
|
||||
fn add(self, rhs: RelativeBytePos) -> Self {
|
||||
match self {
|
||||
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
|
||||
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
|
||||
|
@ -1165,10 +1165,10 @@ impl Add<BytePos> for NonNarrowChar {
|
|||
}
|
||||
}
|
||||
|
||||
impl Sub<BytePos> for NonNarrowChar {
|
||||
impl Sub<RelativeBytePos> for NonNarrowChar {
|
||||
type Output = Self;
|
||||
|
||||
fn sub(self, rhs: BytePos) -> Self {
|
||||
fn sub(self, rhs: RelativeBytePos) -> Self {
|
||||
match self {
|
||||
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
|
||||
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
|
||||
|
@ -1178,10 +1178,10 @@ impl Sub<BytePos> for NonNarrowChar {
|
|||
}
|
||||
|
||||
/// Identifies an offset of a character that was normalized away from `SourceFile`.
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
|
||||
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
|
||||
pub struct NormalizedPos {
|
||||
/// The relative offset of the character in the `SourceFile`.
|
||||
pub pos: BytePos,
|
||||
pub pos: RelativeBytePos,
|
||||
/// The difference between original and normalized string at position.
|
||||
pub diff: u32,
|
||||
}
|
||||
|
@ -1293,7 +1293,7 @@ impl SourceFileHash {
|
|||
#[derive(Clone)]
|
||||
pub enum SourceFileLines {
|
||||
/// The source file lines, in decoded (random-access) form.
|
||||
Lines(Vec<BytePos>),
|
||||
Lines(Vec<RelativeBytePos>),
|
||||
|
||||
/// The source file lines, in undecoded difference list form.
|
||||
Diffs(SourceFileDiffs),
|
||||
|
@ -1317,7 +1317,7 @@ pub struct SourceFileDiffs {
|
|||
/// Position of the first line. Note that this is always encoded as a
|
||||
/// `RelativeBytePos` because it is often much larger than any of the
|
||||
/// differences.
|
||||
line_start: BytePos,
|
||||
line_start: RelativeBytePos,
|
||||
|
||||
/// Always 1, 2, or 4. Always as small as possible, while being big
|
||||
/// enough to hold the length of the longest line in the source file.
|
||||
|
@ -1352,7 +1352,7 @@ pub struct SourceFile {
|
|||
/// The start position of this source in the `SourceMap`.
|
||||
pub start_pos: BytePos,
|
||||
/// The end position of this source in the `SourceMap`.
|
||||
pub end_pos: BytePos,
|
||||
pub source_len: RelativeBytePos,
|
||||
/// Locations of lines beginnings in the source code.
|
||||
pub lines: Lock<SourceFileLines>,
|
||||
/// Locations of multi-byte characters in the source code.
|
||||
|
@ -1375,7 +1375,7 @@ impl Clone for SourceFile {
|
|||
src_hash: self.src_hash,
|
||||
external_src: Lock::new(self.external_src.borrow().clone()),
|
||||
start_pos: self.start_pos,
|
||||
end_pos: self.end_pos,
|
||||
source_len: self.source_len,
|
||||
lines: Lock::new(self.lines.borrow().clone()),
|
||||
multibyte_chars: self.multibyte_chars.clone(),
|
||||
non_narrow_chars: self.non_narrow_chars.clone(),
|
||||
|
@ -1390,8 +1390,8 @@ impl<S: Encoder> Encodable<S> for SourceFile {
|
|||
fn encode(&self, s: &mut S) {
|
||||
self.name.encode(s);
|
||||
self.src_hash.encode(s);
|
||||
self.start_pos.encode(s);
|
||||
self.end_pos.encode(s);
|
||||
// Do not encode `start_pos` as it's global state for this session.
|
||||
self.source_len.encode(s);
|
||||
|
||||
// We are always in `Lines` form by the time we reach here.
|
||||
assert!(self.lines.borrow().is_lines());
|
||||
|
@ -1465,8 +1465,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
fn decode(d: &mut D) -> SourceFile {
|
||||
let name: FileName = Decodable::decode(d);
|
||||
let src_hash: SourceFileHash = Decodable::decode(d);
|
||||
let start_pos: BytePos = Decodable::decode(d);
|
||||
let end_pos: BytePos = Decodable::decode(d);
|
||||
let source_len: RelativeBytePos = Decodable::decode(d);
|
||||
let lines = {
|
||||
let num_lines: u32 = Decodable::decode(d);
|
||||
if num_lines > 0 {
|
||||
|
@ -1474,7 +1473,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
let bytes_per_diff = d.read_u8() as usize;
|
||||
|
||||
// Read the first element.
|
||||
let line_start: BytePos = Decodable::decode(d);
|
||||
let line_start: RelativeBytePos = Decodable::decode(d);
|
||||
|
||||
// Read the difference list.
|
||||
let num_diffs = num_lines as usize - 1;
|
||||
|
@ -1496,8 +1495,8 @@ impl<D: Decoder> Decodable<D> for SourceFile {
|
|||
let cnum: CrateNum = Decodable::decode(d);
|
||||
SourceFile {
|
||||
name,
|
||||
start_pos,
|
||||
end_pos,
|
||||
start_pos: BytePos::from_u32(0),
|
||||
source_len,
|
||||
src: None,
|
||||
src_hash,
|
||||
// Unused - the metadata decoder will construct
|
||||
|
@ -1520,34 +1519,29 @@ impl fmt::Debug for SourceFile {
|
|||
}
|
||||
|
||||
impl SourceFile {
|
||||
pub fn new(
|
||||
name: FileName,
|
||||
mut src: String,
|
||||
start_pos: BytePos,
|
||||
hash_kind: SourceFileHashAlgorithm,
|
||||
) -> Self {
|
||||
pub fn new(name: FileName, mut src: String, hash_kind: SourceFileHashAlgorithm) -> Self {
|
||||
// Compute the file hash before any normalization.
|
||||
let src_hash = SourceFileHash::new(hash_kind, &src);
|
||||
let normalized_pos = normalize_src(&mut src, start_pos);
|
||||
let normalized_pos = normalize_src(&mut src);
|
||||
|
||||
let name_hash = {
|
||||
let mut hasher: StableHasher = StableHasher::new();
|
||||
name.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
};
|
||||
let end_pos = start_pos.to_usize() + src.len();
|
||||
assert!(end_pos <= u32::MAX as usize);
|
||||
let source_len = src.len();
|
||||
assert!(source_len <= u32::MAX as usize);
|
||||
|
||||
let (lines, multibyte_chars, non_narrow_chars) =
|
||||
analyze_source_file::analyze_source_file(&src, start_pos);
|
||||
analyze_source_file::analyze_source_file(&src);
|
||||
|
||||
SourceFile {
|
||||
name,
|
||||
src: Some(Lrc::new(src)),
|
||||
src_hash,
|
||||
external_src: Lock::new(ExternalSource::Unneeded),
|
||||
start_pos,
|
||||
end_pos: Pos::from_usize(end_pos),
|
||||
start_pos: BytePos::from_u32(0),
|
||||
source_len: RelativeBytePos::from_usize(source_len),
|
||||
lines: Lock::new(SourceFileLines::Lines(lines)),
|
||||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
|
@ -1559,7 +1553,7 @@ impl SourceFile {
|
|||
|
||||
pub fn lines<F, R>(&self, f: F) -> R
|
||||
where
|
||||
F: FnOnce(&[BytePos]) -> R,
|
||||
F: FnOnce(&[RelativeBytePos]) -> R,
|
||||
{
|
||||
let mut guard = self.lines.borrow_mut();
|
||||
match &*guard {
|
||||
|
@ -1579,7 +1573,7 @@ impl SourceFile {
|
|||
match bytes_per_diff {
|
||||
1 => {
|
||||
lines.extend(raw_diffs.into_iter().map(|&diff| {
|
||||
line_start = line_start + BytePos(diff as u32);
|
||||
line_start = line_start + RelativeBytePos(diff as u32);
|
||||
line_start
|
||||
}));
|
||||
}
|
||||
|
@ -1588,7 +1582,7 @@ impl SourceFile {
|
|||
let pos = bytes_per_diff * i;
|
||||
let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
|
||||
let diff = u16::from_le_bytes(bytes);
|
||||
line_start = line_start + BytePos(diff as u32);
|
||||
line_start = line_start + RelativeBytePos(diff as u32);
|
||||
line_start
|
||||
}));
|
||||
}
|
||||
|
@ -1602,7 +1596,7 @@ impl SourceFile {
|
|||
raw_diffs[pos + 3],
|
||||
];
|
||||
let diff = u32::from_le_bytes(bytes);
|
||||
line_start = line_start + BytePos(diff);
|
||||
line_start = line_start + RelativeBytePos(diff);
|
||||
line_start
|
||||
}));
|
||||
}
|
||||
|
@ -1617,8 +1611,10 @@ impl SourceFile {
|
|||
|
||||
/// Returns the `BytePos` of the beginning of the current line.
|
||||
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
|
||||
let pos = self.relative_position(pos);
|
||||
let line_index = self.lookup_line(pos).unwrap();
|
||||
self.lines(|lines| lines[line_index])
|
||||
let line_start_pos = self.lines(|lines| lines[line_index]);
|
||||
self.absolute_position(line_start_pos)
|
||||
}
|
||||
|
||||
/// Add externally loaded source.
|
||||
|
@ -1643,7 +1639,7 @@ impl SourceFile {
|
|||
if let Some(mut src) = src {
|
||||
// The src_hash needs to be computed on the pre-normalized src.
|
||||
if self.src_hash.matches(&src) {
|
||||
normalize_src(&mut src, BytePos::from_usize(0));
|
||||
normalize_src(&mut src);
|
||||
*src_kind = ExternalSourceKind::Present(Lrc::new(src));
|
||||
return true;
|
||||
}
|
||||
|
@ -1676,8 +1672,7 @@ impl SourceFile {
|
|||
|
||||
let begin = {
|
||||
let line = self.lines(|lines| lines.get(line_number).copied())?;
|
||||
let begin: BytePos = line - self.start_pos;
|
||||
begin.to_usize()
|
||||
line.to_usize()
|
||||
};
|
||||
|
||||
if let Some(ref src) = self.src {
|
||||
|
@ -1703,25 +1698,41 @@ impl SourceFile {
|
|||
self.lines(|lines| lines.len())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
|
||||
BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
|
||||
RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn end_position(&self) -> BytePos {
|
||||
self.absolute_position(self.source_len)
|
||||
}
|
||||
|
||||
/// Finds the line containing the given position. The return value is the
|
||||
/// index into the `lines` array of this `SourceFile`, not the 1-based line
|
||||
/// number. If the source_file is empty or the position is located before the
|
||||
/// first line, `None` is returned.
|
||||
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
|
||||
pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
|
||||
self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
|
||||
}
|
||||
|
||||
pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
|
||||
if self.is_empty() {
|
||||
return self.start_pos..self.end_pos;
|
||||
return self.start_pos..self.start_pos;
|
||||
}
|
||||
|
||||
self.lines(|lines| {
|
||||
assert!(line_index < lines.len());
|
||||
if line_index == (lines.len() - 1) {
|
||||
lines[line_index]..self.end_pos
|
||||
self.absolute_position(lines[line_index])..self.end_position()
|
||||
} else {
|
||||
lines[line_index]..lines[line_index + 1]
|
||||
self.absolute_position(lines[line_index])
|
||||
..self.absolute_position(lines[line_index + 1])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -1732,17 +1743,19 @@ impl SourceFile {
|
|||
/// returns true still contain one byte position according to this function.
|
||||
#[inline]
|
||||
pub fn contains(&self, byte_pos: BytePos) -> bool {
|
||||
byte_pos >= self.start_pos && byte_pos <= self.end_pos
|
||||
byte_pos >= self.start_pos && byte_pos <= self.end_position()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.start_pos == self.end_pos
|
||||
self.source_len.to_u32() == 0
|
||||
}
|
||||
|
||||
/// Calculates the original byte position relative to the start of the file
|
||||
/// based on the given byte position.
|
||||
pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
|
||||
pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
|
||||
let pos = self.relative_position(pos);
|
||||
|
||||
// Diff before any records is 0. Otherwise use the previously recorded
|
||||
// diff as that applies to the following characters until a new diff
|
||||
// is recorded.
|
||||
|
@ -1752,7 +1765,7 @@ impl SourceFile {
|
|||
Err(i) => self.normalized_pos[i - 1].diff,
|
||||
};
|
||||
|
||||
BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
|
||||
RelativeBytePos::from_u32(pos.0 + diff)
|
||||
}
|
||||
|
||||
/// Calculates a normalized byte position from a byte offset relative to the
|
||||
|
@ -1778,7 +1791,7 @@ impl SourceFile {
|
|||
}
|
||||
|
||||
/// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
|
||||
pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
|
||||
fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
|
||||
// The number of extra bytes due to multibyte chars in the `SourceFile`.
|
||||
let mut total_extra_bytes = 0;
|
||||
|
||||
|
@ -1796,13 +1809,13 @@ impl SourceFile {
|
|||
}
|
||||
}
|
||||
|
||||
assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
|
||||
CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
|
||||
assert!(total_extra_bytes <= bpos.to_u32());
|
||||
CharPos(bpos.to_usize() - total_extra_bytes as usize)
|
||||
}
|
||||
|
||||
/// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
|
||||
/// given `RelativeBytePos`.
|
||||
pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
|
||||
fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
|
||||
let chpos = self.bytepos_to_file_charpos(pos);
|
||||
match self.lookup_line(pos) {
|
||||
Some(a) => {
|
||||
|
@ -1823,6 +1836,7 @@ impl SourceFile {
|
|||
/// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
|
||||
/// column offset when displayed, for a given `BytePos`.
|
||||
pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
|
||||
let pos = self.relative_position(pos);
|
||||
let (line, col_or_chpos) = self.lookup_file_pos(pos);
|
||||
if line > 0 {
|
||||
let col = col_or_chpos;
|
||||
|
@ -1861,16 +1875,10 @@ impl SourceFile {
|
|||
}
|
||||
|
||||
/// Normalizes the source code and records the normalizations.
|
||||
fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
|
||||
fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
|
||||
let mut normalized_pos = vec![];
|
||||
remove_bom(src, &mut normalized_pos);
|
||||
normalize_newlines(src, &mut normalized_pos);
|
||||
|
||||
// Offset all the positions by start_pos to match the final file positions.
|
||||
for np in &mut normalized_pos {
|
||||
np.pos.0 += start_pos.0;
|
||||
}
|
||||
|
||||
normalized_pos
|
||||
}
|
||||
|
||||
|
@ -1878,7 +1886,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
|
|||
fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
|
||||
if src.starts_with('\u{feff}') {
|
||||
src.drain(..3);
|
||||
normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
|
||||
normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 });
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1913,7 +1921,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)
|
|||
cursor += idx - gap_len;
|
||||
gap_len += 1;
|
||||
normalized_pos.push(NormalizedPos {
|
||||
pos: BytePos::from_usize(cursor + 1),
|
||||
pos: RelativeBytePos::from_usize(cursor + 1),
|
||||
diff: original_gap + gap_len as u32,
|
||||
});
|
||||
}
|
||||
|
@ -2015,6 +2023,12 @@ impl_pos! {
|
|||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
|
||||
pub struct BytePos(pub u32);
|
||||
|
||||
/// A byte offset relative to file beginning.
|
||||
///
|
||||
/// Keep this small (currently 32-bits), as AST contains a lot of them.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
|
||||
pub struct RelativeBytePos(pub u32);
|
||||
|
||||
/// A character offset.
|
||||
///
|
||||
/// Because of multibyte UTF-8 characters, a byte offset
|
||||
|
@ -2036,6 +2050,24 @@ impl<D: Decoder> Decodable<D> for BytePos {
|
|||
}
|
||||
}
|
||||
|
||||
impl<H: HashStableContext> HashStable<H> for RelativeBytePos {
|
||||
fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) {
|
||||
self.0.hash_stable(hcx, hasher);
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Encoder> Encodable<S> for RelativeBytePos {
|
||||
fn encode(&self, s: &mut S) {
|
||||
s.emit_u32(self.0);
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for RelativeBytePos {
|
||||
fn decode(d: &mut D) -> RelativeBytePos {
|
||||
RelativeBytePos(d.read_u32())
|
||||
}
|
||||
}
|
||||
|
||||
// _____________________________________________________________________________
|
||||
// Loc, SourceFileAndLine, SourceFileAndBytePos
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue