1
Fork 0

Use relative positions inside a SourceFile.

This commit is contained in:
Camille GILLOT 2023-09-03 10:15:35 +00:00
parent 585bb5e68d
commit 258ace613d
20 changed files with 217 additions and 281 deletions

View file

@ -1107,27 +1107,27 @@ impl fmt::Debug for SpanData {
}
/// Identifies an offset of a multi-byte character in a `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub struct MultiByteChar {
/// The relative offset of the character in the `SourceFile`.
pub pos: BytePos,
pub pos: RelativeBytePos,
/// The number of bytes, `>= 2`.
pub bytes: u8,
}
/// Identifies an offset of a non-narrow character in a `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub enum NonNarrowChar {
/// Represents a zero-width character.
ZeroWidth(BytePos),
ZeroWidth(RelativeBytePos),
/// Represents a wide (full-width) character.
Wide(BytePos),
Wide(RelativeBytePos),
/// Represents a tab character, represented visually with a width of 4 characters.
Tab(BytePos),
Tab(RelativeBytePos),
}
impl NonNarrowChar {
fn new(pos: BytePos, width: usize) -> Self {
fn new(pos: RelativeBytePos, width: usize) -> Self {
match width {
0 => NonNarrowChar::ZeroWidth(pos),
2 => NonNarrowChar::Wide(pos),
@ -1137,7 +1137,7 @@ impl NonNarrowChar {
}
/// Returns the relative offset of the character in the `SourceFile`.
pub fn pos(&self) -> BytePos {
pub fn pos(&self) -> RelativeBytePos {
match *self {
NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
}
@ -1153,10 +1153,10 @@ impl NonNarrowChar {
}
}
impl Add<BytePos> for NonNarrowChar {
impl Add<RelativeBytePos> for NonNarrowChar {
type Output = Self;
fn add(self, rhs: BytePos) -> Self {
fn add(self, rhs: RelativeBytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
@ -1165,10 +1165,10 @@ impl Add<BytePos> for NonNarrowChar {
}
}
impl Sub<BytePos> for NonNarrowChar {
impl Sub<RelativeBytePos> for NonNarrowChar {
type Output = Self;
fn sub(self, rhs: BytePos) -> Self {
fn sub(self, rhs: RelativeBytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
@ -1178,10 +1178,10 @@ impl Sub<BytePos> for NonNarrowChar {
}
/// Identifies an offset of a character that was normalized away from `SourceFile`.
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
pub struct NormalizedPos {
/// The relative offset of the character in the `SourceFile`.
pub pos: BytePos,
pub pos: RelativeBytePos,
/// The difference between original and normalized string at position.
pub diff: u32,
}
@ -1293,7 +1293,7 @@ impl SourceFileHash {
#[derive(Clone)]
pub enum SourceFileLines {
/// The source file lines, in decoded (random-access) form.
Lines(Vec<BytePos>),
Lines(Vec<RelativeBytePos>),
/// The source file lines, in undecoded difference list form.
Diffs(SourceFileDiffs),
@ -1317,7 +1317,7 @@ pub struct SourceFileDiffs {
/// Position of the first line. Note that this is always encoded as a
/// `RelativeBytePos` because it is often much larger than any of the
/// differences.
line_start: BytePos,
line_start: RelativeBytePos,
/// Always 1, 2, or 4. Always as small as possible, while being big
/// enough to hold the length of the longest line in the source file.
@ -1352,7 +1352,7 @@ pub struct SourceFile {
/// The start position of this source in the `SourceMap`.
pub start_pos: BytePos,
/// The byte length of this source.
pub end_pos: BytePos,
pub source_len: RelativeBytePos,
/// Locations of lines beginnings in the source code.
pub lines: Lock<SourceFileLines>,
/// Locations of multi-byte characters in the source code.
@ -1375,7 +1375,7 @@ impl Clone for SourceFile {
src_hash: self.src_hash,
external_src: Lock::new(self.external_src.borrow().clone()),
start_pos: self.start_pos,
end_pos: self.end_pos,
source_len: self.source_len,
lines: Lock::new(self.lines.borrow().clone()),
multibyte_chars: self.multibyte_chars.clone(),
non_narrow_chars: self.non_narrow_chars.clone(),
@ -1390,8 +1390,8 @@ impl<S: Encoder> Encodable<S> for SourceFile {
fn encode(&self, s: &mut S) {
self.name.encode(s);
self.src_hash.encode(s);
self.start_pos.encode(s);
self.end_pos.encode(s);
// Do not encode `start_pos` as it's global state for this session.
self.source_len.encode(s);
// We are always in `Lines` form by the time we reach here.
assert!(self.lines.borrow().is_lines());
@ -1465,8 +1465,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
fn decode(d: &mut D) -> SourceFile {
let name: FileName = Decodable::decode(d);
let src_hash: SourceFileHash = Decodable::decode(d);
let start_pos: BytePos = Decodable::decode(d);
let end_pos: BytePos = Decodable::decode(d);
let source_len: RelativeBytePos = Decodable::decode(d);
let lines = {
let num_lines: u32 = Decodable::decode(d);
if num_lines > 0 {
@ -1474,7 +1473,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
let bytes_per_diff = d.read_u8() as usize;
// Read the first element.
let line_start: BytePos = Decodable::decode(d);
let line_start: RelativeBytePos = Decodable::decode(d);
// Read the difference list.
let num_diffs = num_lines as usize - 1;
@ -1496,8 +1495,8 @@ impl<D: Decoder> Decodable<D> for SourceFile {
let cnum: CrateNum = Decodable::decode(d);
SourceFile {
name,
start_pos,
end_pos,
start_pos: BytePos::from_u32(0),
source_len,
src: None,
src_hash,
// Unused - the metadata decoder will construct
@ -1520,34 +1519,29 @@ impl fmt::Debug for SourceFile {
}
impl SourceFile {
pub fn new(
name: FileName,
mut src: String,
start_pos: BytePos,
hash_kind: SourceFileHashAlgorithm,
) -> Self {
pub fn new(name: FileName, mut src: String, hash_kind: SourceFileHashAlgorithm) -> Self {
// Compute the file hash before any normalization.
let src_hash = SourceFileHash::new(hash_kind, &src);
let normalized_pos = normalize_src(&mut src, start_pos);
let normalized_pos = normalize_src(&mut src);
let name_hash = {
let mut hasher: StableHasher = StableHasher::new();
name.hash(&mut hasher);
hasher.finish()
};
let end_pos = start_pos.to_usize() + src.len();
assert!(end_pos <= u32::MAX as usize);
let source_len = src.len();
assert!(source_len <= u32::MAX as usize);
let (lines, multibyte_chars, non_narrow_chars) =
analyze_source_file::analyze_source_file(&src, start_pos);
analyze_source_file::analyze_source_file(&src);
SourceFile {
name,
src: Some(Lrc::new(src)),
src_hash,
external_src: Lock::new(ExternalSource::Unneeded),
start_pos,
end_pos: Pos::from_usize(end_pos),
start_pos: BytePos::from_u32(0),
source_len: RelativeBytePos::from_usize(source_len),
lines: Lock::new(SourceFileLines::Lines(lines)),
multibyte_chars,
non_narrow_chars,
@ -1559,7 +1553,7 @@ impl SourceFile {
pub fn lines<F, R>(&self, f: F) -> R
where
F: FnOnce(&[BytePos]) -> R,
F: FnOnce(&[RelativeBytePos]) -> R,
{
let mut guard = self.lines.borrow_mut();
match &*guard {
@ -1579,7 +1573,7 @@ impl SourceFile {
match bytes_per_diff {
1 => {
lines.extend(raw_diffs.into_iter().map(|&diff| {
line_start = line_start + BytePos(diff as u32);
line_start = line_start + RelativeBytePos(diff as u32);
line_start
}));
}
@ -1588,7 +1582,7 @@ impl SourceFile {
let pos = bytes_per_diff * i;
let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
let diff = u16::from_le_bytes(bytes);
line_start = line_start + BytePos(diff as u32);
line_start = line_start + RelativeBytePos(diff as u32);
line_start
}));
}
@ -1602,7 +1596,7 @@ impl SourceFile {
raw_diffs[pos + 3],
];
let diff = u32::from_le_bytes(bytes);
line_start = line_start + BytePos(diff);
line_start = line_start + RelativeBytePos(diff);
line_start
}));
}
@ -1617,8 +1611,10 @@ impl SourceFile {
/// Returns the `BytePos` of the beginning of the current line.
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
let pos = self.relative_position(pos);
let line_index = self.lookup_line(pos).unwrap();
self.lines(|lines| lines[line_index])
let line_start_pos = self.lines(|lines| lines[line_index]);
self.absolute_position(line_start_pos)
}
/// Add externally loaded source.
@ -1643,7 +1639,7 @@ impl SourceFile {
if let Some(mut src) = src {
// The src_hash needs to be computed on the pre-normalized src.
if self.src_hash.matches(&src) {
normalize_src(&mut src, BytePos::from_usize(0));
normalize_src(&mut src);
*src_kind = ExternalSourceKind::Present(Lrc::new(src));
return true;
}
@ -1676,8 +1672,7 @@ impl SourceFile {
let begin = {
let line = self.lines(|lines| lines.get(line_number).copied())?;
let begin: BytePos = line - self.start_pos;
begin.to_usize()
line.to_usize()
};
if let Some(ref src) = self.src {
@ -1703,25 +1698,41 @@ impl SourceFile {
self.lines(|lines| lines.len())
}
/// Converts a position relative to the start of this `SourceFile` into an
/// absolute `BytePos` by adding this file's `start_pos` to it.
#[inline]
pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
    let offset = pos.to_u32();
    let file_start = self.start_pos.to_u32();
    BytePos::from_u32(offset + file_start)
}
/// Converts an absolute `BytePos` into a position relative to the start of
/// this `SourceFile` by subtracting this file's `start_pos` from it.
///
/// NOTE(review): the subtraction assumes `pos` is not located before
/// `start_pos` — confirm that callers only pass positions inside this file.
#[inline]
pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
    let absolute = pos.to_u32();
    let file_start = self.start_pos.to_u32();
    RelativeBytePos::from_u32(absolute - file_start)
}
/// Returns the absolute position of the end of this file, obtained by
/// converting `source_len` through `absolute_position`.
#[inline]
pub fn end_position(&self) -> BytePos {
    let len = self.source_len;
    self.absolute_position(len)
}
/// Finds the line containing the given position. The return value is the
/// index into the `lines` array of this `SourceFile`, not the 1-based line
/// number. If the source_file is empty or the position is located before the
/// first line, `None` is returned.
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
}
pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
if self.is_empty() {
return self.start_pos..self.end_pos;
return self.start_pos..self.start_pos;
}
self.lines(|lines| {
assert!(line_index < lines.len());
if line_index == (lines.len() - 1) {
lines[line_index]..self.end_pos
self.absolute_position(lines[line_index])..self.end_position()
} else {
lines[line_index]..lines[line_index + 1]
self.absolute_position(lines[line_index])
..self.absolute_position(lines[line_index + 1])
}
})
}
@ -1732,17 +1743,19 @@ impl SourceFile {
/// returns true still contain one byte position according to this function.
#[inline]
pub fn contains(&self, byte_pos: BytePos) -> bool {
byte_pos >= self.start_pos && byte_pos <= self.end_pos
byte_pos >= self.start_pos && byte_pos <= self.end_position()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.start_pos == self.end_pos
self.source_len.to_u32() == 0
}
/// Calculates the original byte position relative to the start of the file
/// based on the given byte position.
pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
let pos = self.relative_position(pos);
// Diff before any records is 0. Otherwise use the previously recorded
// diff as that applies to the following characters until a new diff
// is recorded.
@ -1752,7 +1765,7 @@ impl SourceFile {
Err(i) => self.normalized_pos[i - 1].diff,
};
BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
RelativeBytePos::from_u32(pos.0 + diff)
}
/// Calculates a normalized byte position from a byte offset relative to the
@ -1778,7 +1791,7 @@ impl SourceFile {
}
/// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
// The number of extra bytes due to multibyte chars in the `SourceFile`.
let mut total_extra_bytes = 0;
@ -1796,13 +1809,13 @@ impl SourceFile {
}
}
assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
assert!(total_extra_bytes <= bpos.to_u32());
CharPos(bpos.to_usize() - total_extra_bytes as usize)
}
/// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
/// given `RelativeBytePos`.
pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
let chpos = self.bytepos_to_file_charpos(pos);
match self.lookup_line(pos) {
Some(a) => {
@ -1823,6 +1836,7 @@ impl SourceFile {
/// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
/// column offset when displayed, for a given `BytePos`.
pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
let pos = self.relative_position(pos);
let (line, col_or_chpos) = self.lookup_file_pos(pos);
if line > 0 {
let col = col_or_chpos;
@ -1861,16 +1875,10 @@ impl SourceFile {
}
/// Normalizes the source code and records the normalizations.
fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
let mut normalized_pos = vec![];
remove_bom(src, &mut normalized_pos);
normalize_newlines(src, &mut normalized_pos);
// Offset all the positions by start_pos to match the final file positions.
for np in &mut normalized_pos {
np.pos.0 += start_pos.0;
}
normalized_pos
}
@ -1878,7 +1886,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
if src.starts_with('\u{feff}') {
src.drain(..3);
normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 });
}
}
@ -1913,7 +1921,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)
cursor += idx - gap_len;
gap_len += 1;
normalized_pos.push(NormalizedPos {
pos: BytePos::from_usize(cursor + 1),
pos: RelativeBytePos::from_usize(cursor + 1),
diff: original_gap + gap_len as u32,
});
}
@ -2015,6 +2023,12 @@ impl_pos! {
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct BytePos(pub u32);
/// A byte offset relative to file beginning.
///
/// Keep this small (currently 32-bits), as AST contains a lot of them.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct RelativeBytePos(pub u32);
/// A character offset.
///
/// Because of multibyte UTF-8 characters, a byte offset
@ -2036,6 +2050,24 @@ impl<D: Decoder> Decodable<D> for BytePos {
}
}
impl<H: HashStableContext> HashStable<H> for RelativeBytePos {
fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) {
self.0.hash_stable(hcx, hasher);
}
}
impl<S: Encoder> Encodable<S> for RelativeBytePos {
fn encode(&self, s: &mut S) {
s.emit_u32(self.0);
}
}
/// A `RelativeBytePos` is decoded by reading a `u32` and wrapping it.
impl<D: Decoder> Decodable<D> for RelativeBytePos {
    fn decode(d: &mut D) -> RelativeBytePos {
        let raw = d.read_u32();
        RelativeBytePos(raw)
    }
}
// _____________________________________________________________________________
// Loc, SourceFileAndLine, SourceFileAndBytePos
//