Use relative positions inside a SourceFile.

2023-09-03 10:15:35 +00:00 · 2023-09-03 10:15:35 +00:00 · 258ace613d
commit 258ace613d
parent 585bb5e68d
20 changed files with 217 additions and 281 deletions
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@ -1107,27 +1107,27 @@ impl fmt::Debug for SpanData {
 }

 /// Identifies an offset of a multi-byte character in a `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
 pub struct MultiByteChar {
-    /// The absolute offset of the character in the `SourceMap`.
+    /// The relative offset of the character in the `SourceFile`.
-    pub pos: BytePos,
+    pub pos: RelativeBytePos,
    /// The number of bytes, `>= 2`.
    pub bytes: u8,
 }

 /// Identifies an offset of a non-narrow character in a `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
 pub enum NonNarrowChar {
    /// Represents a zero-width character.
-    ZeroWidth(BytePos),
+    ZeroWidth(RelativeBytePos),
    /// Represents a wide (full-width) character.
-    Wide(BytePos),
+    Wide(RelativeBytePos),
    /// Represents a tab character, represented visually with a width of 4 characters.
-    Tab(BytePos),
+    Tab(RelativeBytePos),
 }

 impl NonNarrowChar {
-    fn new(pos: BytePos, width: usize) -> Self {
+    fn new(pos: RelativeBytePos, width: usize) -> Self {
        match width {
            0 => NonNarrowChar::ZeroWidth(pos),
            2 => NonNarrowChar::Wide(pos),
@ -1137,7 +1137,7 @@ impl NonNarrowChar {
    }

-    /// Returns the absolute offset of the character in the `SourceMap`.
+    /// Returns the relative offset of the character in the `SourceFile`.
-    pub fn pos(&self) -> BytePos {
+    pub fn pos(&self) -> RelativeBytePos {
        match *self {
            NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
        }
@ -1153,10 +1153,10 @@ impl NonNarrowChar {
    }
 }

-impl Add<BytePos> for NonNarrowChar {
+impl Add<RelativeBytePos> for NonNarrowChar {
    type Output = Self;

-    fn add(self, rhs: BytePos) -> Self {
+    fn add(self, rhs: RelativeBytePos) -> Self {
        match self {
            NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
            NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
@ -1165,10 +1165,10 @@ impl Add<BytePos> for NonNarrowChar {
    }
 }

-impl Sub<BytePos> for NonNarrowChar {
+impl Sub<RelativeBytePos> for NonNarrowChar {
    type Output = Self;

-    fn sub(self, rhs: BytePos) -> Self {
+    fn sub(self, rhs: RelativeBytePos) -> Self {
        match self {
            NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
            NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
@ -1178,10 +1178,10 @@ impl Sub<BytePos> for NonNarrowChar {
 }

 /// Identifies an offset of a character that was normalized away from `SourceFile`.
-#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
+#[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug, HashStable_Generic)]
 pub struct NormalizedPos {
-    /// The absolute offset of the character in the `SourceMap`.
+    /// The relative offset of the character in the `SourceFile`.
-    pub pos: BytePos,
+    pub pos: RelativeBytePos,
    /// The difference between original and normalized string at position.
    pub diff: u32,
 }
@ -1293,7 +1293,7 @@ impl SourceFileHash {
 #[derive(Clone)]
 pub enum SourceFileLines {
    /// The source file lines, in decoded (random-access) form.
-    Lines(Vec<BytePos>),
+    Lines(Vec<RelativeBytePos>),

    /// The source file lines, in undecoded difference list form.
    Diffs(SourceFileDiffs),
@ -1317,7 +1317,7 @@ pub struct SourceFileDiffs {
    /// Position of the first line. Note that this is always encoded as a
-    /// `BytePos` because it is often much larger than any of the
+    /// `RelativeBytePos` because it is often much larger than any of the
    /// differences.
-    line_start: BytePos,
+    line_start: RelativeBytePos,

    /// Always 1, 2, or 4. Always as small as possible, while being big
    /// enough to hold the length of the longest line in the source file.
@ -1352,7 +1352,7 @@ pub struct SourceFile {
    /// The start position of this source in the `SourceMap`.
    pub start_pos: BytePos,
-    /// The end position of this source in the `SourceMap`.
+    /// The byte length of this source.
-    pub end_pos: BytePos,
+    pub source_len: RelativeBytePos,
    /// Locations of lines beginnings in the source code.
    pub lines: Lock<SourceFileLines>,
    /// Locations of multi-byte characters in the source code.
@ -1375,7 +1375,7 @@ impl Clone for SourceFile {
            src_hash: self.src_hash,
            external_src: Lock::new(self.external_src.borrow().clone()),
            start_pos: self.start_pos,
-            end_pos: self.end_pos,
+            source_len: self.source_len,
            lines: Lock::new(self.lines.borrow().clone()),
            multibyte_chars: self.multibyte_chars.clone(),
            non_narrow_chars: self.non_narrow_chars.clone(),
@ -1390,8 +1390,8 @@ impl<S: Encoder> Encodable<S> for SourceFile {
    fn encode(&self, s: &mut S) {
        self.name.encode(s);
        self.src_hash.encode(s);
-        self.start_pos.encode(s);
-        self.end_pos.encode(s);
+        // Do not encode `start_pos` as it's global state for this session.
+        self.source_len.encode(s);

        // We are always in `Lines` form by the time we reach here.
        assert!(self.lines.borrow().is_lines());
@ -1465,8 +1465,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
    fn decode(d: &mut D) -> SourceFile {
        let name: FileName = Decodable::decode(d);
        let src_hash: SourceFileHash = Decodable::decode(d);
-        let start_pos: BytePos = Decodable::decode(d);
-        let end_pos: BytePos = Decodable::decode(d);
+        let source_len: RelativeBytePos = Decodable::decode(d);
        let lines = {
            let num_lines: u32 = Decodable::decode(d);
            if num_lines > 0 {
@ -1474,7 +1473,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
                let bytes_per_diff = d.read_u8() as usize;

                // Read the first element.
-                let line_start: BytePos = Decodable::decode(d);
+                let line_start: RelativeBytePos = Decodable::decode(d);

                // Read the difference list.
                let num_diffs = num_lines as usize - 1;
@ -1496,8 +1495,8 @@ impl<D: Decoder> Decodable<D> for SourceFile {
        let cnum: CrateNum = Decodable::decode(d);
        SourceFile {
            name,
-            start_pos,
-            end_pos,
+            start_pos: BytePos::from_u32(0),
+            source_len,
            src: None,
            src_hash,
            // Unused - the metadata decoder will construct
@ -1520,34 +1519,29 @@ impl fmt::Debug for SourceFile {
 }

 impl SourceFile {
-    pub fn new(
-        name: FileName,
-        mut src: String,
-        start_pos: BytePos,
-        hash_kind: SourceFileHashAlgorithm,
-    ) -> Self {
+    pub fn new(name: FileName, mut src: String, hash_kind: SourceFileHashAlgorithm) -> Self {
        // Compute the file hash before any normalization.
        let src_hash = SourceFileHash::new(hash_kind, &src);
-        let normalized_pos = normalize_src(&mut src, start_pos);
+        let normalized_pos = normalize_src(&mut src);

        let name_hash = {
            let mut hasher: StableHasher = StableHasher::new();
            name.hash(&mut hasher);
            hasher.finish()
        };
-        let end_pos = start_pos.to_usize() + src.len();
-        assert!(end_pos <= u32::MAX as usize);
+        let source_len = src.len();
+        assert!(source_len <= u32::MAX as usize);

        let (lines, multibyte_chars, non_narrow_chars) =
-            analyze_source_file::analyze_source_file(&src, start_pos);
+            analyze_source_file::analyze_source_file(&src);

        SourceFile {
            name,
            src: Some(Lrc::new(src)),
            src_hash,
            external_src: Lock::new(ExternalSource::Unneeded),
-            start_pos,
-            end_pos: Pos::from_usize(end_pos),
+            start_pos: BytePos::from_u32(0),
+            source_len: RelativeBytePos::from_usize(source_len),
            lines: Lock::new(SourceFileLines::Lines(lines)),
            multibyte_chars,
            non_narrow_chars,
@ -1559,7 +1553,7 @@ impl SourceFile {

    pub fn lines<F, R>(&self, f: F) -> R
    where
-        F: FnOnce(&[BytePos]) -> R,
+        F: FnOnce(&[RelativeBytePos]) -> R,
    {
        let mut guard = self.lines.borrow_mut();
        match &*guard {
@ -1579,7 +1573,7 @@ impl SourceFile {
                match bytes_per_diff {
                    1 => {
                        lines.extend(raw_diffs.into_iter().map(|&diff| {
-                            line_start = line_start + BytePos(diff as u32);
+                            line_start = line_start + RelativeBytePos(diff as u32);
                            line_start
                        }));
                    }
@ -1588,7 +1582,7 @@ impl SourceFile {
                            let pos = bytes_per_diff * i;
                            let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
                            let diff = u16::from_le_bytes(bytes);
-                            line_start = line_start + BytePos(diff as u32);
+                            line_start = line_start + RelativeBytePos(diff as u32);
                            line_start
                        }));
                    }
@ -1602,7 +1596,7 @@ impl SourceFile {
                                raw_diffs[pos + 3],
                            ];
                            let diff = u32::from_le_bytes(bytes);
-                            line_start = line_start + BytePos(diff);
+                            line_start = line_start + RelativeBytePos(diff);
                            line_start
                        }));
                    }
@ -1617,8 +1611,10 @@ impl SourceFile {

    /// Returns the `BytePos` of the beginning of the current line.
    pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
+        let pos = self.relative_position(pos);
        let line_index = self.lookup_line(pos).unwrap();
-        self.lines(|lines| lines[line_index])
+        let line_start_pos = self.lines(|lines| lines[line_index]);
+        self.absolute_position(line_start_pos)
    }

    /// Add externally loaded source.
@ -1643,7 +1639,7 @@ impl SourceFile {
                if let Some(mut src) = src {
                    // The src_hash needs to be computed on the pre-normalized src.
                    if self.src_hash.matches(&src) {
-                        normalize_src(&mut src, BytePos::from_usize(0));
+                        normalize_src(&mut src);
                        *src_kind = ExternalSourceKind::Present(Lrc::new(src));
                        return true;
                    }
@ -1676,8 +1672,7 @@ impl SourceFile {

        let begin = {
            let line = self.lines(|lines| lines.get(line_number).copied())?;
-            let begin: BytePos = line - self.start_pos;
-            begin.to_usize()
+            line.to_usize()
        };

        if let Some(ref src) = self.src {
@ -1703,25 +1698,41 @@ impl SourceFile {
        self.lines(|lines| lines.len())
    }

+    #[inline]
+    pub fn absolute_position(&self, pos: RelativeBytePos) -> BytePos {
+        BytePos::from_u32(pos.to_u32() + self.start_pos.to_u32())
+    }
+
+    #[inline]
+    pub fn relative_position(&self, pos: BytePos) -> RelativeBytePos {
+        RelativeBytePos::from_u32(pos.to_u32() - self.start_pos.to_u32())
+    }
+
+    #[inline]
+    pub fn end_position(&self) -> BytePos {
+        self.absolute_position(self.source_len)
+    }
+
    /// Finds the line containing the given position. The return value is the
    /// index into the `lines` array of this `SourceFile`, not the 1-based line
    /// number. If the source_file is empty or the position is located before the
    /// first line, `None` is returned.
-    pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
+    pub fn lookup_line(&self, pos: RelativeBytePos) -> Option<usize> {
        self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
    }

    pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
        if self.is_empty() {
-            return self.start_pos..self.end_pos;
+            return self.start_pos..self.start_pos;
        }

        self.lines(|lines| {
            assert!(line_index < lines.len());
            if line_index == (lines.len() - 1) {
-                lines[line_index]..self.end_pos
+                self.absolute_position(lines[line_index])..self.end_position()
            } else {
-                lines[line_index]..lines[line_index + 1]
+                self.absolute_position(lines[line_index])
+                    ..self.absolute_position(lines[line_index + 1])
            }
        })
    }
@ -1732,17 +1743,19 @@ impl SourceFile {
    /// returns true still contain one byte position according to this function.
    #[inline]
    pub fn contains(&self, byte_pos: BytePos) -> bool {
-        byte_pos >= self.start_pos && byte_pos <= self.end_pos
+        byte_pos >= self.start_pos && byte_pos <= self.end_position()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
-        self.start_pos == self.end_pos
+        self.source_len.to_u32() == 0
    }

    /// Calculates the original byte position relative to the start of the file
    /// based on the given byte position.
-    pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
+    pub fn original_relative_byte_pos(&self, pos: BytePos) -> RelativeBytePos {
+        let pos = self.relative_position(pos);
+
        // Diff before any records is 0. Otherwise use the previously recorded
        // diff as that applies to the following characters until a new diff
        // is recorded.
@ -1752,7 +1765,7 @@ impl SourceFile {
            Err(i) => self.normalized_pos[i - 1].diff,
        };

-        BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
+        RelativeBytePos::from_u32(pos.0 + diff)
    }

    /// Calculates a normalized byte position from a byte offset relative to the
@ -1778,7 +1791,7 @@ impl SourceFile {
    }

-    /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
+    /// Converts a `RelativeBytePos` to a `CharPos` relative to the `SourceFile`.
-    pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
+    fn bytepos_to_file_charpos(&self, bpos: RelativeBytePos) -> CharPos {
        // The number of extra bytes due to multibyte chars in the `SourceFile`.
        let mut total_extra_bytes = 0;

@ -1796,13 +1809,13 @@ impl SourceFile {
            }
        }

-        assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
-        CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
+        assert!(total_extra_bytes <= bpos.to_u32());
+        CharPos(bpos.to_usize() - total_extra_bytes as usize)
    }

    /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
-    /// given `BytePos`.
+    /// given `RelativeBytePos`.
-    pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
+    fn lookup_file_pos(&self, pos: RelativeBytePos) -> (usize, CharPos) {
        let chpos = self.bytepos_to_file_charpos(pos);
        match self.lookup_line(pos) {
            Some(a) => {
@ -1823,6 +1836,7 @@ impl SourceFile {
    /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
    /// column offset when displayed, for a given `BytePos`.
    pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
+        let pos = self.relative_position(pos);
        let (line, col_or_chpos) = self.lookup_file_pos(pos);
        if line > 0 {
            let col = col_or_chpos;
@ -1861,16 +1875,10 @@ impl SourceFile {
 }

 /// Normalizes the source code and records the normalizations.
-fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
+fn normalize_src(src: &mut String) -> Vec<NormalizedPos> {
    let mut normalized_pos = vec![];
    remove_bom(src, &mut normalized_pos);
    normalize_newlines(src, &mut normalized_pos);
-
-    // Offset all the positions by start_pos to match the final file positions.
-    for np in &mut normalized_pos {
-        np.pos.0 += start_pos.0;
-    }
-
    normalized_pos
 }

@ -1878,7 +1886,7 @@ fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
 fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
    if src.starts_with('\u{feff}') {
        src.drain(..3);
-        normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
+        normalized_pos.push(NormalizedPos { pos: RelativeBytePos(0), diff: 3 });
    }
 }

@ -1913,7 +1921,7 @@ fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)
        cursor += idx - gap_len;
        gap_len += 1;
        normalized_pos.push(NormalizedPos {
-            pos: BytePos::from_usize(cursor + 1),
+            pos: RelativeBytePos::from_usize(cursor + 1),
            diff: original_gap + gap_len as u32,
        });
    }
@ -2015,6 +2023,12 @@ impl_pos! {
    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
    pub struct BytePos(pub u32);

+    /// A byte offset relative to file beginning.
+    ///
+    /// Keep this small (currently 32-bits), as AST contains a lot of them.
+    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+    pub struct RelativeBytePos(pub u32);
+
    /// A character offset.
    ///
    /// Because of multibyte UTF-8 characters, a byte offset
@ -2036,6 +2050,24 @@ impl<D: Decoder> Decodable<D> for BytePos {
    }
 }

+impl<H: HashStableContext> HashStable<H> for RelativeBytePos {
+    fn hash_stable(&self, hcx: &mut H, hasher: &mut StableHasher) {
+        self.0.hash_stable(hcx, hasher);
+    }
+}
+
+impl<S: Encoder> Encodable<S> for RelativeBytePos {
+    fn encode(&self, s: &mut S) {
+        s.emit_u32(self.0);
+    }
+}
+
+impl<D: Decoder> Decodable<D> for RelativeBytePos {
+    fn decode(d: &mut D) -> RelativeBytePos {
+        RelativeBytePos(d.read_u32())
+    }
+}
+
 // _____________________________________________________________________________
 // Loc, SourceFileAndLine, SourceFileAndBytePos
 //