mv compiler to compiler/
This commit is contained in:
parent
db534b3ac2
commit
9e5f7d5631
1686 changed files with 941 additions and 1051 deletions
274
compiler/rustc_span/src/analyze_source_file.rs
Normal file
274
compiler/rustc_span/src/analyze_source_file.rs
Normal file
|
@ -0,0 +1,274 @@
|
|||
use super::*;
|
||||
use unicode_width::UnicodeWidthChar;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Finds all newlines, multi-byte characters, and non-narrow characters in a
|
||||
/// SourceFile.
|
||||
///
|
||||
/// This function will use an SSE2 enhanced implementation if hardware support
|
||||
/// is detected at runtime.
|
||||
pub fn analyze_source_file(
|
||||
src: &str,
|
||||
source_file_start_pos: BytePos,
|
||||
) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
|
||||
let mut lines = vec![source_file_start_pos];
|
||||
let mut multi_byte_chars = vec![];
|
||||
let mut non_narrow_chars = vec![];
|
||||
|
||||
// Calls the right implementation, depending on hardware support available.
|
||||
analyze_source_file_dispatch(
|
||||
src,
|
||||
source_file_start_pos,
|
||||
&mut lines,
|
||||
&mut multi_byte_chars,
|
||||
&mut non_narrow_chars,
|
||||
);
|
||||
|
||||
// The code above optimistically registers a new line *after* each \n
|
||||
// it encounters. If that point is already outside the source_file, remove
|
||||
// it again.
|
||||
if let Some(&last_line_start) = lines.last() {
|
||||
let source_file_end = source_file_start_pos + BytePos::from_usize(src.len());
|
||||
assert!(source_file_end >= last_line_start);
|
||||
if last_line_start == source_file_end {
|
||||
lines.pop();
|
||||
}
|
||||
}
|
||||
|
||||
(lines, multi_byte_chars, non_narrow_chars)
|
||||
}
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] {
|
||||
fn analyze_source_file_dispatch(src: &str,
|
||||
source_file_start_pos: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
if is_x86_feature_detected!("sse2") {
|
||||
unsafe {
|
||||
analyze_source_file_sse2(src,
|
||||
source_file_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
} else {
|
||||
analyze_source_file_generic(src,
|
||||
src.len(),
|
||||
source_file_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks 16 byte chunks of text at a time. If the chunk contains
|
||||
/// something other than printable ASCII characters and newlines, the
|
||||
/// function falls back to the generic implementation. Otherwise it uses
|
||||
/// SSE2 intrinsics to quickly find all newlines.
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn analyze_source_file_sse2(src: &str,
|
||||
output_offset: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
const CHUNK_SIZE: usize = 16;
|
||||
|
||||
let src_bytes = src.as_bytes();
|
||||
|
||||
let chunk_count = src.len() / CHUNK_SIZE;
|
||||
|
||||
// This variable keeps track of where we should start decoding a
|
||||
// chunk. If a multi-byte character spans across chunk boundaries,
|
||||
// we need to skip that part in the next chunk because we already
|
||||
// handled it.
|
||||
let mut intra_chunk_offset = 0;
|
||||
|
||||
for chunk_index in 0 .. chunk_count {
|
||||
let ptr = src_bytes.as_ptr() as *const __m128i;
|
||||
// We don't know if the pointer is aligned to 16 bytes, so we
|
||||
// use `loadu`, which supports unaligned loading.
|
||||
let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize));
|
||||
|
||||
// For character in the chunk, see if its byte value is < 0, which
|
||||
// indicates that it's part of a UTF-8 char.
|
||||
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
|
||||
// Create a bit mask from the comparison results.
|
||||
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
|
||||
|
||||
// If the bit mask is all zero, we only have ASCII chars here:
|
||||
if multibyte_mask == 0 {
|
||||
assert!(intra_chunk_offset == 0);
|
||||
|
||||
// Check if there are any control characters in the chunk. All
|
||||
// control characters that we can encounter at this point have a
|
||||
// byte value less than 32 or ...
|
||||
let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32));
|
||||
let control_char_mask0 = _mm_movemask_epi8(control_char_test0);
|
||||
|
||||
// ... it's the ASCII 'DEL' character with a value of 127.
|
||||
let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127));
|
||||
let control_char_mask1 = _mm_movemask_epi8(control_char_test1);
|
||||
|
||||
let control_char_mask = control_char_mask0 | control_char_mask1;
|
||||
|
||||
if control_char_mask != 0 {
|
||||
// Check for newlines in the chunk
|
||||
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
|
||||
let newlines_mask = _mm_movemask_epi8(newlines_test);
|
||||
|
||||
if control_char_mask == newlines_mask {
|
||||
// All control characters are newlines, record them
|
||||
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
|
||||
let output_offset = output_offset +
|
||||
BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
|
||||
|
||||
loop {
|
||||
let index = newlines_mask.trailing_zeros();
|
||||
|
||||
if index >= CHUNK_SIZE as u32 {
|
||||
// We have arrived at the end of the chunk.
|
||||
break
|
||||
}
|
||||
|
||||
lines.push(BytePos(index) + output_offset);
|
||||
|
||||
// Clear the bit, so we can find the next one.
|
||||
newlines_mask &= (!1) << index;
|
||||
}
|
||||
|
||||
// We are done for this chunk. All control characters were
|
||||
// newlines and we took care of those.
|
||||
continue
|
||||
} else {
|
||||
// Some of the control characters are not newlines,
|
||||
// fall through to the slow path below.
|
||||
}
|
||||
} else {
|
||||
// No control characters, nothing to record for this chunk
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// The slow path.
|
||||
// There are control chars in here, fallback to generic decoding.
|
||||
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
|
||||
intra_chunk_offset = analyze_source_file_generic(
|
||||
&src[scan_start .. ],
|
||||
CHUNK_SIZE - intra_chunk_offset,
|
||||
BytePos::from_usize(scan_start) + output_offset,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars
|
||||
);
|
||||
}
|
||||
|
||||
// There might still be a tail left to analyze
|
||||
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
|
||||
if tail_start < src.len() {
|
||||
analyze_source_file_generic(&src[tail_start as usize ..],
|
||||
src.len() - tail_start,
|
||||
output_offset + BytePos::from_usize(tail_start),
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// The target (or compiler version) does not support SSE2 ...
|
||||
fn analyze_source_file_dispatch(src: &str,
|
||||
source_file_start_pos: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>) {
|
||||
analyze_source_file_generic(src,
|
||||
src.len(),
|
||||
source_file_start_pos,
|
||||
lines,
|
||||
multi_byte_chars,
|
||||
non_narrow_chars);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `scan_len` determines the number of bytes in `src` to scan. Note that the
|
||||
// function can read past `scan_len` if a multi-byte character start within the
|
||||
// range but extends past it. The overflow is returned by the function.
|
||||
fn analyze_source_file_generic(
|
||||
src: &str,
|
||||
scan_len: usize,
|
||||
output_offset: BytePos,
|
||||
lines: &mut Vec<BytePos>,
|
||||
multi_byte_chars: &mut Vec<MultiByteChar>,
|
||||
non_narrow_chars: &mut Vec<NonNarrowChar>,
|
||||
) -> usize {
|
||||
assert!(src.len() >= scan_len);
|
||||
let mut i = 0;
|
||||
let src_bytes = src.as_bytes();
|
||||
|
||||
while i < scan_len {
|
||||
let byte = unsafe {
|
||||
// We verified that i < scan_len <= src.len()
|
||||
*src_bytes.get_unchecked(i as usize)
|
||||
};
|
||||
|
||||
// How much to advance in order to get to the next UTF-8 char in the
|
||||
// string.
|
||||
let mut char_len = 1;
|
||||
|
||||
if byte < 32 {
|
||||
// This is an ASCII control character, it could be one of the cases
|
||||
// that are interesting to us.
|
||||
|
||||
let pos = BytePos::from_usize(i) + output_offset;
|
||||
|
||||
match byte {
|
||||
b'\n' => {
|
||||
lines.push(pos + BytePos(1));
|
||||
}
|
||||
b'\t' => {
|
||||
non_narrow_chars.push(NonNarrowChar::Tab(pos));
|
||||
}
|
||||
_ => {
|
||||
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
|
||||
}
|
||||
}
|
||||
} else if byte >= 127 {
|
||||
// The slow path:
|
||||
// This is either ASCII control character "DEL" or the beginning of
|
||||
// a multibyte char. Just decode to `char`.
|
||||
let c = (&src[i..]).chars().next().unwrap();
|
||||
char_len = c.len_utf8();
|
||||
|
||||
let pos = BytePos::from_usize(i) + output_offset;
|
||||
|
||||
if char_len > 1 {
|
||||
assert!(char_len >= 2 && char_len <= 4);
|
||||
let mbc = MultiByteChar { pos, bytes: char_len as u8 };
|
||||
multi_byte_chars.push(mbc);
|
||||
}
|
||||
|
||||
// Assume control characters are zero width.
|
||||
// FIXME: How can we decide between `width` and `width_cjk`?
|
||||
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
|
||||
|
||||
if char_width != 1 {
|
||||
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
|
||||
}
|
||||
}
|
||||
|
||||
i += char_len;
|
||||
}
|
||||
|
||||
i - scan_len
|
||||
}
|
142
compiler/rustc_span/src/analyze_source_file/tests.rs
Normal file
142
compiler/rustc_span/src/analyze_source_file/tests.rs
Normal file
|
@ -0,0 +1,142 @@
|
|||
use super::*;
|
||||
|
||||
macro_rules! test {
|
||||
(case: $test_name:ident,
|
||||
text: $text:expr,
|
||||
source_file_start_pos: $source_file_start_pos:expr,
|
||||
lines: $lines:expr,
|
||||
multi_byte_chars: $multi_byte_chars:expr,
|
||||
non_narrow_chars: $non_narrow_chars:expr,) => {
|
||||
#[test]
|
||||
fn $test_name() {
|
||||
let (lines, multi_byte_chars, non_narrow_chars) =
|
||||
analyze_source_file($text, BytePos($source_file_start_pos));
|
||||
|
||||
let expected_lines: Vec<BytePos> = $lines.into_iter().map(|pos| BytePos(pos)).collect();
|
||||
|
||||
assert_eq!(lines, expected_lines);
|
||||
|
||||
let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
|
||||
.into_iter()
|
||||
.map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes })
|
||||
.collect();
|
||||
|
||||
assert_eq!(multi_byte_chars, expected_mbcs);
|
||||
|
||||
let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
|
||||
.into_iter()
|
||||
.map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width))
|
||||
.collect();
|
||||
|
||||
assert_eq!(non_narrow_chars, expected_nncs);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test!(
|
||||
case: empty_text,
|
||||
text: "",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newlines_short,
|
||||
text: "a\nc",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0, 2],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newlines_long,
|
||||
text: "012345678\nabcdef012345678\na",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0, 10, 26],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newline_and_multi_byte_char_in_same_chunk,
|
||||
text: "01234β789\nbcdef0123456789abcdef",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0, 11],
|
||||
multi_byte_chars: vec![(5, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: newline_and_control_char_in_same_chunk,
|
||||
text: "01234\u{07}6789\nbcdef0123456789abcdef",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0, 11],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(5, 0)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_short,
|
||||
text: "aβc",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(1, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_long,
|
||||
text: "0123456789abcΔf012345β",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(13, 2), (22, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_across_chunk_boundary,
|
||||
text: "0123456789abcdeΔ123456789abcdef01234",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(15, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: multi_byte_char_across_chunk_boundary_tail,
|
||||
text: "0123456789abcdeΔ....",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![(15, 2)],
|
||||
non_narrow_chars: vec![],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: non_narrow_short,
|
||||
text: "0\t2",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(1, 4)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: non_narrow_long,
|
||||
text: "01\t3456789abcdef01234567\u{07}9",
|
||||
source_file_start_pos: 0,
|
||||
lines: vec![0],
|
||||
multi_byte_chars: vec![],
|
||||
non_narrow_chars: vec![(2, 4), (24, 0)],
|
||||
);
|
||||
|
||||
test!(
|
||||
case: output_offset_all,
|
||||
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
|
||||
source_file_start_pos: 1000,
|
||||
lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
|
||||
multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
|
||||
non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
|
||||
);
|
104
compiler/rustc_span/src/caching_source_map_view.rs
Normal file
104
compiler/rustc_span/src/caching_source_map_view.rs
Normal file
|
@ -0,0 +1,104 @@
|
|||
use crate::source_map::SourceMap;
|
||||
use crate::{BytePos, SourceFile};
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CacheEntry {
|
||||
time_stamp: usize,
|
||||
line_number: usize,
|
||||
line_start: BytePos,
|
||||
line_end: BytePos,
|
||||
file: Lrc<SourceFile>,
|
||||
file_index: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CachingSourceMapView<'sm> {
|
||||
source_map: &'sm SourceMap,
|
||||
line_cache: [CacheEntry; 3],
|
||||
time_stamp: usize,
|
||||
}
|
||||
|
||||
impl<'sm> CachingSourceMapView<'sm> {
|
||||
pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> {
|
||||
let files = source_map.files();
|
||||
let first_file = files[0].clone();
|
||||
let entry = CacheEntry {
|
||||
time_stamp: 0,
|
||||
line_number: 0,
|
||||
line_start: BytePos(0),
|
||||
line_end: BytePos(0),
|
||||
file: first_file,
|
||||
file_index: 0,
|
||||
};
|
||||
|
||||
CachingSourceMapView {
|
||||
source_map,
|
||||
line_cache: [entry.clone(), entry.clone(), entry],
|
||||
time_stamp: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn byte_pos_to_line_and_col(
|
||||
&mut self,
|
||||
pos: BytePos,
|
||||
) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
|
||||
self.time_stamp += 1;
|
||||
|
||||
// Check if the position is in one of the cached lines
|
||||
for cache_entry in self.line_cache.iter_mut() {
|
||||
if pos >= cache_entry.line_start && pos < cache_entry.line_end {
|
||||
cache_entry.time_stamp = self.time_stamp;
|
||||
|
||||
return Some((
|
||||
cache_entry.file.clone(),
|
||||
cache_entry.line_number,
|
||||
pos - cache_entry.line_start,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// No cache hit ...
|
||||
let mut oldest = 0;
|
||||
for index in 1..self.line_cache.len() {
|
||||
if self.line_cache[index].time_stamp < self.line_cache[oldest].time_stamp {
|
||||
oldest = index;
|
||||
}
|
||||
}
|
||||
|
||||
let cache_entry = &mut self.line_cache[oldest];
|
||||
|
||||
// If the entry doesn't point to the correct file, fix it up
|
||||
if pos < cache_entry.file.start_pos || pos >= cache_entry.file.end_pos {
|
||||
let file_valid;
|
||||
if self.source_map.files().len() > 0 {
|
||||
let file_index = self.source_map.lookup_source_file_idx(pos);
|
||||
let file = self.source_map.files()[file_index].clone();
|
||||
|
||||
if pos >= file.start_pos && pos < file.end_pos {
|
||||
cache_entry.file = file;
|
||||
cache_entry.file_index = file_index;
|
||||
file_valid = true;
|
||||
} else {
|
||||
file_valid = false;
|
||||
}
|
||||
} else {
|
||||
file_valid = false;
|
||||
}
|
||||
|
||||
if !file_valid {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let line_index = cache_entry.file.lookup_line(pos).unwrap();
|
||||
let line_bounds = cache_entry.file.line_bounds(line_index);
|
||||
|
||||
cache_entry.line_number = line_index + 1;
|
||||
cache_entry.line_start = line_bounds.0;
|
||||
cache_entry.line_end = line_bounds.1;
|
||||
cache_entry.time_stamp = self.time_stamp;
|
||||
|
||||
Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line_start))
|
||||
}
|
||||
}
|
280
compiler/rustc_span/src/def_id.rs
Normal file
280
compiler/rustc_span/src/def_id.rs
Normal file
|
@ -0,0 +1,280 @@
|
|||
use crate::HashStableContext;
|
||||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::AtomicRef;
|
||||
use rustc_index::vec::Idx;
|
||||
use rustc_macros::HashStable_Generic;
|
||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt;
|
||||
|
||||
rustc_index::newtype_index! {
|
||||
pub struct CrateId {
|
||||
ENCODABLE = custom
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum CrateNum {
|
||||
/// A special `CrateNum` that we use for the `tcx.rcache` when decoding from
|
||||
/// the incr. comp. cache.
|
||||
ReservedForIncrCompCache,
|
||||
Index(CrateId),
|
||||
}
|
||||
|
||||
/// Item definitions in the currently-compiled crate would have the `CrateNum`
|
||||
/// `LOCAL_CRATE` in their `DefId`.
|
||||
pub const LOCAL_CRATE: CrateNum = CrateNum::Index(CrateId::from_u32(0));
|
||||
|
||||
impl Idx for CrateNum {
|
||||
#[inline]
|
||||
fn new(value: usize) -> Self {
|
||||
CrateNum::Index(Idx::new(value))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn index(self) -> usize {
|
||||
match self {
|
||||
CrateNum::Index(idx) => Idx::index(idx),
|
||||
_ => panic!("Tried to get crate index of {:?}", self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CrateNum {
|
||||
pub fn new(x: usize) -> CrateNum {
|
||||
CrateNum::from_usize(x)
|
||||
}
|
||||
|
||||
pub fn from_usize(x: usize) -> CrateNum {
|
||||
CrateNum::Index(CrateId::from_usize(x))
|
||||
}
|
||||
|
||||
pub fn from_u32(x: u32) -> CrateNum {
|
||||
CrateNum::Index(CrateId::from_u32(x))
|
||||
}
|
||||
|
||||
pub fn as_usize(self) -> usize {
|
||||
match self {
|
||||
CrateNum::Index(id) => id.as_usize(),
|
||||
_ => panic!("tried to get index of non-standard crate {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_u32(self) -> u32 {
|
||||
match self {
|
||||
CrateNum::Index(id) => id.as_u32(),
|
||||
_ => panic!("tried to get index of non-standard crate {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_def_id(&self) -> DefId {
|
||||
DefId { krate: *self, index: CRATE_DEF_INDEX }
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for CrateNum {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
CrateNum::Index(id) => fmt::Display::fmt(&id.private, f),
|
||||
CrateNum::ReservedForIncrCompCache => write!(f, "crate for decoding incr comp cache"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx.
|
||||
/// Therefore, make sure to include the context when encode a `CrateNum`.
|
||||
impl<E: Encoder> Encodable<E> for CrateNum {
|
||||
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
|
||||
s.emit_u32(self.as_u32())
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for CrateNum {
|
||||
default fn decode(d: &mut D) -> Result<CrateNum, D::Error> {
|
||||
Ok(CrateNum::from_u32(d.read_u32()?))
|
||||
}
|
||||
}
|
||||
|
||||
impl ::std::fmt::Debug for CrateNum {
|
||||
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
|
||||
match self {
|
||||
CrateNum::Index(id) => write!(fmt, "crate{}", id.private),
|
||||
CrateNum::ReservedForIncrCompCache => write!(fmt, "crate for decoding incr comp cache"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
#[derive(HashStable_Generic, Encodable, Decodable)]
|
||||
pub struct DefPathHash(pub Fingerprint);
|
||||
|
||||
impl Borrow<Fingerprint> for DefPathHash {
|
||||
#[inline]
|
||||
fn borrow(&self) -> &Fingerprint {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
rustc_index::newtype_index! {
|
||||
/// A DefIndex is an index into the hir-map for a crate, identifying a
|
||||
/// particular definition. It should really be considered an interned
|
||||
/// shorthand for a particular DefPath.
|
||||
pub struct DefIndex {
|
||||
ENCODABLE = custom // (only encodable in metadata)
|
||||
|
||||
DEBUG_FORMAT = "DefIndex({})",
|
||||
/// The crate root is always assigned index 0 by the AST Map code,
|
||||
/// thanks to `NodeCollector::new`.
|
||||
const CRATE_DEF_INDEX = 0,
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Encoder> Encodable<E> for DefIndex {
|
||||
default fn encode(&self, _: &mut E) -> Result<(), E::Error> {
|
||||
panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>());
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for DefIndex {
|
||||
default fn decode(_: &mut D) -> Result<DefIndex, D::Error> {
|
||||
panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>());
|
||||
}
|
||||
}
|
||||
|
||||
/// A `DefId` identifies a particular *definition*, by combining a crate
|
||||
/// index and a def index.
|
||||
///
|
||||
/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`.
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)]
|
||||
pub struct DefId {
|
||||
pub krate: CrateNum,
|
||||
pub index: DefIndex,
|
||||
}
|
||||
|
||||
impl DefId {
|
||||
/// Makes a local `DefId` from the given `DefIndex`.
|
||||
#[inline]
|
||||
pub fn local(index: DefIndex) -> DefId {
|
||||
DefId { krate: LOCAL_CRATE, index }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_local(self) -> bool {
|
||||
self.krate == LOCAL_CRATE
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn as_local(self) -> Option<LocalDefId> {
|
||||
if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn expect_local(self) -> LocalDefId {
|
||||
self.as_local().unwrap_or_else(|| panic!("DefId::expect_local: `{:?}` isn't local", self))
|
||||
}
|
||||
|
||||
pub fn is_top_level_module(self) -> bool {
|
||||
self.is_local() && self.index == CRATE_DEF_INDEX
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Encoder> Encodable<E> for DefId {
|
||||
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
|
||||
s.emit_struct("DefId", 2, |s| {
|
||||
s.emit_struct_field("krate", 0, |s| self.krate.encode(s))?;
|
||||
|
||||
s.emit_struct_field("index", 1, |s| self.index.encode(s))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for DefId {
|
||||
default fn decode(d: &mut D) -> Result<DefId, D::Error> {
|
||||
d.read_struct("DefId", 2, |d| {
|
||||
Ok(DefId {
|
||||
krate: d.read_struct_field("krate", 0, Decodable::decode)?,
|
||||
index: d.read_struct_field("index", 1, Decodable::decode)?,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("DefId").field("krate", &def_id.krate).field("index", &def_id.index).finish()
|
||||
}
|
||||
|
||||
pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> =
|
||||
AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _));
|
||||
|
||||
impl fmt::Debug for DefId {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
(*DEF_ID_DEBUG)(*self, f)
|
||||
}
|
||||
}
|
||||
|
||||
rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId);
|
||||
|
||||
/// A LocalDefId is equivalent to a DefId with `krate == LOCAL_CRATE`. Since
|
||||
/// we encode this information in the type, we can ensure at compile time that
|
||||
/// no DefIds from upstream crates get thrown into the mix. There are quite a
|
||||
/// few cases where we know that only DefIds from the local crate are expected
|
||||
/// and a DefId from a different crate would signify a bug somewhere. This
|
||||
/// is when LocalDefId comes in handy.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct LocalDefId {
|
||||
pub local_def_index: DefIndex,
|
||||
}
|
||||
|
||||
impl Idx for LocalDefId {
|
||||
#[inline]
|
||||
fn new(idx: usize) -> Self {
|
||||
LocalDefId { local_def_index: Idx::new(idx) }
|
||||
}
|
||||
#[inline]
|
||||
fn index(self) -> usize {
|
||||
self.local_def_index.index()
|
||||
}
|
||||
}
|
||||
|
||||
impl LocalDefId {
|
||||
#[inline]
|
||||
pub fn to_def_id(self) -> DefId {
|
||||
DefId { krate: LOCAL_CRATE, index: self.local_def_index }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_top_level_module(self) -> bool {
|
||||
self.local_def_index == CRATE_DEF_INDEX
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for LocalDefId {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.to_def_id().fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Encoder> Encodable<E> for LocalDefId {
|
||||
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
|
||||
self.to_def_id().encode(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for LocalDefId {
|
||||
fn decode(d: &mut D) -> Result<LocalDefId, D::Error> {
|
||||
DefId::decode(d).map(|d| d.expect_local())
|
||||
}
|
||||
}
|
||||
|
||||
impl<CTX: HashStableContext> HashStable<CTX> for DefId {
|
||||
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
|
||||
hcx.hash_def_id(*self, hasher)
|
||||
}
|
||||
}
|
||||
|
||||
impl<CTX: HashStableContext> HashStable<CTX> for CrateNum {
|
||||
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
|
||||
hcx.hash_crate_num(*self, hasher)
|
||||
}
|
||||
}
|
73
compiler/rustc_span/src/edition.rs
Normal file
73
compiler/rustc_span/src/edition.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
use crate::symbol::{sym, Symbol};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use rustc_macros::HashStable_Generic;
|
||||
|
||||
/// The edition of the compiler (RFC 2052)
|
||||
#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)]
|
||||
#[derive(HashStable_Generic)]
|
||||
pub enum Edition {
|
||||
// editions must be kept in order, oldest to newest
|
||||
/// The 2015 edition
|
||||
Edition2015,
|
||||
/// The 2018 edition
|
||||
Edition2018,
|
||||
// when adding new editions, be sure to update:
|
||||
//
|
||||
// - Update the `ALL_EDITIONS` const
|
||||
// - Update the EDITION_NAME_LIST const
|
||||
// - add a `rust_####()` function to the session
|
||||
// - update the enum in Cargo's sources as well
|
||||
}
|
||||
|
||||
// must be in order from oldest to newest
|
||||
pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018];
|
||||
|
||||
pub const EDITION_NAME_LIST: &str = "2015|2018";
|
||||
|
||||
pub const DEFAULT_EDITION: Edition = Edition::Edition2015;
|
||||
|
||||
impl fmt::Display for Edition {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let s = match *self {
|
||||
Edition::Edition2015 => "2015",
|
||||
Edition::Edition2018 => "2018",
|
||||
};
|
||||
write!(f, "{}", s)
|
||||
}
|
||||
}
|
||||
|
||||
impl Edition {
|
||||
pub fn lint_name(&self) -> &'static str {
|
||||
match *self {
|
||||
Edition::Edition2015 => "rust_2015_compatibility",
|
||||
Edition::Edition2018 => "rust_2018_compatibility",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn feature_name(&self) -> Symbol {
|
||||
match *self {
|
||||
Edition::Edition2015 => sym::rust_2015_preview,
|
||||
Edition::Edition2018 => sym::rust_2018_preview,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_stable(&self) -> bool {
|
||||
match *self {
|
||||
Edition::Edition2015 => true,
|
||||
Edition::Edition2018 => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Edition {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, ()> {
|
||||
match s {
|
||||
"2015" => Ok(Edition::Edition2015),
|
||||
"2018" => Ok(Edition::Edition2018),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
26
compiler/rustc_span/src/fatal_error.rs
Normal file
26
compiler/rustc_span/src/fatal_error.rs
Normal file
|
@ -0,0 +1,26 @@
|
|||
/// Used as a return value to signify a fatal error occurred. (It is also
|
||||
/// used as the argument to panic at the moment, but that will eventually
|
||||
/// not be true.)
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
#[must_use]
|
||||
pub struct FatalError;
|
||||
|
||||
pub struct FatalErrorMarker;
|
||||
|
||||
// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError).
|
||||
// We don't want to invoke the panic handler and print a backtrace for fatal errors.
|
||||
impl !Send for FatalError {}
|
||||
|
||||
impl FatalError {
|
||||
pub fn raise(self) -> ! {
|
||||
std::panic::resume_unwind(Box::new(FatalErrorMarker))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FatalError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "parser fatal error")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for FatalError {}
|
1239
compiler/rustc_span/src/hygiene.rs
Normal file
1239
compiler/rustc_span/src/hygiene.rs
Normal file
File diff suppressed because it is too large
Load diff
1872
compiler/rustc_span/src/lib.rs
Normal file
1872
compiler/rustc_span/src/lib.rs
Normal file
File diff suppressed because it is too large
Load diff
1097
compiler/rustc_span/src/source_map.rs
Normal file
1097
compiler/rustc_span/src/source_map.rs
Normal file
File diff suppressed because it is too large
Load diff
272
compiler/rustc_span/src/source_map/tests.rs
Normal file
272
compiler/rustc_span/src/source_map/tests.rs
Normal file
|
@ -0,0 +1,272 @@
|
|||
use super::*;
|
||||
|
||||
use rustc_data_structures::sync::Lrc;
|
||||
|
||||
fn init_source_map() -> SourceMap {
|
||||
let sm = SourceMap::new(FilePathMapping::empty());
|
||||
sm.new_source_file(PathBuf::from("blork.rs").into(), "first line.\nsecond line".to_string());
|
||||
sm.new_source_file(PathBuf::from("empty.rs").into(), String::new());
|
||||
sm.new_source_file(PathBuf::from("blork2.rs").into(), "first line.\nsecond line".to_string());
|
||||
sm
|
||||
}
|
||||
|
||||
/// Tests `lookup_byte_offset`.
|
||||
#[test]
|
||||
fn t3() {
|
||||
let sm = init_source_map();
|
||||
|
||||
let srcfbp1 = sm.lookup_byte_offset(BytePos(23));
|
||||
assert_eq!(srcfbp1.sf.name, PathBuf::from("blork.rs").into());
|
||||
assert_eq!(srcfbp1.pos, BytePos(23));
|
||||
|
||||
let srcfbp1 = sm.lookup_byte_offset(BytePos(24));
|
||||
assert_eq!(srcfbp1.sf.name, PathBuf::from("empty.rs").into());
|
||||
assert_eq!(srcfbp1.pos, BytePos(0));
|
||||
|
||||
let srcfbp2 = sm.lookup_byte_offset(BytePos(25));
|
||||
assert_eq!(srcfbp2.sf.name, PathBuf::from("blork2.rs").into());
|
||||
assert_eq!(srcfbp2.pos, BytePos(0));
|
||||
}
|
||||
|
||||
/// Tests `bytepos_to_file_charpos`.
|
||||
#[test]
|
||||
fn t4() {
|
||||
let sm = init_source_map();
|
||||
|
||||
let cp1 = sm.bytepos_to_file_charpos(BytePos(22));
|
||||
assert_eq!(cp1, CharPos(22));
|
||||
|
||||
let cp2 = sm.bytepos_to_file_charpos(BytePos(25));
|
||||
assert_eq!(cp2, CharPos(0));
|
||||
}
|
||||
|
||||
/// Tests zero-length `SourceFile`s.
|
||||
#[test]
|
||||
fn t5() {
|
||||
let sm = init_source_map();
|
||||
|
||||
let loc1 = sm.lookup_char_pos(BytePos(22));
|
||||
assert_eq!(loc1.file.name, PathBuf::from("blork.rs").into());
|
||||
assert_eq!(loc1.line, 2);
|
||||
assert_eq!(loc1.col, CharPos(10));
|
||||
|
||||
let loc2 = sm.lookup_char_pos(BytePos(25));
|
||||
assert_eq!(loc2.file.name, PathBuf::from("blork2.rs").into());
|
||||
assert_eq!(loc2.line, 1);
|
||||
assert_eq!(loc2.col, CharPos(0));
|
||||
}
|
||||
|
||||
fn init_source_map_mbc() -> SourceMap {
|
||||
let sm = SourceMap::new(FilePathMapping::empty());
|
||||
// "€" is a three-byte UTF8 char.
|
||||
sm.new_source_file(
|
||||
PathBuf::from("blork.rs").into(),
|
||||
"fir€st €€€€ line.\nsecond line".to_string(),
|
||||
);
|
||||
sm.new_source_file(
|
||||
PathBuf::from("blork2.rs").into(),
|
||||
"first line€€.\n€ second line".to_string(),
|
||||
);
|
||||
sm
|
||||
}
|
||||
|
||||
/// Tests `bytepos_to_file_charpos` in the presence of multi-byte chars.
|
||||
#[test]
|
||||
fn t6() {
|
||||
let sm = init_source_map_mbc();
|
||||
|
||||
let cp1 = sm.bytepos_to_file_charpos(BytePos(3));
|
||||
assert_eq!(cp1, CharPos(3));
|
||||
|
||||
let cp2 = sm.bytepos_to_file_charpos(BytePos(6));
|
||||
assert_eq!(cp2, CharPos(4));
|
||||
|
||||
let cp3 = sm.bytepos_to_file_charpos(BytePos(56));
|
||||
assert_eq!(cp3, CharPos(12));
|
||||
|
||||
let cp4 = sm.bytepos_to_file_charpos(BytePos(61));
|
||||
assert_eq!(cp4, CharPos(15));
|
||||
}
|
||||
|
||||
/// Test `span_to_lines` for a span ending at the end of a `SourceFile`.
|
||||
#[test]
|
||||
fn t7() {
|
||||
let sm = init_source_map();
|
||||
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
|
||||
let file_lines = sm.span_to_lines(span).unwrap();
|
||||
|
||||
assert_eq!(file_lines.file.name, PathBuf::from("blork.rs").into());
|
||||
assert_eq!(file_lines.lines.len(), 1);
|
||||
assert_eq!(file_lines.lines[0].line_index, 1);
|
||||
}
|
||||
|
||||
/// Given a string like " ~~~~~~~~~~~~ ", produces a span
|
||||
/// converting that range. The idea is that the string has the same
|
||||
/// length as the input, and we uncover the byte positions. Note
|
||||
/// that this can span lines and so on.
|
||||
fn span_from_selection(input: &str, selection: &str) -> Span {
|
||||
assert_eq!(input.len(), selection.len());
|
||||
let left_index = selection.find('~').unwrap() as u32;
|
||||
let right_index = selection.rfind('~').map(|x| x as u32).unwrap_or(left_index);
|
||||
Span::with_root_ctxt(BytePos(left_index), BytePos(right_index + 1))
|
||||
}
|
||||
|
||||
/// Tests `span_to_snippet` and `span_to_lines` for a span covering 3
/// lines in the middle of a file.
#[test]
fn span_to_snippet_and_lines_spanning_multiple_lines() {
    let source_map = SourceMap::new(FilePathMapping::empty());
    let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
    let selection = "     \n    ~~\n~~~\n~~~~~     \n   \n";
    source_map.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
    let span = span_from_selection(inputtext, selection);

    // Check that we are extracting the text we thought we were extracting.
    assert_eq!(&source_map.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD");

    // Check that span_to_lines gives the complete result with the
    // lines/columns we expected.
    let lines = source_map.span_to_lines(span).unwrap();
    let expected = vec![
        LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) },
        LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) },
        LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) },
    ];
    assert_eq!(lines.lines, expected);
}
|
||||
|
||||
/// Tests `span_to_snippet` for a span ending at the end of a `SourceFile`.
#[test]
fn t8() {
    let source_map = init_source_map();
    let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
    let snippet = source_map.span_to_snippet(span);

    assert_eq!(snippet, Ok("second line".to_string()));
}

/// Tests `span_to_str` for a span ending at the end of a `SourceFile`.
#[test]
fn t9() {
    let source_map = init_source_map();
    let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
    let sstr = source_map.span_to_string(span);

    // The human-readable form uses 1-based line:column pairs.
    assert_eq!(sstr, "blork.rs:2:1: 2:12");
}

/// Tests failing to merge two spans on different lines.
#[test]
fn span_merging_fail() {
    let source_map = SourceMap::new(FilePathMapping::empty());
    let inputtext = "bbbb BB\ncc CCC\n";
    let selection1 = "     ~~\n      \n";
    let selection2 = "       \n   ~~~\n";
    source_map.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());

    let span1 = span_from_selection(inputtext, selection1);
    let span2 = span_from_selection(inputtext, selection2);

    // The spans sit on different lines, so they must not merge.
    assert!(source_map.merge_spans(span1, span2).is_none());
}
|
||||
|
||||
/// Tests loading an external source file that requires normalization
/// (CRLF line endings collapsed to LF).
#[test]
fn t10() {
    let source_map = SourceMap::new(FilePathMapping::empty());
    let unnormalized = "first line.\r\nsecond line";
    let normalized = "first line.\nsecond line";

    let src_file =
        source_map.new_source_file(PathBuf::from("blork.rs").into(), unnormalized.to_string());

    // The stored source is normalized, but the hash is computed over the
    // original bytes.
    assert_eq!(src_file.src.as_ref().unwrap().as_ref(), normalized);
    assert!(
        src_file.src_hash.matches(unnormalized),
        "src_hash should use the source before normalization"
    );

    let SourceFile {
        name,
        name_was_remapped,
        src_hash,
        start_pos,
        end_pos,
        lines,
        multibyte_chars,
        non_narrow_chars,
        normalized_pos,
        name_hash,
        ..
    } = (*src_file).clone();

    // Re-import the file's metadata as if it came from another crate; the
    // source text itself is not carried along.
    let imported_src_file = source_map.new_imported_source_file(
        name,
        name_was_remapped,
        src_hash,
        name_hash,
        (end_pos - start_pos).to_usize(),
        CrateNum::new(0),
        lines,
        multibyte_chars,
        non_narrow_chars,
        normalized_pos,
        start_pos,
        end_pos,
    );

    assert!(
        imported_src_file.external_src.borrow().get_source().is_none(),
        "imported source file should not have source yet"
    );
    // Lazily supplying the *unnormalized* text must re-apply normalization.
    imported_src_file.add_external_src(|| Some(unnormalized.to_string()));
    assert_eq!(
        imported_src_file.external_src.borrow().get_source().unwrap().as_ref(),
        normalized,
        "imported source file should be normalized"
    );
}
|
||||
|
||||
/// Returns the span corresponding to the `n`th occurrence of `substring` in `source_text`.
|
||||
trait SourceMapExtension {
|
||||
fn span_substr(
|
||||
&self,
|
||||
file: &Lrc<SourceFile>,
|
||||
source_text: &str,
|
||||
substring: &str,
|
||||
n: usize,
|
||||
) -> Span;
|
||||
}
|
||||
|
||||
impl SourceMapExtension for SourceMap {
|
||||
fn span_substr(
|
||||
&self,
|
||||
file: &Lrc<SourceFile>,
|
||||
source_text: &str,
|
||||
substring: &str,
|
||||
n: usize,
|
||||
) -> Span {
|
||||
println!(
|
||||
"span_substr(file={:?}/{:?}, substring={:?}, n={})",
|
||||
file.name, file.start_pos, substring, n
|
||||
);
|
||||
let mut i = 0;
|
||||
let mut hi = 0;
|
||||
loop {
|
||||
let offset = source_text[hi..].find(substring).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"source_text `{}` does not have {} occurrences of `{}`, only {}",
|
||||
source_text, n, substring, i
|
||||
);
|
||||
});
|
||||
let lo = hi + offset;
|
||||
hi = lo + substring.len();
|
||||
if i == n {
|
||||
let span = Span::with_root_ctxt(
|
||||
BytePos(lo as u32 + file.start_pos.0),
|
||||
BytePos(hi as u32 + file.start_pos.0),
|
||||
);
|
||||
assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring);
|
||||
return span;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
133
compiler/rustc_span/src/span_encoding.rs
Normal file
133
compiler/rustc_span/src/span_encoding.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
// Spans are encoded using 1-bit tag and 2 different encoding formats (one for each tag value).
|
||||
// One format is used for keeping span data inline,
|
||||
// another contains index into an out-of-line span interner.
|
||||
// The encoding format for inline spans were obtained by optimizing over crates in rustc/libstd.
|
||||
// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
|
||||
|
||||
use crate::hygiene::SyntaxContext;
|
||||
use crate::SESSION_GLOBALS;
|
||||
use crate::{BytePos, SpanData};
|
||||
|
||||
use rustc_data_structures::fx::FxIndexSet;
|
||||
|
||||
/// A compressed span.
///
/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span`
/// is a form that only takes up 8 bytes, with less space for the length and
/// context. The vast majority (99.9%+) of `SpanData` instances fit within
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` is
/// stored in a separate interner table, and the `Span` indexes into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
/// very large crates) and so the interner was used a lot more.
///
/// Inline (compressed) format:
/// - `span.base_or_index == span_data.lo`
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Interned format:
/// - `span.base_or_index == index` (indexes into the interner table)
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
/// - `span.ctxt == 0`
///
/// The inline form uses 0 for the tag value (rather than 1) so that we don't
/// need to mask out the tag bit when getting the length, and so that the
/// dummy span can be all zeroes.
///
/// Notes about the choice of field sizes:
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
///   values never cause interning. The number of bits needed for `base`
///   depends on the crate size. 32 bits allows up to 4 GiB of code in a
///   crate; `script-servo` is the largest crate in `rustc-perf`, requiring
///   26 bits for some spans.
/// - `len` is 15 bits in `Span` (a `u16`, minus 1 bit for the tag) and
///   32 bits in `SpanData`, so large `len` values cause interning. The most
///   common lengths need 0--7 bits, with a peak usually at 3 or 4; 15 bits
///   is enough for 99.99%+ of cases, but larger values (sometimes 20+ bits)
///   might occur dozens of times in a typical crate.
/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, so large `ctxt`
///   values cause interning. The bits needed depend partly on crate size and
///   partly on the form of the code. No crate in `rustc-perf` needs more
///   than 15 bits for `ctxt`, but larger crates might need more than 16.
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct Span {
    base_or_index: u32,
    len_or_tag: u16,
    ctxt_or_zero: u16,
}

// Tag stored in `len_or_tag` for the interned format (high bit set).
const LEN_TAG: u16 = 0b1000_0000_0000_0000;
// Largest length representable inline (15 bits).
const MAX_LEN: u32 = 0b0111_1111_1111_1111;
// Largest syntax context representable inline (16 bits).
const MAX_CTXT: u32 = 0b1111_1111_1111_1111;

/// Dummy span, both position and length are zero, syntax context is zero as well.
pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 };
|
||||
|
||||
impl Span {
|
||||
#[inline]
|
||||
pub fn new(mut lo: BytePos, mut hi: BytePos, ctxt: SyntaxContext) -> Self {
|
||||
if lo > hi {
|
||||
std::mem::swap(&mut lo, &mut hi);
|
||||
}
|
||||
|
||||
let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
|
||||
|
||||
if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
|
||||
// Inline format.
|
||||
Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 }
|
||||
} else {
|
||||
// Interned format.
|
||||
let index = with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt }));
|
||||
Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn data(self) -> SpanData {
|
||||
if self.len_or_tag != LEN_TAG {
|
||||
// Inline format.
|
||||
debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
|
||||
SpanData {
|
||||
lo: BytePos(self.base_or_index),
|
||||
hi: BytePos(self.base_or_index + self.len_or_tag as u32),
|
||||
ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32),
|
||||
}
|
||||
} else {
|
||||
// Interned format.
|
||||
debug_assert!(self.ctxt_or_zero == 0);
|
||||
let index = self.base_or_index;
|
||||
with_span_interner(|interner| *interner.get(index))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SpanInterner {
|
||||
spans: FxIndexSet<SpanData>,
|
||||
}
|
||||
|
||||
impl SpanInterner {
|
||||
fn intern(&mut self, span_data: &SpanData) -> u32 {
|
||||
let (index, _) = self.spans.insert_full(*span_data);
|
||||
index as u32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get(&self, index: u32) -> &SpanData {
|
||||
&self.spans[index as usize]
|
||||
}
|
||||
}
|
||||
|
||||
// If an interner exists, return it. Otherwise, prepare a fresh one.
|
||||
#[inline]
|
||||
fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
|
||||
SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.span_interner.lock()))
|
||||
}
|
1693
compiler/rustc_span/src/symbol.rs
Normal file
1693
compiler/rustc_span/src/symbol.rs
Normal file
File diff suppressed because it is too large
Load diff
25
compiler/rustc_span/src/symbol/tests.rs
Normal file
25
compiler/rustc_span/src/symbol/tests.rs
Normal file
|
@ -0,0 +1,25 @@
|
|||
use super::*;
|
||||
|
||||
use crate::{edition, SessionGlobals};
|
||||
|
||||
#[test]
fn interner_tests() {
    let mut interner: Interner = Interner::default();
    // The first interned string gets symbol index zero.
    assert_eq!(interner.intern("dog"), Symbol::new(0));
    // Interning the same string again returns the same symbol.
    assert_eq!(interner.intern("dog"), Symbol::new(0));
    // A different string gets the next index, stably.
    assert_eq!(interner.intern("cat"), Symbol::new(1));
    assert_eq!(interner.intern("cat"), Symbol::new(1));
    // Earlier symbols are unaffected by later insertions.
    assert_eq!(interner.intern("dog"), Symbol::new(0));
}
|
||||
|
||||
#[test]
fn without_first_quote_test() {
    SESSION_GLOBALS.set(&SessionGlobals::new(edition::DEFAULT_EDITION), || {
        // Stripping the leading quote from a lifetime-like identifier
        // yields the corresponding keyword symbol.
        let ident = Ident::from_str("'break");
        assert_eq!(ident.without_first_quote().name, kw::Break);
    });
}
|
40
compiler/rustc_span/src/tests.rs
Normal file
40
compiler/rustc_span/src/tests.rs
Normal file
|
@ -0,0 +1,40 @@
|
|||
use super::*;
|
||||
|
||||
#[test]
fn test_lookup_line() {
    // Start offsets of three lines; the first line begins at byte 3.
    let lines = &[BytePos(3), BytePos(17), BytePos(28)];

    // A position before the first line start maps to -1.
    assert_eq!(lookup_line(lines, BytePos(0)), -1);
    // A line's own start offset belongs to that line.
    assert_eq!(lookup_line(lines, BytePos(3)), 0);
    assert_eq!(lookup_line(lines, BytePos(4)), 0);

    assert_eq!(lookup_line(lines, BytePos(16)), 0);
    assert_eq!(lookup_line(lines, BytePos(17)), 1);
    assert_eq!(lookup_line(lines, BytePos(18)), 1);

    // Positions at or past the last line start map to the last line.
    assert_eq!(lookup_line(lines, BytePos(28)), 2);
    assert_eq!(lookup_line(lines, BytePos(29)), 2);
}
|
||||
|
||||
#[test]
fn test_normalize_newlines() {
    // Normalizes `before` in place and checks both the resulting text and
    // the recorded positions of removed `\r` bytes.
    fn check(before: &str, after: &str, expected_positions: &[u32]) {
        let mut actual = before.to_string();
        let mut actual_positions = vec![];
        normalize_newlines(&mut actual, &mut actual_positions);
        let actual_positions: Vec<_> = actual_positions.into_iter().map(|nc| nc.pos.0).collect();
        assert_eq!(actual.as_str(), after);
        assert_eq!(actual_positions, expected_positions);
    }

    check("", "", &[]);
    // Lone `\n` and `\r` are left alone; only `\r\n` pairs are collapsed.
    check("\n", "\n", &[]);
    check("\r", "\r", &[]);
    check("\r\r", "\r\r", &[]);
    check("\r\n", "\n", &[1]);
    check("hello world", "hello world", &[]);
    check("hello\nworld", "hello\nworld", &[]);
    check("hello\r\nworld", "hello\nworld", &[6]);
    check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]);
    // Only the trailing `\r\n` forms a pair; the first `\r` is kept.
    check("\r\r\n", "\r\n", &[2]);
    check("hello\rworld", "hello\rworld", &[]);
}
|
Loading…
Add table
Add a link
Reference in a new issue