
mv compiler to compiler/

mark 2020-08-27 22:58:48 -05:00 committed by Vadim Petrochenkov
parent db534b3ac2
commit 9e5f7d5631
1686 changed files with 941 additions and 1051 deletions


@@ -0,0 +1,274 @@
use super::*;
use unicode_width::UnicodeWidthChar;
#[cfg(test)]
mod tests;
/// Finds all newlines, multi-byte characters, and non-narrow characters in a
/// SourceFile.
///
/// This function will use an SSE2-enhanced implementation if hardware support
/// is detected at runtime.
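///
/// For example, analyzing `"aβ\n."` (where `β` is a 2-byte char) records line
/// starts at offsets 0 and 4 and one 2-byte `MultiByteChar` at offset 1.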
pub fn analyze_source_file(
src: &str,
source_file_start_pos: BytePos,
) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
let mut lines = vec![source_file_start_pos];
let mut multi_byte_chars = vec![];
let mut non_narrow_chars = vec![];
// Calls the right implementation, depending on hardware support available.
analyze_source_file_dispatch(
src,
source_file_start_pos,
&mut lines,
&mut multi_byte_chars,
&mut non_narrow_chars,
);
// The code above optimistically registers a new line *after* each \n
// it encounters. If that point is already outside the source_file, remove
// it again.
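// E.g. for src == "a\n" a line start at offset 2 is registered, which
// coincides with the end of the file and is popped again here.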
if let Some(&last_line_start) = lines.last() {
let source_file_end = source_file_start_pos + BytePos::from_usize(src.len());
assert!(source_file_end >= last_line_start);
if last_line_start == source_file_end {
lines.pop();
}
}
(lines, multi_byte_chars, non_narrow_chars)
}
cfg_if::cfg_if! {
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] {
fn analyze_source_file_dispatch(src: &str,
source_file_start_pos: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src,
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
} else {
analyze_source_file_generic(src,
src.len(),
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
}
/// Checks 16-byte chunks of text at a time. If the chunk contains
/// something other than printable ASCII characters and newlines, the
/// function falls back to the generic implementation. Otherwise it uses
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(src: &str,
output_offset: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
const CHUNK_SIZE: usize = 16;
let src_bytes = src.as_bytes();
let chunk_count = src.len() / CHUNK_SIZE;
// This variable keeps track of where we should start decoding a
// chunk. If a multi-byte character spans across chunk boundaries,
// we need to skip that part in the next chunk because we already
// handled it.
let mut intra_chunk_offset = 0;
for chunk_index in 0 .. chunk_count {
let ptr = src_bytes.as_ptr() as *const __m128i;
// We don't know if the pointer is aligned to 16 bytes, so we
// use `loadu`, which supports unaligned loading.
let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize));
// For each character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a multi-byte UTF-8 char.
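// (The comparison is on signed `i8` values, so any non-ASCII byte, i.e.
// any byte >= 0x80, is negative and gets flagged here.)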
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
// Create a bit mask from the comparison results.
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);
// Check if there are any control characters in the chunk. All
// control characters that we can encounter at this point have a
// byte value less than 32 or ...
let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32));
let control_char_mask0 = _mm_movemask_epi8(control_char_test0);
// ... it's the ASCII 'DEL' character with a value of 127.
let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127));
let control_char_mask1 = _mm_movemask_epi8(control_char_test1);
let control_char_mask = control_char_mask0 | control_char_mask1;
if control_char_mask != 0 {
// Check for newlines in the chunk
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
let newlines_mask = _mm_movemask_epi8(newlines_test);
if control_char_mask == newlines_mask {
// All control characters are newlines, record them
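// The upper 16 bits are set so that, once every newline bit has
// been cleared, `trailing_zeros()` yields a value >= CHUNK_SIZE
// and the loop below terminates.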
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
let output_offset = output_offset +
BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
loop {
let index = newlines_mask.trailing_zeros();
if index >= CHUNK_SIZE as u32 {
// We have arrived at the end of the chunk.
break
}
lines.push(BytePos(index) + output_offset);
// Clear the bit, so we can find the next one.
newlines_mask &= (!1) << index;
}
// We are done for this chunk. All control characters were
// newlines and we took care of those.
continue
} else {
// Some of the control characters are not newlines,
// fall through to the slow path below.
}
} else {
// No control characters, nothing to record for this chunk
continue
}
}
// The slow path.
// There are multi-byte or non-newline control chars in here; fall back
// to generic decoding.
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
intra_chunk_offset = analyze_source_file_generic(
&src[scan_start .. ],
CHUNK_SIZE - intra_chunk_offset,
BytePos::from_usize(scan_start) + output_offset,
lines,
multi_byte_chars,
non_narrow_chars
);
}
// There might still be a tail left to analyze
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
if tail_start < src.len() {
analyze_source_file_generic(&src[tail_start as usize ..],
src.len() - tail_start,
output_offset + BytePos::from_usize(tail_start),
lines,
multi_byte_chars,
non_narrow_chars);
}
}
} else {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(src: &str,
source_file_start_pos: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
analyze_source_file_generic(src,
src.len(),
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
}
}
// `scan_len` determines the number of bytes in `src` to scan. Note that the
// function can read past `scan_len` if a multi-byte character starts within
// the range but extends past it. The overflow is returned by the function.
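// E.g. scanning `"abcΔ"` (5 bytes, with the 2-byte `Δ` starting at byte 3)
// with a `scan_len` of 4 decodes `Δ` completely and returns an overflow of 1.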
fn analyze_source_file_generic(
src: &str,
scan_len: usize,
output_offset: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>,
) -> usize {
assert!(src.len() >= scan_len);
let mut i = 0;
let src_bytes = src.as_bytes();
while i < scan_len {
let byte = unsafe {
// We verified that i < scan_len <= src.len()
*src_bytes.get_unchecked(i as usize)
};
// How much to advance in order to get to the next UTF-8 char in the
// string.
let mut char_len = 1;
if byte < 32 {
// This is an ASCII control character, it could be one of the cases
// that are interesting to us.
let pos = BytePos::from_usize(i) + output_offset;
match byte {
b'\n' => {
lines.push(pos + BytePos(1));
}
b'\t' => {
non_narrow_chars.push(NonNarrowChar::Tab(pos));
}
_ => {
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
}
}
} else if byte >= 127 {
// The slow path:
// This is either ASCII control character "DEL" or the beginning of
// a multibyte char. Just decode to `char`.
let c = (&src[i..]).chars().next().unwrap();
char_len = c.len_utf8();
let pos = BytePos::from_usize(i) + output_offset;
if char_len > 1 {
assert!(char_len >= 2 && char_len <= 4);
let mbc = MultiByteChar { pos, bytes: char_len as u8 };
multi_byte_chars.push(mbc);
}
// Assume control characters are zero width.
// FIXME: How can we decide between `width` and `width_cjk`?
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
if char_width != 1 {
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
}
}
i += char_len;
}
i - scan_len
}


@@ -0,0 +1,142 @@
use super::*;
macro_rules! test {
(case: $test_name:ident,
text: $text:expr,
source_file_start_pos: $source_file_start_pos:expr,
lines: $lines:expr,
multi_byte_chars: $multi_byte_chars:expr,
non_narrow_chars: $non_narrow_chars:expr,) => {
#[test]
fn $test_name() {
let (lines, multi_byte_chars, non_narrow_chars) =
analyze_source_file($text, BytePos($source_file_start_pos));
let expected_lines: Vec<BytePos> = $lines.into_iter().map(|pos| BytePos(pos)).collect();
assert_eq!(lines, expected_lines);
let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
.into_iter()
.map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes })
.collect();
assert_eq!(multi_byte_chars, expected_mbcs);
let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
.into_iter()
.map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width))
.collect();
assert_eq!(non_narrow_chars, expected_nncs);
}
};
}
test!(
case: empty_text,
text: "",
source_file_start_pos: 0,
lines: vec![],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newlines_short,
text: "a\nc",
source_file_start_pos: 0,
lines: vec![0, 2],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newlines_long,
text: "012345678\nabcdef012345678\na",
source_file_start_pos: 0,
lines: vec![0, 10, 26],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newline_and_multi_byte_char_in_same_chunk,
text: "01234β789\nbcdef0123456789abcdef",
source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![(5, 2)],
non_narrow_chars: vec![],
);
test!(
case: newline_and_control_char_in_same_chunk,
text: "01234\u{07}6789\nbcdef0123456789abcdef",
source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![],
non_narrow_chars: vec![(5, 0)],
);
test!(
case: multi_byte_char_short,
text: "aβc",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(1, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_long,
text: "0123456789abcΔf012345β",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(13, 2), (22, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_across_chunk_boundary,
text: "0123456789abcdeΔ123456789abcdef01234",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_across_chunk_boundary_tail,
text: "0123456789abcdeΔ....",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);
test!(
case: non_narrow_short,
text: "0\t2",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(1, 4)],
);
test!(
case: non_narrow_long,
text: "01\t3456789abcdef01234567\u{07}9",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(2, 4), (24, 0)],
);
test!(
case: output_offset_all,
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
source_file_start_pos: 1000,
lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
);


@@ -0,0 +1,104 @@
use crate::source_map::SourceMap;
use crate::{BytePos, SourceFile};
use rustc_data_structures::sync::Lrc;
#[derive(Clone)]
struct CacheEntry {
time_stamp: usize,
line_number: usize,
line_start: BytePos,
line_end: BytePos,
file: Lrc<SourceFile>,
file_index: usize,
}
#[derive(Clone)]
pub struct CachingSourceMapView<'sm> {
source_map: &'sm SourceMap,
line_cache: [CacheEntry; 3],
time_stamp: usize,
}
impl<'sm> CachingSourceMapView<'sm> {
pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> {
let files = source_map.files();
let first_file = files[0].clone();
let entry = CacheEntry {
time_stamp: 0,
line_number: 0,
line_start: BytePos(0),
line_end: BytePos(0),
file: first_file,
file_index: 0,
};
CachingSourceMapView {
source_map,
line_cache: [entry.clone(), entry.clone(), entry],
time_stamp: 0,
}
}
pub fn byte_pos_to_line_and_col(
&mut self,
pos: BytePos,
) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
self.time_stamp += 1;
// Check if the position is in one of the cached lines
for cache_entry in self.line_cache.iter_mut() {
if pos >= cache_entry.line_start && pos < cache_entry.line_end {
cache_entry.time_stamp = self.time_stamp;
return Some((
cache_entry.file.clone(),
cache_entry.line_number,
pos - cache_entry.line_start,
));
}
}
// No cache hit: find and replace the least recently used entry.
let mut oldest = 0;
for index in 1..self.line_cache.len() {
if self.line_cache[index].time_stamp < self.line_cache[oldest].time_stamp {
oldest = index;
}
}
let cache_entry = &mut self.line_cache[oldest];
// If the entry doesn't point to the correct file, fix it up
if pos < cache_entry.file.start_pos || pos >= cache_entry.file.end_pos {
let file_valid;
if self.source_map.files().len() > 0 {
let file_index = self.source_map.lookup_source_file_idx(pos);
let file = self.source_map.files()[file_index].clone();
if pos >= file.start_pos && pos < file.end_pos {
cache_entry.file = file;
cache_entry.file_index = file_index;
file_valid = true;
} else {
file_valid = false;
}
} else {
file_valid = false;
}
if !file_valid {
return None;
}
}
let line_index = cache_entry.file.lookup_line(pos).unwrap();
let line_bounds = cache_entry.file.line_bounds(line_index);
cache_entry.line_number = line_index + 1;
cache_entry.line_start = line_bounds.0;
cache_entry.line_end = line_bounds.1;
cache_entry.time_stamp = self.time_stamp;
Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line_start))
}
}
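// A minimal usage sketch (assuming a `SourceMap` populated elsewhere, e.g.
// via `new_source_file`):
//
//     let mut view = CachingSourceMapView::new(&source_map);
//     if let Some((file, line, col_offset)) = view.byte_pos_to_line_and_col(BytePos(12)) {
//         // `line` is 1-based; `col_offset` is the byte offset from the line start.
//     }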


@@ -0,0 +1,280 @@
use crate::HashStableContext;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::AtomicRef;
use rustc_index::vec::Idx;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::borrow::Borrow;
use std::fmt;
rustc_index::newtype_index! {
pub struct CrateId {
ENCODABLE = custom
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CrateNum {
/// A special `CrateNum` that we use for the `tcx.rcache` when decoding from
/// the incr. comp. cache.
ReservedForIncrCompCache,
Index(CrateId),
}
/// Item definitions in the currently-compiled crate would have the `CrateNum`
/// `LOCAL_CRATE` in their `DefId`.
pub const LOCAL_CRATE: CrateNum = CrateNum::Index(CrateId::from_u32(0));
impl Idx for CrateNum {
#[inline]
fn new(value: usize) -> Self {
CrateNum::Index(Idx::new(value))
}
#[inline]
fn index(self) -> usize {
match self {
CrateNum::Index(idx) => Idx::index(idx),
_ => panic!("Tried to get crate index of {:?}", self),
}
}
}
impl CrateNum {
pub fn new(x: usize) -> CrateNum {
CrateNum::from_usize(x)
}
pub fn from_usize(x: usize) -> CrateNum {
CrateNum::Index(CrateId::from_usize(x))
}
pub fn from_u32(x: u32) -> CrateNum {
CrateNum::Index(CrateId::from_u32(x))
}
pub fn as_usize(self) -> usize {
match self {
CrateNum::Index(id) => id.as_usize(),
_ => panic!("tried to get index of non-standard crate {:?}", self),
}
}
pub fn as_u32(self) -> u32 {
match self {
CrateNum::Index(id) => id.as_u32(),
_ => panic!("tried to get index of non-standard crate {:?}", self),
}
}
pub fn as_def_id(&self) -> DefId {
DefId { krate: *self, index: CRATE_DEF_INDEX }
}
}
impl fmt::Display for CrateNum {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CrateNum::Index(id) => fmt::Display::fmt(&id.private, f),
CrateNum::ReservedForIncrCompCache => write!(f, "crate for decoding incr comp cache"),
}
}
}
/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx.
/// Therefore, make sure to include the context when encoding a `CrateNum`.
impl<E: Encoder> Encodable<E> for CrateNum {
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_u32(self.as_u32())
}
}
impl<D: Decoder> Decodable<D> for CrateNum {
default fn decode(d: &mut D) -> Result<CrateNum, D::Error> {
Ok(CrateNum::from_u32(d.read_u32()?))
}
}
impl ::std::fmt::Debug for CrateNum {
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
match self {
CrateNum::Index(id) => write!(fmt, "crate{}", id.private),
CrateNum::ReservedForIncrCompCache => write!(fmt, "crate for decoding incr comp cache"),
}
}
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct DefPathHash(pub Fingerprint);
impl Borrow<Fingerprint> for DefPathHash {
#[inline]
fn borrow(&self) -> &Fingerprint {
&self.0
}
}
rustc_index::newtype_index! {
/// A DefIndex is an index into the hir-map for a crate, identifying a
/// particular definition. It should really be considered an interned
/// shorthand for a particular DefPath.
pub struct DefIndex {
ENCODABLE = custom // (only encodable in metadata)
DEBUG_FORMAT = "DefIndex({})",
/// The crate root is always assigned index 0 by the AST Map code,
/// thanks to `NodeCollector::new`.
const CRATE_DEF_INDEX = 0,
}
}
impl<E: Encoder> Encodable<E> for DefIndex {
default fn encode(&self, _: &mut E) -> Result<(), E::Error> {
panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>());
}
}
impl<D: Decoder> Decodable<D> for DefIndex {
default fn decode(_: &mut D) -> Result<DefIndex, D::Error> {
panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>());
}
}
/// A `DefId` identifies a particular *definition*, by combining a crate
/// index and a def index.
///
/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)]
pub struct DefId {
pub krate: CrateNum,
pub index: DefIndex,
}
impl DefId {
/// Makes a local `DefId` from the given `DefIndex`.
#[inline]
pub fn local(index: DefIndex) -> DefId {
DefId { krate: LOCAL_CRATE, index }
}
#[inline]
pub fn is_local(self) -> bool {
self.krate == LOCAL_CRATE
}
#[inline]
pub fn as_local(self) -> Option<LocalDefId> {
if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None }
}
#[inline]
pub fn expect_local(self) -> LocalDefId {
self.as_local().unwrap_or_else(|| panic!("DefId::expect_local: `{:?}` isn't local", self))
}
pub fn is_top_level_module(self) -> bool {
self.is_local() && self.index == CRATE_DEF_INDEX
}
}
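// For example, the crate root of the local crate:
//
//     let root = DefId::local(CRATE_DEF_INDEX);
//     assert!(root.is_local() && root.is_top_level_module());
//     assert_eq!(root.expect_local().to_def_id(), root);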
impl<E: Encoder> Encodable<E> for DefId {
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_struct("DefId", 2, |s| {
s.emit_struct_field("krate", 0, |s| self.krate.encode(s))?;
s.emit_struct_field("index", 1, |s| self.index.encode(s))
})
}
}
impl<D: Decoder> Decodable<D> for DefId {
default fn decode(d: &mut D) -> Result<DefId, D::Error> {
d.read_struct("DefId", 2, |d| {
Ok(DefId {
krate: d.read_struct_field("krate", 0, Decodable::decode)?,
index: d.read_struct_field("index", 1, Decodable::decode)?,
})
})
}
}
pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DefId").field("krate", &def_id.krate).field("index", &def_id.index).finish()
}
pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> =
AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _));
impl fmt::Debug for DefId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
(*DEF_ID_DEBUG)(*self, f)
}
}
rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId);
/// A LocalDefId is equivalent to a DefId with `krate == LOCAL_CRATE`. Since
/// we encode this information in the type, we can ensure at compile time that
/// no DefIds from upstream crates get thrown into the mix. There are quite a
/// few cases where we know that only DefIds from the local crate are expected
/// and a DefId from a different crate would signify a bug somewhere. This
/// is where LocalDefId comes in handy.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LocalDefId {
pub local_def_index: DefIndex,
}
impl Idx for LocalDefId {
#[inline]
fn new(idx: usize) -> Self {
LocalDefId { local_def_index: Idx::new(idx) }
}
#[inline]
fn index(self) -> usize {
self.local_def_index.index()
}
}
impl LocalDefId {
#[inline]
pub fn to_def_id(self) -> DefId {
DefId { krate: LOCAL_CRATE, index: self.local_def_index }
}
#[inline]
pub fn is_top_level_module(self) -> bool {
self.local_def_index == CRATE_DEF_INDEX
}
}
impl fmt::Debug for LocalDefId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.to_def_id().fmt(f)
}
}
impl<E: Encoder> Encodable<E> for LocalDefId {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
self.to_def_id().encode(s)
}
}
impl<D: Decoder> Decodable<D> for LocalDefId {
fn decode(d: &mut D) -> Result<LocalDefId, D::Error> {
DefId::decode(d).map(|d| d.expect_local())
}
}
impl<CTX: HashStableContext> HashStable<CTX> for DefId {
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
hcx.hash_def_id(*self, hasher)
}
}
impl<CTX: HashStableContext> HashStable<CTX> for CrateNum {
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
hcx.hash_crate_num(*self, hasher)
}
}


@@ -0,0 +1,73 @@
use crate::symbol::{sym, Symbol};
use std::fmt;
use std::str::FromStr;
use rustc_macros::HashStable_Generic;
/// The edition of the compiler (RFC 2052)
#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)]
#[derive(HashStable_Generic)]
pub enum Edition {
// editions must be kept in order, oldest to newest
/// The 2015 edition
Edition2015,
/// The 2018 edition
Edition2018,
// When adding new editions, be sure to:
//
// - update the `ALL_EDITIONS` const
// - update the `EDITION_NAME_LIST` const
// - add a `rust_####()` function to the session
// - update the enum in Cargo's sources as well
}
// must be in order from oldest to newest
pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018];
pub const EDITION_NAME_LIST: &str = "2015|2018";
pub const DEFAULT_EDITION: Edition = Edition::Edition2015;
impl fmt::Display for Edition {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match *self {
Edition::Edition2015 => "2015",
Edition::Edition2018 => "2018",
};
write!(f, "{}", s)
}
}
impl Edition {
pub fn lint_name(&self) -> &'static str {
match *self {
Edition::Edition2015 => "rust_2015_compatibility",
Edition::Edition2018 => "rust_2018_compatibility",
}
}
pub fn feature_name(&self) -> Symbol {
match *self {
Edition::Edition2015 => sym::rust_2015_preview,
Edition::Edition2018 => sym::rust_2018_preview,
}
}
pub fn is_stable(&self) -> bool {
match *self {
Edition::Edition2015 => true,
Edition::Edition2018 => true,
}
}
}
impl FromStr for Edition {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
match s {
"2015" => Ok(Edition::Edition2015),
"2018" => Ok(Edition::Edition2018),
_ => Err(()),
}
}
}
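// For example, parsing a command-line edition value and falling back to the
// default for unrecognized input:
//
//     let edition = Edition::from_str("2018").unwrap_or(DEFAULT_EDITION);
//     assert_eq!(edition, Edition::Edition2018);
//     assert!(edition.is_stable());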

View file

@@ -0,0 +1,26 @@
/// Used as a return value to signify a fatal error occurred. (It is also
/// used as the argument to panic at the moment, but that will eventually
/// not be true.)
#[derive(Copy, Clone, Debug)]
#[must_use]
pub struct FatalError;
pub struct FatalErrorMarker;
// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError).
// We don't want to invoke the panic handler and print a backtrace for fatal errors.
impl !Send for FatalError {}
impl FatalError {
pub fn raise(self) -> ! {
std::panic::resume_unwind(Box::new(FatalErrorMarker))
}
}
impl std::fmt::Display for FatalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "parser fatal error")
}
}
impl std::error::Error for FatalError {}
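// A minimal sketch of the intended pattern: a driver catches the unwind at
// its top level and recognizes the marker instead of printing a backtrace:
//
//     match std::panic::catch_unwind(|| FatalError.raise()) {
//         Err(payload) if payload.is::<FatalErrorMarker>() => { /* exit cleanly */ }
//         _ => unreachable!(),
//     }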

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,272 @@
use super::*;
use rustc_data_structures::sync::Lrc;
fn init_source_map() -> SourceMap {
let sm = SourceMap::new(FilePathMapping::empty());
sm.new_source_file(PathBuf::from("blork.rs").into(), "first line.\nsecond line".to_string());
sm.new_source_file(PathBuf::from("empty.rs").into(), String::new());
sm.new_source_file(PathBuf::from("blork2.rs").into(), "first line.\nsecond line".to_string());
sm
}
/// Tests `lookup_byte_offset`.
#[test]
fn t3() {
let sm = init_source_map();
let srcfbp1 = sm.lookup_byte_offset(BytePos(23));
assert_eq!(srcfbp1.sf.name, PathBuf::from("blork.rs").into());
assert_eq!(srcfbp1.pos, BytePos(23));
let srcfbp1 = sm.lookup_byte_offset(BytePos(24));
assert_eq!(srcfbp1.sf.name, PathBuf::from("empty.rs").into());
assert_eq!(srcfbp1.pos, BytePos(0));
let srcfbp2 = sm.lookup_byte_offset(BytePos(25));
assert_eq!(srcfbp2.sf.name, PathBuf::from("blork2.rs").into());
assert_eq!(srcfbp2.pos, BytePos(0));
}
/// Tests `bytepos_to_file_charpos`.
#[test]
fn t4() {
let sm = init_source_map();
let cp1 = sm.bytepos_to_file_charpos(BytePos(22));
assert_eq!(cp1, CharPos(22));
let cp2 = sm.bytepos_to_file_charpos(BytePos(25));
assert_eq!(cp2, CharPos(0));
}
/// Tests zero-length `SourceFile`s.
#[test]
fn t5() {
let sm = init_source_map();
let loc1 = sm.lookup_char_pos(BytePos(22));
assert_eq!(loc1.file.name, PathBuf::from("blork.rs").into());
assert_eq!(loc1.line, 2);
assert_eq!(loc1.col, CharPos(10));
let loc2 = sm.lookup_char_pos(BytePos(25));
assert_eq!(loc2.file.name, PathBuf::from("blork2.rs").into());
assert_eq!(loc2.line, 1);
assert_eq!(loc2.col, CharPos(0));
}
fn init_source_map_mbc() -> SourceMap {
let sm = SourceMap::new(FilePathMapping::empty());
// "€" is a three-byte UTF8 char.
sm.new_source_file(
PathBuf::from("blork.rs").into(),
"fir€st €€€€ line.\nsecond line".to_string(),
);
sm.new_source_file(
PathBuf::from("blork2.rs").into(),
"first line€€.\n€ second line".to_string(),
);
sm
}
/// Tests `bytepos_to_file_charpos` in the presence of multi-byte chars.
#[test]
fn t6() {
let sm = init_source_map_mbc();
let cp1 = sm.bytepos_to_file_charpos(BytePos(3));
assert_eq!(cp1, CharPos(3));
let cp2 = sm.bytepos_to_file_charpos(BytePos(6));
assert_eq!(cp2, CharPos(4));
let cp3 = sm.bytepos_to_file_charpos(BytePos(56));
assert_eq!(cp3, CharPos(12));
let cp4 = sm.bytepos_to_file_charpos(BytePos(61));
assert_eq!(cp4, CharPos(15));
}
/// Test `span_to_lines` for a span ending at the end of a `SourceFile`.
#[test]
fn t7() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let file_lines = sm.span_to_lines(span).unwrap();
assert_eq!(file_lines.file.name, PathBuf::from("blork.rs").into());
assert_eq!(file_lines.lines.len(), 1);
assert_eq!(file_lines.lines[0].line_index, 1);
}
/// Given a selection string like "     ~~~~~~~~~~~~ ", produces a span
/// covering that range. The idea is that the selection string has the same
/// length as the input, so the byte positions can be read off directly. Note
/// that the range can span lines and so on.
fn span_from_selection(input: &str, selection: &str) -> Span {
assert_eq!(input.len(), selection.len());
let left_index = selection.find('~').unwrap() as u32;
let right_index = selection.rfind('~').map(|x| x as u32).unwrap_or(left_index);
Span::with_root_ctxt(BytePos(left_index), BytePos(right_index + 1))
}
/// Tests `span_to_snippet` and `span_to_lines` for a span converting 3
/// lines in the middle of a file.
#[test]
fn span_to_snippet_and_lines_spanning_multiple_lines() {
let sm = SourceMap::new(FilePathMapping::empty());
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
let selection = " \n ~~\n~~~\n~~~~~ \n \n";
sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
let span = span_from_selection(inputtext, selection);
// Check that we are extracting the text we thought we were extracting.
assert_eq!(&sm.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD");
// Check that span_to_lines gives us the complete result with the lines/cols we expected.
let lines = sm.span_to_lines(span).unwrap();
let expected = vec![
LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) },
LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) },
LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) },
];
assert_eq!(lines.lines, expected);
}
/// Test span_to_snippet for a span ending at the end of a `SourceFile`.
#[test]
fn t8() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let snippet = sm.span_to_snippet(span);
assert_eq!(snippet, Ok("second line".to_string()));
}
/// Test `span_to_str` for a span ending at the end of a `SourceFile`.
#[test]
fn t9() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let sstr = sm.span_to_string(span);
assert_eq!(sstr, "blork.rs:2:1: 2:12");
}
/// Tests failing to merge two spans on different lines.
#[test]
fn span_merging_fail() {
let sm = SourceMap::new(FilePathMapping::empty());
let inputtext = "bbbb BB\ncc CCC\n";
let selection1 = " ~~\n \n";
let selection2 = " \n ~~~\n";
sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
let span1 = span_from_selection(inputtext, selection1);
let span2 = span_from_selection(inputtext, selection2);
assert!(sm.merge_spans(span1, span2).is_none());
}
/// Tests loading an external source file that requires normalization.
#[test]
fn t10() {
let sm = SourceMap::new(FilePathMapping::empty());
let unnormalized = "first line.\r\nsecond line";
let normalized = "first line.\nsecond line";
let src_file = sm.new_source_file(PathBuf::from("blork.rs").into(), unnormalized.to_string());
assert_eq!(src_file.src.as_ref().unwrap().as_ref(), normalized);
assert!(
src_file.src_hash.matches(unnormalized),
"src_hash should use the source before normalization"
);
let SourceFile {
name,
name_was_remapped,
src_hash,
start_pos,
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
..
} = (*src_file).clone();
let imported_src_file = sm.new_imported_source_file(
name,
name_was_remapped,
src_hash,
name_hash,
(end_pos - start_pos).to_usize(),
CrateNum::new(0),
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
start_pos,
end_pos,
);
assert!(
imported_src_file.external_src.borrow().get_source().is_none(),
"imported source file should not have source yet"
);
imported_src_file.add_external_src(|| Some(unnormalized.to_string()));
assert_eq!(
imported_src_file.external_src.borrow().get_source().unwrap().as_ref(),
normalized,
"imported source file should be normalized"
);
}
/// Returns the span corresponding to the `n`th occurrence (zero-based) of `substring` in `source_text`.
trait SourceMapExtension {
fn span_substr(
&self,
file: &Lrc<SourceFile>,
source_text: &str,
substring: &str,
n: usize,
) -> Span;
}
impl SourceMapExtension for SourceMap {
fn span_substr(
&self,
file: &Lrc<SourceFile>,
source_text: &str,
substring: &str,
n: usize,
) -> Span {
println!(
"span_substr(file={:?}/{:?}, substring={:?}, n={})",
file.name, file.start_pos, substring, n
);
let mut i = 0;
let mut hi = 0;
loop {
let offset = source_text[hi..].find(substring).unwrap_or_else(|| {
panic!(
"source_text `{}` does not have {} occurrences of `{}`, only {}",
source_text, n, substring, i
);
});
let lo = hi + offset;
hi = lo + substring.len();
if i == n {
let span = Span::with_root_ctxt(
BytePos(lo as u32 + file.start_pos.0),
BytePos(hi as u32 + file.start_pos.0),
);
assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring);
return span;
}
i += 1;
}
}
}


@@ -0,0 +1,133 @@
// Spans are encoded using a 1-bit tag and 2 different encoding formats (one for each tag value).
// One format is used for keeping span data inline,
// another contains an index into an out-of-line span interner.
// The encoding format for inline spans was obtained by optimizing over crates in rustc/libstd.
// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
use crate::hygiene::SyntaxContext;
use crate::SESSION_GLOBALS;
use crate::{BytePos, SpanData};
use rustc_data_structures::fx::FxIndexSet;
/// A compressed span.
///
/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span`
/// is a form that only takes up 8 bytes, with less space for the length and
/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
/// stored in a separate interner table, and the `Span` will index into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
/// very large crates) and so the interner was used a lot more.
///
/// Inline (compressed) format:
/// - `span.base_or_index == span_data.lo`
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Interned format:
/// - `span.base_or_index == index` (indexes into the interner table)
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
/// - `span.ctxt == 0`
///
/// The inline form uses 0 for the tag value (rather than 1) so that we don't
/// need to mask out the tag bit when getting the length, and so that the
/// dummy span can be all zeroes.
///
/// Notes about the choice of field sizes:
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
/// values never cause interning. The number of bits needed for `base`
/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
/// `script-servo` is the largest crate in `rustc-perf`, requiring 26 bits
/// for some spans.
/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
/// in `SpanData`, which means that large `len` values will cause interning.
/// The number of bits needed for `len` does not depend on the crate size.
/// The most common numbers of bits for `len` are 0--7, with a peak usually at
/// 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
/// dozens of times in a typical crate.
/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
/// large `ctxt` values will cause interning. The number of bits needed for
/// `ctxt` values depends partly on the crate size and partly on the form of
/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt`,
/// but larger crates might need more than 16 bits.
///
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct Span {
base_or_index: u32,
len_or_tag: u16,
ctxt_or_zero: u16,
}
const LEN_TAG: u16 = 0b1000_0000_0000_0000;
const MAX_LEN: u32 = 0b0111_1111_1111_1111;
const MAX_CTXT: u32 = 0b1111_1111_1111_1111;
/// Dummy span, both position and length are zero, syntax context is zero as well.
pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 };
impl Span {
#[inline]
pub fn new(mut lo: BytePos, mut hi: BytePos, ctxt: SyntaxContext) -> Self {
if lo > hi {
std::mem::swap(&mut lo, &mut hi);
}
let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
// Inline format.
Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 }
} else {
// Interned format.
let index = with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt }));
Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 }
}
}
#[inline]
pub fn data(self) -> SpanData {
if self.len_or_tag != LEN_TAG {
// Inline format.
debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
SpanData {
lo: BytePos(self.base_or_index),
hi: BytePos(self.base_or_index + self.len_or_tag as u32),
ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32),
}
} else {
// Interned format.
debug_assert!(self.ctxt_or_zero == 0);
let index = self.base_or_index;
with_span_interner(|interner| *interner.get(index))
}
}
}
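// For example, a short span with the root (zero) syntax context fits the
// inline format and round-trips through `data()`:
//
//     let sp = Span::new(BytePos(10), BytePos(20), SyntaxContext::root());
//     assert_eq!(sp.data().lo, BytePos(10));
//     assert_eq!(sp.data().hi, BytePos(20));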
#[derive(Default)]
pub struct SpanInterner {
spans: FxIndexSet<SpanData>,
}
impl SpanInterner {
fn intern(&mut self, span_data: &SpanData) -> u32 {
let (index, _) = self.spans.insert_full(*span_data);
index as u32
}
#[inline]
fn get(&self, index: u32) -> &SpanData {
&self.spans[index as usize]
}
}
// Runs `f` on the session-global span interner.
#[inline]
fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.span_interner.lock()))
}

File diff suppressed because it is too large


@@ -0,0 +1,25 @@
use super::*;
use crate::{edition, SessionGlobals};
#[test]
fn interner_tests() {
let mut i: Interner = Interner::default();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
assert_eq!(i.intern("dog"), Symbol::new(0));
// different string gets a different #:
assert_eq!(i.intern("cat"), Symbol::new(1));
assert_eq!(i.intern("cat"), Symbol::new(1));
// dog is still at zero
assert_eq!(i.intern("dog"), Symbol::new(0));
}
#[test]
fn without_first_quote_test() {
SESSION_GLOBALS.set(&SessionGlobals::new(edition::DEFAULT_EDITION), || {
let i = Ident::from_str("'break");
assert_eq!(i.without_first_quote().name, kw::Break);
});
}


@@ -0,0 +1,40 @@
use super::*;
#[test]
fn test_lookup_line() {
let lines = &[BytePos(3), BytePos(17), BytePos(28)];
assert_eq!(lookup_line(lines, BytePos(0)), -1);
assert_eq!(lookup_line(lines, BytePos(3)), 0);
assert_eq!(lookup_line(lines, BytePos(4)), 0);
assert_eq!(lookup_line(lines, BytePos(16)), 0);
assert_eq!(lookup_line(lines, BytePos(17)), 1);
assert_eq!(lookup_line(lines, BytePos(18)), 1);
assert_eq!(lookup_line(lines, BytePos(28)), 2);
assert_eq!(lookup_line(lines, BytePos(29)), 2);
}
#[test]
fn test_normalize_newlines() {
fn check(before: &str, after: &str, expected_positions: &[u32]) {
let mut actual = before.to_string();
let mut actual_positions = vec![];
normalize_newlines(&mut actual, &mut actual_positions);
let actual_positions: Vec<_> = actual_positions.into_iter().map(|nc| nc.pos.0).collect();
assert_eq!(actual.as_str(), after);
assert_eq!(actual_positions, expected_positions);
}
check("", "", &[]);
check("\n", "\n", &[]);
check("\r", "\r", &[]);
check("\r\r", "\r\r", &[]);
check("\r\n", "\n", &[1]);
check("hello world", "hello world", &[]);
check("hello\nworld", "hello\nworld", &[]);
check("hello\r\nworld", "hello\nworld", &[6]);
check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]);
check("\r\r\n", "\r\n", &[2]);
check("hello\rworld", "hello\rworld", &[]);
}