
mv compiler to compiler/

mark 2020-08-27 22:58:48 -05:00 committed by Vadim Petrochenkov
parent db534b3ac2
commit 9e5f7d5631
1686 changed files with 941 additions and 1051 deletions


@@ -0,0 +1,274 @@
use super::*;
use unicode_width::UnicodeWidthChar;
#[cfg(test)]
mod tests;
/// Finds all newlines, multi-byte characters, and non-narrow characters in a
/// SourceFile.
///
/// This function will use an SSE2-enhanced implementation if hardware support
/// is detected at runtime.
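///
/// For example, analyzing `"aβ\n."` (where `β` is a 2-byte char) records line
/// starts at offsets 0 and 4 and one 2-byte `MultiByteChar` at offset 1.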
pub fn analyze_source_file(
src: &str,
source_file_start_pos: BytePos,
) -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) {
let mut lines = vec![source_file_start_pos];
let mut multi_byte_chars = vec![];
let mut non_narrow_chars = vec![];
// Calls the right implementation, depending on hardware support available.
analyze_source_file_dispatch(
src,
source_file_start_pos,
&mut lines,
&mut multi_byte_chars,
&mut non_narrow_chars,
);
// The code above optimistically registers a new line *after* each \n
// it encounters. If that point is already outside the source_file, remove
// it again.
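// E.g. for src == "a\n" a line start at offset 2 is registered, which
// coincides with the end of the file and is popped again here.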
if let Some(&last_line_start) = lines.last() {
let source_file_end = source_file_start_pos + BytePos::from_usize(src.len());
assert!(source_file_end >= last_line_start);
if last_line_start == source_file_end {
lines.pop();
}
}
(lines, multi_byte_chars, non_narrow_chars)
}
cfg_if::cfg_if! {
if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] {
fn analyze_source_file_dispatch(src: &str,
source_file_start_pos: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
if is_x86_feature_detected!("sse2") {
unsafe {
analyze_source_file_sse2(src,
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
} else {
analyze_source_file_generic(src,
src.len(),
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
}
/// Checks 16-byte chunks of text at a time. If the chunk contains
/// something other than printable ASCII characters and newlines, the
/// function falls back to the generic implementation. Otherwise it uses
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(src: &str,
output_offset: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
const CHUNK_SIZE: usize = 16;
let src_bytes = src.as_bytes();
let chunk_count = src.len() / CHUNK_SIZE;
// This variable keeps track of where we should start decoding a
// chunk. If a multi-byte character spans across chunk boundaries,
// we need to skip that part in the next chunk because we already
// handled it.
let mut intra_chunk_offset = 0;
for chunk_index in 0 .. chunk_count {
let ptr = src_bytes.as_ptr() as *const __m128i;
// We don't know if the pointer is aligned to 16 bytes, so we
// use `loadu`, which supports unaligned loading.
let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize));
// For each character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a multi-byte UTF-8 char.
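// (The comparison is on signed `i8` values, so any non-ASCII byte, i.e.
// any byte >= 0x80, is negative and gets flagged here.)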
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
// Create a bit mask from the comparison results.
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);
// Check if there are any control characters in the chunk. All
// control characters that we can encounter at this point have a
// byte value less than 32 or ...
let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32));
let control_char_mask0 = _mm_movemask_epi8(control_char_test0);
// ... it's the ASCII 'DEL' character with a value of 127.
let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127));
let control_char_mask1 = _mm_movemask_epi8(control_char_test1);
let control_char_mask = control_char_mask0 | control_char_mask1;
if control_char_mask != 0 {
// Check for newlines in the chunk
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
let newlines_mask = _mm_movemask_epi8(newlines_test);
if control_char_mask == newlines_mask {
// All control characters are newlines, record them
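// The upper 16 bits are set so that, once every newline bit has
// been cleared, `trailing_zeros()` yields a value >= CHUNK_SIZE
// and the loop below terminates.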
let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32;
let output_offset = output_offset +
BytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
loop {
let index = newlines_mask.trailing_zeros();
if index >= CHUNK_SIZE as u32 {
// We have arrived at the end of the chunk.
break
}
lines.push(BytePos(index) + output_offset);
// Clear the bit, so we can find the next one.
newlines_mask &= (!1) << index;
}
// We are done for this chunk. All control characters were
// newlines and we took care of those.
continue
} else {
// Some of the control characters are not newlines,
// fall through to the slow path below.
}
} else {
// No control characters, nothing to record for this chunk
continue
}
}
// The slow path.
// There are multi-byte or non-newline control chars in here; fall back
// to generic decoding.
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
intra_chunk_offset = analyze_source_file_generic(
&src[scan_start .. ],
CHUNK_SIZE - intra_chunk_offset,
BytePos::from_usize(scan_start) + output_offset,
lines,
multi_byte_chars,
non_narrow_chars
);
}
// There might still be a tail left to analyze
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
if tail_start < src.len() {
analyze_source_file_generic(&src[tail_start as usize ..],
src.len() - tail_start,
output_offset + BytePos::from_usize(tail_start),
lines,
multi_byte_chars,
non_narrow_chars);
}
}
} else {
// The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(src: &str,
source_file_start_pos: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) {
analyze_source_file_generic(src,
src.len(),
source_file_start_pos,
lines,
multi_byte_chars,
non_narrow_chars);
}
}
}
// `scan_len` determines the number of bytes in `src` to scan. Note that the
// function can read past `scan_len` if a multi-byte character starts within
// the range but extends past it. The overflow is returned by the function.
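// E.g. scanning `"abcΔ"` (5 bytes, with the 2-byte `Δ` starting at byte 3)
// with a `scan_len` of 4 decodes `Δ` completely and returns an overflow of 1.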
fn analyze_source_file_generic(
src: &str,
scan_len: usize,
output_offset: BytePos,
lines: &mut Vec<BytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>,
) -> usize {
assert!(src.len() >= scan_len);
let mut i = 0;
let src_bytes = src.as_bytes();
while i < scan_len {
let byte = unsafe {
// We verified that i < scan_len <= src.len()
*src_bytes.get_unchecked(i as usize)
};
// How much to advance in order to get to the next UTF-8 char in the
// string.
let mut char_len = 1;
if byte < 32 {
// This is an ASCII control character, it could be one of the cases
// that are interesting to us.
let pos = BytePos::from_usize(i) + output_offset;
match byte {
b'\n' => {
lines.push(pos + BytePos(1));
}
b'\t' => {
non_narrow_chars.push(NonNarrowChar::Tab(pos));
}
_ => {
non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos));
}
}
} else if byte >= 127 {
// The slow path:
// This is either ASCII control character "DEL" or the beginning of
// a multibyte char. Just decode to `char`.
let c = (&src[i..]).chars().next().unwrap();
char_len = c.len_utf8();
let pos = BytePos::from_usize(i) + output_offset;
if char_len > 1 {
assert!(char_len >= 2 && char_len <= 4);
let mbc = MultiByteChar { pos, bytes: char_len as u8 };
multi_byte_chars.push(mbc);
}
// Assume control characters are zero width.
// FIXME: How can we decide between `width` and `width_cjk`?
let char_width = UnicodeWidthChar::width(c).unwrap_or(0);
if char_width != 1 {
non_narrow_chars.push(NonNarrowChar::new(pos, char_width));
}
}
i += char_len;
}
i - scan_len
}


@@ -0,0 +1,142 @@
use super::*;
macro_rules! test {
(case: $test_name:ident,
text: $text:expr,
source_file_start_pos: $source_file_start_pos:expr,
lines: $lines:expr,
multi_byte_chars: $multi_byte_chars:expr,
non_narrow_chars: $non_narrow_chars:expr,) => {
#[test]
fn $test_name() {
let (lines, multi_byte_chars, non_narrow_chars) =
analyze_source_file($text, BytePos($source_file_start_pos));
let expected_lines: Vec<BytePos> = $lines.into_iter().map(|pos| BytePos(pos)).collect();
assert_eq!(lines, expected_lines);
let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars
.into_iter()
.map(|(pos, bytes)| MultiByteChar { pos: BytePos(pos), bytes })
.collect();
assert_eq!(multi_byte_chars, expected_mbcs);
let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars
.into_iter()
.map(|(pos, width)| NonNarrowChar::new(BytePos(pos), width))
.collect();
assert_eq!(non_narrow_chars, expected_nncs);
}
};
}
test!(
case: empty_text,
text: "",
source_file_start_pos: 0,
lines: vec![],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newlines_short,
text: "a\nc",
source_file_start_pos: 0,
lines: vec![0, 2],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newlines_long,
text: "012345678\nabcdef012345678\na",
source_file_start_pos: 0,
lines: vec![0, 10, 26],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
test!(
case: newline_and_multi_byte_char_in_same_chunk,
text: "01234β789\nbcdef0123456789abcdef",
source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![(5, 2)],
non_narrow_chars: vec![],
);
test!(
case: newline_and_control_char_in_same_chunk,
text: "01234\u{07}6789\nbcdef0123456789abcdef",
source_file_start_pos: 0,
lines: vec![0, 11],
multi_byte_chars: vec![],
non_narrow_chars: vec![(5, 0)],
);
test!(
case: multi_byte_char_short,
text: "aβc",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(1, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_long,
text: "0123456789abcΔf012345β",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(13, 2), (22, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_across_chunk_boundary,
text: "0123456789abcdeΔ123456789abcdef01234",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);
test!(
case: multi_byte_char_across_chunk_boundary_tail,
text: "0123456789abcdeΔ....",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![(15, 2)],
non_narrow_chars: vec![],
);
test!(
case: non_narrow_short,
text: "0\t2",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(1, 4)],
);
test!(
case: non_narrow_long,
text: "01\t3456789abcdef01234567\u{07}9",
source_file_start_pos: 0,
lines: vec![0],
multi_byte_chars: vec![],
non_narrow_chars: vec![(2, 4), (24, 0)],
);
test!(
case: output_offset_all,
text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf",
source_file_start_pos: 1000,
lines: vec![0 + 1000, 7 + 1000, 27 + 1000],
multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)],
non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)],
);


@@ -0,0 +1,104 @@
use crate::source_map::SourceMap;
use crate::{BytePos, SourceFile};
use rustc_data_structures::sync::Lrc;
#[derive(Clone)]
struct CacheEntry {
time_stamp: usize,
line_number: usize,
line_start: BytePos,
line_end: BytePos,
file: Lrc<SourceFile>,
file_index: usize,
}
#[derive(Clone)]
pub struct CachingSourceMapView<'sm> {
source_map: &'sm SourceMap,
line_cache: [CacheEntry; 3],
time_stamp: usize,
}
impl<'sm> CachingSourceMapView<'sm> {
pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> {
let files = source_map.files();
let first_file = files[0].clone();
let entry = CacheEntry {
time_stamp: 0,
line_number: 0,
line_start: BytePos(0),
line_end: BytePos(0),
file: first_file,
file_index: 0,
};
CachingSourceMapView {
source_map,
line_cache: [entry.clone(), entry.clone(), entry],
time_stamp: 0,
}
}
pub fn byte_pos_to_line_and_col(
&mut self,
pos: BytePos,
) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
self.time_stamp += 1;
// Check if the position is in one of the cached lines
for cache_entry in self.line_cache.iter_mut() {
if pos >= cache_entry.line_start && pos < cache_entry.line_end {
cache_entry.time_stamp = self.time_stamp;
return Some((
cache_entry.file.clone(),
cache_entry.line_number,
pos - cache_entry.line_start,
));
}
}
// No cache hit: find and replace the least recently used entry.
let mut oldest = 0;
for index in 1..self.line_cache.len() {
if self.line_cache[index].time_stamp < self.line_cache[oldest].time_stamp {
oldest = index;
}
}
let cache_entry = &mut self.line_cache[oldest];
// If the entry doesn't point to the correct file, fix it up
if pos < cache_entry.file.start_pos || pos >= cache_entry.file.end_pos {
let file_valid;
if self.source_map.files().len() > 0 {
let file_index = self.source_map.lookup_source_file_idx(pos);
let file = self.source_map.files()[file_index].clone();
if pos >= file.start_pos && pos < file.end_pos {
cache_entry.file = file;
cache_entry.file_index = file_index;
file_valid = true;
} else {
file_valid = false;
}
} else {
file_valid = false;
}
if !file_valid {
return None;
}
}
let line_index = cache_entry.file.lookup_line(pos).unwrap();
let line_bounds = cache_entry.file.line_bounds(line_index);
cache_entry.line_number = line_index + 1;
cache_entry.line_start = line_bounds.0;
cache_entry.line_end = line_bounds.1;
cache_entry.time_stamp = self.time_stamp;
Some((cache_entry.file.clone(), cache_entry.line_number, pos - cache_entry.line_start))
}
}
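// A minimal usage sketch (assuming a `SourceMap` populated elsewhere, e.g.
// via `new_source_file`):
//
//     let mut view = CachingSourceMapView::new(&source_map);
//     if let Some((file, line, col_offset)) = view.byte_pos_to_line_and_col(BytePos(12)) {
//         // `line` is 1-based; `col_offset` is the byte offset from the line start.
//     }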


@@ -0,0 +1,280 @@
use crate::HashStableContext;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::AtomicRef;
use rustc_index::vec::Idx;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::borrow::Borrow;
use std::fmt;
rustc_index::newtype_index! {
pub struct CrateId {
ENCODABLE = custom
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CrateNum {
/// A special `CrateNum` that we use for the `tcx.rcache` when decoding from
/// the incr. comp. cache.
ReservedForIncrCompCache,
Index(CrateId),
}
/// Item definitions in the currently-compiled crate would have the `CrateNum`
/// `LOCAL_CRATE` in their `DefId`.
pub const LOCAL_CRATE: CrateNum = CrateNum::Index(CrateId::from_u32(0));
impl Idx for CrateNum {
#[inline]
fn new(value: usize) -> Self {
CrateNum::Index(Idx::new(value))
}
#[inline]
fn index(self) -> usize {
match self {
CrateNum::Index(idx) => Idx::index(idx),
_ => panic!("Tried to get crate index of {:?}", self),
}
}
}
impl CrateNum {
pub fn new(x: usize) -> CrateNum {
CrateNum::from_usize(x)
}
pub fn from_usize(x: usize) -> CrateNum {
CrateNum::Index(CrateId::from_usize(x))
}
pub fn from_u32(x: u32) -> CrateNum {
CrateNum::Index(CrateId::from_u32(x))
}
pub fn as_usize(self) -> usize {
match self {
CrateNum::Index(id) => id.as_usize(),
_ => panic!("tried to get index of non-standard crate {:?}", self),
}
}
pub fn as_u32(self) -> u32 {
match self {
CrateNum::Index(id) => id.as_u32(),
_ => panic!("tried to get index of non-standard crate {:?}", self),
}
}
pub fn as_def_id(&self) -> DefId {
DefId { krate: *self, index: CRATE_DEF_INDEX }
}
}
impl fmt::Display for CrateNum {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CrateNum::Index(id) => fmt::Display::fmt(&id.private, f),
CrateNum::ReservedForIncrCompCache => write!(f, "crate for decoding incr comp cache"),
}
}
}
/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx.
/// Therefore, make sure to include the context when encoding a `CrateNum`.
impl<E: Encoder> Encodable<E> for CrateNum {
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_u32(self.as_u32())
}
}
impl<D: Decoder> Decodable<D> for CrateNum {
default fn decode(d: &mut D) -> Result<CrateNum, D::Error> {
Ok(CrateNum::from_u32(d.read_u32()?))
}
}
impl ::std::fmt::Debug for CrateNum {
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
match self {
CrateNum::Index(id) => write!(fmt, "crate{}", id.private),
CrateNum::ReservedForIncrCompCache => write!(fmt, "crate for decoding incr comp cache"),
}
}
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct DefPathHash(pub Fingerprint);
impl Borrow<Fingerprint> for DefPathHash {
#[inline]
fn borrow(&self) -> &Fingerprint {
&self.0
}
}
rustc_index::newtype_index! {
/// A DefIndex is an index into the hir-map for a crate, identifying a
/// particular definition. It should really be considered an interned
/// shorthand for a particular DefPath.
pub struct DefIndex {
ENCODABLE = custom // (only encodable in metadata)
DEBUG_FORMAT = "DefIndex({})",
/// The crate root is always assigned index 0 by the AST Map code,
/// thanks to `NodeCollector::new`.
const CRATE_DEF_INDEX = 0,
}
}
impl<E: Encoder> Encodable<E> for DefIndex {
default fn encode(&self, _: &mut E) -> Result<(), E::Error> {
panic!("cannot encode `DefIndex` with `{}`", std::any::type_name::<E>());
}
}
impl<D: Decoder> Decodable<D> for DefIndex {
default fn decode(_: &mut D) -> Result<DefIndex, D::Error> {
panic!("cannot decode `DefIndex` with `{}`", std::any::type_name::<D>());
}
}
/// A `DefId` identifies a particular *definition*, by combining a crate
/// index and a def index.
///
/// You can create a `DefId` from a `LocalDefId` using `local_def_id.to_def_id()`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Copy)]
pub struct DefId {
pub krate: CrateNum,
pub index: DefIndex,
}
impl DefId {
/// Makes a local `DefId` from the given `DefIndex`.
#[inline]
pub fn local(index: DefIndex) -> DefId {
DefId { krate: LOCAL_CRATE, index }
}
#[inline]
pub fn is_local(self) -> bool {
self.krate == LOCAL_CRATE
}
#[inline]
pub fn as_local(self) -> Option<LocalDefId> {
if self.is_local() { Some(LocalDefId { local_def_index: self.index }) } else { None }
}
#[inline]
pub fn expect_local(self) -> LocalDefId {
self.as_local().unwrap_or_else(|| panic!("DefId::expect_local: `{:?}` isn't local", self))
}
pub fn is_top_level_module(self) -> bool {
self.is_local() && self.index == CRATE_DEF_INDEX
}
}
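// For example, the crate root of the local crate:
//
//     let root = DefId::local(CRATE_DEF_INDEX);
//     assert!(root.is_local() && root.is_top_level_module());
//     assert_eq!(root.expect_local().to_def_id(), root);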
impl<E: Encoder> Encodable<E> for DefId {
default fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_struct("DefId", 2, |s| {
s.emit_struct_field("krate", 0, |s| self.krate.encode(s))?;
s.emit_struct_field("index", 1, |s| self.index.encode(s))
})
}
}
impl<D: Decoder> Decodable<D> for DefId {
default fn decode(d: &mut D) -> Result<DefId, D::Error> {
d.read_struct("DefId", 2, |d| {
Ok(DefId {
krate: d.read_struct_field("krate", 0, Decodable::decode)?,
index: d.read_struct_field("index", 1, Decodable::decode)?,
})
})
}
}
pub fn default_def_id_debug(def_id: DefId, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DefId").field("krate", &def_id.krate).field("index", &def_id.index).finish()
}
pub static DEF_ID_DEBUG: AtomicRef<fn(DefId, &mut fmt::Formatter<'_>) -> fmt::Result> =
AtomicRef::new(&(default_def_id_debug as fn(_, &mut fmt::Formatter<'_>) -> _));
impl fmt::Debug for DefId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
(*DEF_ID_DEBUG)(*self, f)
}
}
rustc_data_structures::define_id_collections!(DefIdMap, DefIdSet, DefId);
/// A LocalDefId is equivalent to a DefId with `krate == LOCAL_CRATE`. Since
/// we encode this information in the type, we can ensure at compile time that
/// no DefIds from upstream crates get thrown into the mix. There are quite a
/// few cases where we know that only DefIds from the local crate are expected
/// and a DefId from a different crate would signify a bug somewhere. This
/// is where LocalDefId comes in handy.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LocalDefId {
pub local_def_index: DefIndex,
}
impl Idx for LocalDefId {
#[inline]
fn new(idx: usize) -> Self {
LocalDefId { local_def_index: Idx::new(idx) }
}
#[inline]
fn index(self) -> usize {
self.local_def_index.index()
}
}
impl LocalDefId {
#[inline]
pub fn to_def_id(self) -> DefId {
DefId { krate: LOCAL_CRATE, index: self.local_def_index }
}
#[inline]
pub fn is_top_level_module(self) -> bool {
self.local_def_index == CRATE_DEF_INDEX
}
}
impl fmt::Debug for LocalDefId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.to_def_id().fmt(f)
}
}
impl<E: Encoder> Encodable<E> for LocalDefId {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
self.to_def_id().encode(s)
}
}
impl<D: Decoder> Decodable<D> for LocalDefId {
fn decode(d: &mut D) -> Result<LocalDefId, D::Error> {
DefId::decode(d).map(|d| d.expect_local())
}
}
impl<CTX: HashStableContext> HashStable<CTX> for DefId {
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
hcx.hash_def_id(*self, hasher)
}
}
impl<CTX: HashStableContext> HashStable<CTX> for CrateNum {
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
hcx.hash_crate_num(*self, hasher)
}
}


@@ -0,0 +1,73 @@
use crate::symbol::{sym, Symbol};
use std::fmt;
use std::str::FromStr;
use rustc_macros::HashStable_Generic;
/// The edition of the compiler (RFC 2052)
#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)]
#[derive(HashStable_Generic)]
pub enum Edition {
// editions must be kept in order, oldest to newest
/// The 2015 edition
Edition2015,
/// The 2018 edition
Edition2018,
// When adding new editions, be sure to:
//
// - update the `ALL_EDITIONS` const
// - update the `EDITION_NAME_LIST` const
// - add a `rust_####()` function to the session
// - update the enum in Cargo's sources as well
}
// must be in order from oldest to newest
pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018];
pub const EDITION_NAME_LIST: &str = "2015|2018";
pub const DEFAULT_EDITION: Edition = Edition::Edition2015;
impl fmt::Display for Edition {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match *self {
Edition::Edition2015 => "2015",
Edition::Edition2018 => "2018",
};
write!(f, "{}", s)
}
}
impl Edition {
pub fn lint_name(&self) -> &'static str {
match *self {
Edition::Edition2015 => "rust_2015_compatibility",
Edition::Edition2018 => "rust_2018_compatibility",
}
}
pub fn feature_name(&self) -> Symbol {
match *self {
Edition::Edition2015 => sym::rust_2015_preview,
Edition::Edition2018 => sym::rust_2018_preview,
}
}
pub fn is_stable(&self) -> bool {
match *self {
Edition::Edition2015 => true,
Edition::Edition2018 => true,
}
}
}
impl FromStr for Edition {
type Err = ();
fn from_str(s: &str) -> Result<Self, ()> {
match s {
"2015" => Ok(Edition::Edition2015),
"2018" => Ok(Edition::Edition2018),
_ => Err(()),
}
}
}
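// For example, parsing a command-line edition value and falling back to the
// default for unrecognized input:
//
//     let edition = Edition::from_str("2018").unwrap_or(DEFAULT_EDITION);
//     assert_eq!(edition, Edition::Edition2018);
//     assert!(edition.is_stable());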

View file

@@ -0,0 +1,26 @@
/// Used as a return value to signify a fatal error occurred. (It is also
/// used as the argument to panic at the moment, but that will eventually
/// not be true.)
#[derive(Copy, Clone, Debug)]
#[must_use]
pub struct FatalError;
pub struct FatalErrorMarker;
// Don't implement Send on FatalError. This makes it impossible to panic!(FatalError).
// We don't want to invoke the panic handler and print a backtrace for fatal errors.
impl !Send for FatalError {}
impl FatalError {
pub fn raise(self) -> ! {
std::panic::resume_unwind(Box::new(FatalErrorMarker))
}
}
impl std::fmt::Display for FatalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "parser fatal error")
}
}
impl std::error::Error for FatalError {}
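// A minimal sketch of the intended pattern: a driver catches the unwind at
// its top level and recognizes the marker instead of printing a backtrace:
//
//     match std::panic::catch_unwind(|| FatalError.raise()) {
//         Err(payload) if payload.is::<FatalErrorMarker>() => { /* exit cleanly */ }
//         _ => unreachable!(),
//     }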

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,272 @@
use super::*;
use rustc_data_structures::sync::Lrc;
fn init_source_map() -> SourceMap {
let sm = SourceMap::new(FilePathMapping::empty());
sm.new_source_file(PathBuf::from("blork.rs").into(), "first line.\nsecond line".to_string());
sm.new_source_file(PathBuf::from("empty.rs").into(), String::new());
sm.new_source_file(PathBuf::from("blork2.rs").into(), "first line.\nsecond line".to_string());
sm
}
/// Tests `lookup_byte_offset`.
#[test]
fn t3() {
let sm = init_source_map();
let srcfbp1 = sm.lookup_byte_offset(BytePos(23));
assert_eq!(srcfbp1.sf.name, PathBuf::from("blork.rs").into());
assert_eq!(srcfbp1.pos, BytePos(23));
let srcfbp1 = sm.lookup_byte_offset(BytePos(24));
assert_eq!(srcfbp1.sf.name, PathBuf::from("empty.rs").into());
assert_eq!(srcfbp1.pos, BytePos(0));
let srcfbp2 = sm.lookup_byte_offset(BytePos(25));
assert_eq!(srcfbp2.sf.name, PathBuf::from("blork2.rs").into());
assert_eq!(srcfbp2.pos, BytePos(0));
}
/// Tests `bytepos_to_file_charpos`.
#[test]
fn t4() {
let sm = init_source_map();
let cp1 = sm.bytepos_to_file_charpos(BytePos(22));
assert_eq!(cp1, CharPos(22));
let cp2 = sm.bytepos_to_file_charpos(BytePos(25));
assert_eq!(cp2, CharPos(0));
}
/// Tests zero-length `SourceFile`s.
#[test]
fn t5() {
let sm = init_source_map();
let loc1 = sm.lookup_char_pos(BytePos(22));
assert_eq!(loc1.file.name, PathBuf::from("blork.rs").into());
assert_eq!(loc1.line, 2);
assert_eq!(loc1.col, CharPos(10));
let loc2 = sm.lookup_char_pos(BytePos(25));
assert_eq!(loc2.file.name, PathBuf::from("blork2.rs").into());
assert_eq!(loc2.line, 1);
assert_eq!(loc2.col, CharPos(0));
}
fn init_source_map_mbc() -> SourceMap {
let sm = SourceMap::new(FilePathMapping::empty());
// "€" is a three-byte UTF8 char.
sm.new_source_file(
PathBuf::from("blork.rs").into(),
"fir€st €€€€ line.\nsecond line".to_string(),
);
sm.new_source_file(
PathBuf::from("blork2.rs").into(),
"first line€€.\n€ second line".to_string(),
);
sm
}
/// Tests `bytepos_to_file_charpos` in the presence of multi-byte chars.
#[test]
fn t6() {
let sm = init_source_map_mbc();
let cp1 = sm.bytepos_to_file_charpos(BytePos(3));
assert_eq!(cp1, CharPos(3));
let cp2 = sm.bytepos_to_file_charpos(BytePos(6));
assert_eq!(cp2, CharPos(4));
let cp3 = sm.bytepos_to_file_charpos(BytePos(56));
assert_eq!(cp3, CharPos(12));
let cp4 = sm.bytepos_to_file_charpos(BytePos(61));
assert_eq!(cp4, CharPos(15));
}
/// Test `span_to_lines` for a span ending at the end of a `SourceFile`.
#[test]
fn t7() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let file_lines = sm.span_to_lines(span).unwrap();
assert_eq!(file_lines.file.name, PathBuf::from("blork.rs").into());
assert_eq!(file_lines.lines.len(), 1);
assert_eq!(file_lines.lines[0].line_index, 1);
}
/// Given a selection string like "     ~~~~~~~~~~~~ ", produces a span
/// covering that range. The idea is that the selection string has the same
/// length as the input, so the byte positions can be read off directly. Note
/// that the range can span lines and so on.
fn span_from_selection(input: &str, selection: &str) -> Span {
assert_eq!(input.len(), selection.len());
let left_index = selection.find('~').unwrap() as u32;
let right_index = selection.rfind('~').map(|x| x as u32).unwrap_or(left_index);
Span::with_root_ctxt(BytePos(left_index), BytePos(right_index + 1))
}
/// Tests `span_to_snippet` and `span_to_lines` for a span converting 3
/// lines in the middle of a file.
#[test]
fn span_to_snippet_and_lines_spanning_multiple_lines() {
let sm = SourceMap::new(FilePathMapping::empty());
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
let selection = " \n ~~\n~~~\n~~~~~ \n \n";
sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_string());
let span = span_from_selection(inputtext, selection);
// Check that we are extracting the text we thought we were extracting.
assert_eq!(&sm.span_to_snippet(span).unwrap(), "BB\nCCC\nDDDDD");
// Check that span_to_lines gives us the complete result with the lines/cols we expected.
let lines = sm.span_to_lines(span).unwrap();
let expected = vec![
LineInfo { line_index: 1, start_col: CharPos(4), end_col: CharPos(6) },
LineInfo { line_index: 2, start_col: CharPos(0), end_col: CharPos(3) },
LineInfo { line_index: 3, start_col: CharPos(0), end_col: CharPos(5) },
];
assert_eq!(lines.lines, expected);
}
/// Test span_to_snippet for a span ending at the end of a `SourceFile`.
#[test]
fn t8() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let snippet = sm.span_to_snippet(span);
assert_eq!(snippet, Ok("second line".to_string()));
}
/// Test `span_to_str` for a span ending at the end of a `SourceFile`.
#[test]
fn t9() {
let sm = init_source_map();
let span = Span::with_root_ctxt(BytePos(12), BytePos(23));
let sstr = sm.span_to_string(span);
assert_eq!(sstr, "blork.rs:2:1: 2:12");
}
/// Tests failing to merge two spans on different lines.
#[test]
fn span_merging_fail() {
let sm = SourceMap::new(FilePathMapping::empty());
let inputtext = "bbbb BB\ncc CCC\n";
let selection1 = " ~~\n \n";
let selection2 = " \n ~~~\n";
sm.new_source_file(Path::new("blork.rs").to_owned().into(), inputtext.to_owned());
let span1 = span_from_selection(inputtext, selection1);
let span2 = span_from_selection(inputtext, selection2);
assert!(sm.merge_spans(span1, span2).is_none());
}
/// Tests loading an external source file that requires normalization.
#[test]
fn t10() {
let sm = SourceMap::new(FilePathMapping::empty());
let unnormalized = "first line.\r\nsecond line";
let normalized = "first line.\nsecond line";
let src_file = sm.new_source_file(PathBuf::from("blork.rs").into(), unnormalized.to_string());
assert_eq!(src_file.src.as_ref().unwrap().as_ref(), normalized);
assert!(
src_file.src_hash.matches(unnormalized),
"src_hash should use the source before normalization"
);
let SourceFile {
name,
name_was_remapped,
src_hash,
start_pos,
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
..
} = (*src_file).clone();
let imported_src_file = sm.new_imported_source_file(
name,
name_was_remapped,
src_hash,
name_hash,
(end_pos - start_pos).to_usize(),
CrateNum::new(0),
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
start_pos,
end_pos,
);
assert!(
imported_src_file.external_src.borrow().get_source().is_none(),
"imported source file should not have source yet"
);
imported_src_file.add_external_src(|| Some(unnormalized.to_string()));
assert_eq!(
imported_src_file.external_src.borrow().get_source().unwrap().as_ref(),
normalized,
"imported source file should be normalized"
);
}
/// Returns the span corresponding to the `n`th occurrence (zero-based) of `substring` in `source_text`.
trait SourceMapExtension {
fn span_substr(
&self,
file: &Lrc<SourceFile>,
source_text: &str,
substring: &str,
n: usize,
) -> Span;
}
impl SourceMapExtension for SourceMap {
fn span_substr(
&self,
file: &Lrc<SourceFile>,
source_text: &str,
substring: &str,
n: usize,
) -> Span {
println!(
"span_substr(file={:?}/{:?}, substring={:?}, n={})",
file.name, file.start_pos, substring, n
);
let mut i = 0;
let mut hi = 0;
loop {
let offset = source_text[hi..].find(substring).unwrap_or_else(|| {
panic!(
"source_text `{}` does not have {} occurrences of `{}`, only {}",
source_text, n, substring, i
);
});
let lo = hi + offset;
hi = lo + substring.len();
if i == n {
let span = Span::with_root_ctxt(
BytePos(lo as u32 + file.start_pos.0),
BytePos(hi as u32 + file.start_pos.0),
);
assert_eq!(&self.span_to_snippet(span).unwrap()[..], substring);
return span;
}
i += 1;
}
}
}


@@ -0,0 +1,133 @@
// Spans are encoded using a 1-bit tag and 2 different encoding formats (one for each tag value).
// One format is used for keeping span data inline,
// another contains an index into an out-of-line span interner.
// The encoding format for inline spans was obtained by optimizing over crates in rustc/libstd.
// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28
use crate::hygiene::SyntaxContext;
use crate::SESSION_GLOBALS;
use crate::{BytePos, SpanData};
use rustc_data_structures::fx::FxIndexSet;
/// A compressed span.
///
/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span`
/// is a form that only takes up 8 bytes, with less space for the length and
/// context. The vast majority (99.9%+) of `SpanData` instances will fit within
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
/// stored in a separate interner table, and the `Span` will index into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
/// very large crates) and so the interner was used a lot more.
///
/// Inline (compressed) format:
/// - `span.base_or_index == span_data.lo`
/// - `span.len_or_tag == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
/// - `span.ctxt == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Interned format:
/// - `span.base_or_index == index` (indexes into the interner table)
/// - `span.len_or_tag == LEN_TAG` (high bit set, all other bits are zero)
/// - `span.ctxt == 0`
///
/// The inline form uses 0 for the tag value (rather than 1) so that we don't
/// need to mask out the tag bit when getting the length, and so that the
/// dummy span can be all zeroes.
///
/// Notes about the choice of field sizes:
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
/// values never cause interning. The number of bits needed for `base`
/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
/// `script-servo` is the largest crate in `rustc-perf`, requiring 26 bits
/// for some spans.
/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
/// in `SpanData`, which means that large `len` values will cause interning.
/// The number of bits needed for `len` does not depend on the crate size.
/// The most common numbers of bits for `len` are 0--7, with a peak usually at
/// 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
/// dozens of times in a typical crate.
/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that
/// large `ctxt` values will cause interning. The number of bits needed for
/// `ctxt` values depends partly on the crate size and partly on the form of
/// the code. No crates in `rustc-perf` need more than 15 bits for `ctxt`,
/// but larger crates might need more than 16 bits.
///
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
pub struct Span {
base_or_index: u32,
len_or_tag: u16,
ctxt_or_zero: u16,
}
const LEN_TAG: u16 = 0b1000_0000_0000_0000;
const MAX_LEN: u32 = 0b0111_1111_1111_1111;
const MAX_CTXT: u32 = 0b1111_1111_1111_1111;
/// Dummy span, both position and length are zero, syntax context is zero as well.
pub const DUMMY_SP: Span = Span { base_or_index: 0, len_or_tag: 0, ctxt_or_zero: 0 };
impl Span {
#[inline]
pub fn new(mut lo: BytePos, mut hi: BytePos, ctxt: SyntaxContext) -> Self {
if lo > hi {
std::mem::swap(&mut lo, &mut hi);
}
let (base, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
if len <= MAX_LEN && ctxt2 <= MAX_CTXT {
// Inline format.
Span { base_or_index: base, len_or_tag: len as u16, ctxt_or_zero: ctxt2 as u16 }
} else {
// Interned format.
let index = with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt }));
Span { base_or_index: index, len_or_tag: LEN_TAG, ctxt_or_zero: 0 }
}
}
#[inline]
pub fn data(self) -> SpanData {
if self.len_or_tag != LEN_TAG {
// Inline format.
debug_assert!(self.len_or_tag as u32 <= MAX_LEN);
SpanData {
lo: BytePos(self.base_or_index),
hi: BytePos(self.base_or_index + self.len_or_tag as u32),
ctxt: SyntaxContext::from_u32(self.ctxt_or_zero as u32),
}
} else {
// Interned format.
debug_assert!(self.ctxt_or_zero == 0);
let index = self.base_or_index;
with_span_interner(|interner| *interner.get(index))
}
}
}
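// For example, a short span with the root (zero) syntax context fits the
// inline format and round-trips through `data()`:
//
//     let sp = Span::new(BytePos(10), BytePos(20), SyntaxContext::root());
//     assert_eq!(sp.data().lo, BytePos(10));
//     assert_eq!(sp.data().hi, BytePos(20));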
#[derive(Default)]
pub struct SpanInterner {
spans: FxIndexSet<SpanData>,
}
impl SpanInterner {
fn intern(&mut self, span_data: &SpanData) -> u32 {
let (index, _) = self.spans.insert_full(*span_data);
index as u32
}
#[inline]
fn get(&self, index: u32) -> &SpanData {
&self.spans[index as usize]
}
}
// Runs `f` on the session-global span interner.
#[inline]
fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.span_interner.lock()))
}

File diff suppressed because it is too large


@@ -0,0 +1,25 @@
use super::*;
use crate::{edition, SessionGlobals};
#[test]
fn interner_tests() {
let mut i: Interner = Interner::default();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
assert_eq!(i.intern("dog"), Symbol::new(0));
// different string gets a different #:
assert_eq!(i.intern("cat"), Symbol::new(1));
assert_eq!(i.intern("cat"), Symbol::new(1));
// dog is still at zero
assert_eq!(i.intern("dog"), Symbol::new(0));
}
#[test]
fn without_first_quote_test() {
SESSION_GLOBALS.set(&SessionGlobals::new(edition::DEFAULT_EDITION), || {
let i = Ident::from_str("'break");
assert_eq!(i.without_first_quote().name, kw::Break);
});
}


@@ -0,0 +1,40 @@
use super::*;
#[test]
fn test_lookup_line() {
let lines = &[BytePos(3), BytePos(17), BytePos(28)];
assert_eq!(lookup_line(lines, BytePos(0)), -1);
assert_eq!(lookup_line(lines, BytePos(3)), 0);
assert_eq!(lookup_line(lines, BytePos(4)), 0);
assert_eq!(lookup_line(lines, BytePos(16)), 0);
assert_eq!(lookup_line(lines, BytePos(17)), 1);
assert_eq!(lookup_line(lines, BytePos(18)), 1);
assert_eq!(lookup_line(lines, BytePos(28)), 2);
assert_eq!(lookup_line(lines, BytePos(29)), 2);
}
#[test]
fn test_normalize_newlines() {
fn check(before: &str, after: &str, expected_positions: &[u32]) {
let mut actual = before.to_string();
let mut actual_positions = vec![];
normalize_newlines(&mut actual, &mut actual_positions);
let actual_positions: Vec<_> = actual_positions.into_iter().map(|nc| nc.pos.0).collect();
assert_eq!(actual.as_str(), after);
assert_eq!(actual_positions, expected_positions);
}
check("", "", &[]);
check("\n", "\n", &[]);
check("\r", "\r", &[]);
check("\r\r", "\r\r", &[]);
check("\r\n", "\n", &[1]);
check("hello world", "hello world", &[]);
check("hello\nworld", "hello\nworld", &[]);
check("hello\r\nworld", "hello\nworld", &[6]);
check("\r\nhello\r\nworld\r\n", "\nhello\nworld\n", &[1, 7, 13]);
check("\r\r\n", "\r\n", &[2]);
check("hello\rworld", "hello\rworld", &[]);
}