1
Fork 0

Rollup merge of #80358 - pierwill:edit_rustc_span, r=lcnr

Edit rustc_span documentation

Various changes to the `rustc_span` docs, including the following:

- Additions to top-level docs
- Edits to the source_map module docs
- Edits to documentation for `Span` and `SpanData`
- Added intra-doc links
- Documentation for Levenshtein distances
- Fixed missing punctuation

This commit is contained in:
Yuki Okushi 2020-12-30 18:15:11 +09:00 committed by GitHub
commit 6064be7ced
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 46 deletions

View file

@ -4,24 +4,25 @@ use std::str::FromStr;
use rustc_macros::HashStable_Generic; use rustc_macros::HashStable_Generic;
/// The edition of the compiler (RFC 2052) /// The edition of the compiler. (See [RFC 2052](https://github.com/rust-lang/rfcs/blob/master/text/2052-epochs.md).)
#[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)] #[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Encodable, Decodable, Eq)]
#[derive(HashStable_Generic)] #[derive(HashStable_Generic)]
pub enum Edition { pub enum Edition {
// editions must be kept in order, oldest to newest // When adding new editions, be sure to do the following:
//
// - update the `ALL_EDITIONS` const
// - update the `EDITION_NAME_LIST` const
// - add a `rust_####()` function to the session
// - update the enum in Cargo's sources as well
//
// Editions *must* be kept in order, oldest to newest.
/// The 2015 edition /// The 2015 edition
Edition2015, Edition2015,
/// The 2018 edition /// The 2018 edition
Edition2018, Edition2018,
// when adding new editions, be sure to update:
//
// - Update the `ALL_EDITIONS` const
// - Update the EDITION_NAME_LIST const
// - add a `rust_####()` function to the session
// - update the enum in Cargo's sources as well
} }
// must be in order from oldest to newest // Must be in order from oldest to newest.
pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018]; pub const ALL_EDITIONS: &[Edition] = &[Edition::Edition2015, Edition::Edition2018];
pub const EDITION_NAME_LIST: &str = "2015|2018"; pub const EDITION_NAME_LIST: &str = "2015|2018";

View file

@ -1,10 +1,16 @@
//! Levenshtein distances.
//!
//! The [Levenshtein distance] is a metric for measuring the difference between two strings.
//!
//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
use crate::symbol::Symbol; use crate::symbol::Symbol;
use std::cmp; use std::cmp;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
/// Finds the Levenshtein distance between two strings /// Finds the Levenshtein distance between two strings.
pub fn lev_distance(a: &str, b: &str) -> usize { pub fn lev_distance(a: &str, b: &str) -> usize {
// cases which don't require further computation // cases which don't require further computation
if a.is_empty() { if a.is_empty() {
@ -35,14 +41,14 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
dcol[t_last + 1] dcol[t_last + 1]
} }
/// Finds the best match for a given word in the given iterator /// Finds the best match for a given word in the given iterator.
/// ///
/// As a loose rule to avoid the obviously incorrect suggestions, it takes /// As a loose rule to avoid the obviously incorrect suggestions, it takes
/// an optional limit for the maximum allowable edit distance, which defaults /// an optional limit for the maximum allowable edit distance, which defaults
/// to one-third of the given word. /// to one-third of the given word.
/// ///
/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with /// Besides Levenshtein, we use case insensitive comparison to improve accuracy
/// a lower(upper)case letters mismatch. /// on an edge case with a lower(upper)case letters mismatch.
#[cold] #[cold]
pub fn find_best_match_for_name( pub fn find_best_match_for_name(
name_vec: &[Symbol], name_vec: &[Symbol],
@ -98,7 +104,7 @@ fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Sym
fn sort_by_words(name: &str) -> String { fn sort_by_words(name: &str) -> String {
let mut split_words: Vec<&str> = name.split('_').collect(); let mut split_words: Vec<&str> = name.split('_').collect();
// We are sorting primitive &strs and can use unstable sort here // We are sorting primitive &strs and can use unstable sort here.
split_words.sort_unstable(); split_words.sort_unstable();
split_words.join("_") split_words.join("_")
} }

View file

@ -1,4 +1,13 @@
//! The source positions and related helper functions. //! Source positions and related helper functions.
//!
//! Important concepts in this module include:
//!
//! - the *span*, represented by [`SpanData`] and related types;
//! - source code as represented by a [`SourceMap`]; and
//! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module.
//!
//! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata,
//! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`].
//! //!
//! ## Note //! ## Note
//! //!
@ -124,7 +133,7 @@ pub enum RealFileName {
impl RealFileName { impl RealFileName {
/// Returns the path suitable for reading from the file system on the local host. /// Returns the path suitable for reading from the file system on the local host.
/// Avoid embedding this in build artifacts; see `stable_name` for that. /// Avoid embedding this in build artifacts; see `stable_name()` for that.
pub fn local_path(&self) -> &Path { pub fn local_path(&self) -> &Path {
match self { match self {
RealFileName::Named(p) RealFileName::Named(p)
@ -133,7 +142,7 @@ impl RealFileName {
} }
/// Returns the path suitable for reading from the file system on the local host. /// Returns the path suitable for reading from the file system on the local host.
/// Avoid embedding this in build artifacts; see `stable_name` for that. /// Avoid embedding this in build artifacts; see `stable_name()` for that.
pub fn into_local_path(self) -> PathBuf { pub fn into_local_path(self) -> PathBuf {
match self { match self {
RealFileName::Named(p) RealFileName::Named(p)
@ -143,7 +152,7 @@ impl RealFileName {
/// Returns the path suitable for embedding into build artifacts. Note that /// Returns the path suitable for embedding into build artifacts. Note that
/// a virtualized path will not correspond to a valid file system path; see /// a virtualized path will not correspond to a valid file system path; see
/// `local_path` for something that is more likely to return paths into the /// `local_path()` for something that is more likely to return paths into the
/// local host file system. /// local host file system.
pub fn stable_name(&self) -> &Path { pub fn stable_name(&self) -> &Path {
match self { match self {
@ -173,7 +182,7 @@ pub enum FileName {
/// Custom sources for explicit parser calls from plugins and drivers. /// Custom sources for explicit parser calls from plugins and drivers.
Custom(String), Custom(String),
DocTest(PathBuf, isize), DocTest(PathBuf, isize),
/// Post-substitution inline assembly from LLVM /// Post-substitution inline assembly from LLVM.
InlineAsm(u64), InlineAsm(u64),
} }
@ -266,14 +275,17 @@ impl FileName {
} }
} }
/// Represents a span.
///
/// Spans represent a region of code, used for error reporting. Positions in spans /// Spans represent a region of code, used for error reporting. Positions in spans
/// are *absolute* positions from the beginning of the source_map, not positions /// are *absolute* positions from the beginning of the [`SourceMap`], not positions
/// relative to `SourceFile`s. Methods on the `SourceMap` can be used to relate spans back /// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back
/// to the original source. /// to the original source.
/// You must be careful if the span crosses more than one file - you will not be ///
/// You must be careful if the span crosses more than one file, since you will not be
/// able to use many of the functions on spans in source_map and you cannot assume /// able to use many of the functions on spans in source_map and you cannot assume
/// that the length of the `span = hi - lo`; there may be space in the `BytePos` /// that the length of the span is equal to `span.hi - span.lo`; there may be space in the
/// range between files. /// [`BytePos`] range between files.
/// ///
/// `SpanData` is public because `Span` uses a thread-local interner and can't be /// `SpanData` is public because `Span` uses a thread-local interner and can't be
/// sent to other threads, but some pieces of performance infra run in a separate thread. /// sent to other threads, but some pieces of performance infra run in a separate thread.
@ -384,7 +396,7 @@ impl Span {
Span::new(lo, hi, SyntaxContext::root()) Span::new(lo, hi, SyntaxContext::root())
} }
/// Returns a new span representing an empty span at the beginning of this span /// Returns a new span representing an empty span at the beginning of this span.
#[inline] #[inline]
pub fn shrink_to_lo(self) -> Span { pub fn shrink_to_lo(self) -> Span {
let span = self.data(); let span = self.data();
@ -398,7 +410,7 @@ impl Span {
} }
#[inline] #[inline]
/// Returns true if hi == lo /// Returns `true` if `hi == lo`.
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
let span = self.data(); let span = self.data();
span.hi == span.lo span.hi == span.lo
@ -512,7 +524,7 @@ impl Span {
} }
/// Checks if a span is "internal" to a macro in which `unsafe` /// Checks if a span is "internal" to a macro in which `unsafe`
/// can be used without triggering the `unsafe_code` lint /// can be used without triggering the `unsafe_code` lint.
// (that is, a macro marked with `#[allow_internal_unsafe]`). // (that is, a macro marked with `#[allow_internal_unsafe]`).
pub fn allows_unsafe(&self) -> bool { pub fn allows_unsafe(&self) -> bool {
self.ctxt().outer_expn_data().allow_internal_unsafe self.ctxt().outer_expn_data().allow_internal_unsafe
@ -700,6 +712,7 @@ impl Span {
} }
} }
/// A span together with some additional data.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct SpanLabel { pub struct SpanLabel {
/// The span we are going to include in the final snippet. /// The span we are going to include in the final snippet.
@ -743,7 +756,7 @@ impl<D: Decoder> Decodable<D> for Span {
/// any spans that are debug-printed during the closure's execution. /// any spans that are debug-printed during the closure's execution.
/// ///
/// Normally, the global `TyCtxt` is used to retrieve the `SourceMap` /// Normally, the global `TyCtxt` is used to retrieve the `SourceMap`
/// (see `rustc_interface::callbacks::span_debug1). However, some parts /// (see `rustc_interface::callbacks::span_debug1`). However, some parts
/// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before /// of the compiler (e.g. `rustc_parse`) may debug-print `Span`s before
/// a `TyCtxt` is available. In this case, we fall back to /// a `TyCtxt` is available. In this case, we fall back to
/// the `SourceMap` provided to this function. If that is not available, /// the `SourceMap` provided to this function. If that is not available,
@ -994,9 +1007,9 @@ pub enum ExternalSource {
Unneeded, Unneeded,
Foreign { Foreign {
kind: ExternalSourceKind, kind: ExternalSourceKind,
/// This SourceFile's byte-offset within the source_map of its original crate /// This SourceFile's byte-offset within the source_map of its original crate.
original_start_pos: BytePos, original_start_pos: BytePos,
/// The end of this SourceFile within the source_map of its original crate /// The end of this SourceFile within the source_map of its original crate.
original_end_pos: BytePos, original_end_pos: BytePos,
}, },
} }
@ -1099,7 +1112,7 @@ impl SourceFileHash {
} }
} }
/// A single source in the `SourceMap`. /// A single source in the [`SourceMap`].
#[derive(Clone)] #[derive(Clone)]
pub struct SourceFile { pub struct SourceFile {
/// The name of the file that the source came from. Source that doesn't /// The name of the file that the source came from. Source that doesn't
@ -1580,7 +1593,7 @@ fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
/// Replaces `\r\n` with `\n` in-place in `src`. /// Replaces `\r\n` with `\n` in-place in `src`.
/// ///
/// Returns error if there's a lone `\r` in the string /// Returns error if there's a lone `\r` in the string.
fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) { fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
if !src.as_bytes().contains(&b'\r') { if !src.as_bytes().contains(&b'\r') {
return; return;
@ -1705,13 +1718,16 @@ macro_rules! impl_pos {
} }
impl_pos! { impl_pos! {
/// A byte offset. Keep this small (currently 32-bits), as AST contains /// A byte offset.
/// a lot of them. ///
/// Keep this small (currently 32-bits), as AST contains a lot of them.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct BytePos(pub u32); pub struct BytePos(pub u32);
/// A character offset. Because of multibyte UTF-8 characters, a byte offset /// A character offset.
/// is not equivalent to a character offset. The `SourceMap` will convert `BytePos` ///
/// Because of multibyte UTF-8 characters, a byte offset
/// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`]
/// values to `CharPos` values as necessary. /// values to `CharPos` values as necessary.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct CharPos(pub usize); pub struct CharPos(pub usize);
@ -1835,8 +1851,9 @@ fn lookup_line(lines: &[BytePos], pos: BytePos) -> isize {
} }
/// Requirements for a `StableHashingContext` to be used in this crate. /// Requirements for a `StableHashingContext` to be used in this crate.
/// This is a hack to allow using the `HashStable_Generic` derive macro ///
/// instead of implementing everything in librustc_middle. /// This is a hack to allow using the [`HashStable_Generic`] derive macro
/// instead of implementing everything in rustc_middle.
pub trait HashStableContext { pub trait HashStableContext {
fn hash_def_id(&mut self, _: DefId, hasher: &mut StableHasher); fn hash_def_id(&mut self, _: DefId, hasher: &mut StableHasher);
fn hash_crate_num(&mut self, _: CrateNum, hasher: &mut StableHasher); fn hash_crate_num(&mut self, _: CrateNum, hasher: &mut StableHasher);
@ -1856,6 +1873,7 @@ where
/// offsets into the `SourceMap`). Instead, we hash the (file name, line, column) /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
/// triple, which stays the same even if the containing `SourceFile` has moved /// triple, which stays the same even if the containing `SourceFile` has moved
/// within the `SourceMap`. /// within the `SourceMap`.
///
/// Also note that we are hashing byte offsets for the column, not unicode /// Also note that we are hashing byte offsets for the column, not unicode
/// codepoint offsets. For the purpose of the hash that's sufficient. /// codepoint offsets. For the purpose of the hash that's sufficient.
/// Also, hashing filenames is expensive so we avoid doing it twice when the /// Also, hashing filenames is expensive so we avoid doing it twice when the

View file

@ -1,9 +1,11 @@
//! The `SourceMap` tracks all the source code used within a single crate, mapping //! Types for tracking pieces of source code within a crate.
//!
//! The [`SourceMap`] tracks all the source code used within a single crate, mapping
//! from integer byte positions to the original source code location. Each bit //! from integer byte positions to the original source code location. Each bit
//! of source parsed during crate parsing (typically files, in-memory strings, //! of source parsed during crate parsing (typically files, in-memory strings,
//! or various bits of macro expansion) cover a continuous range of bytes in the //! or various bits of macro expansion) cover a continuous range of bytes in the
//! `SourceMap` and are represented by `SourceFile`s. Byte positions are stored in //! `SourceMap` and are represented by [`SourceFile`]s. Byte positions are stored in
//! `Span` and used pervasively in the compiler. They are absolute positions //! [`Span`] and used pervasively in the compiler. They are absolute positions
//! within the `SourceMap`, which upon request can be converted to line and column //! within the `SourceMap`, which upon request can be converted to line and column
//! information, source code snippets, etc. //! information, source code snippets, etc.

View file

@ -12,7 +12,7 @@ use rustc_data_structures::fx::FxIndexSet;
/// A compressed span. /// A compressed span.
/// ///
/// `SpanData` is 12 bytes, which is a bit too big to stick everywhere. `Span` /// Whereas [`SpanData`] is 12 bytes, which is a bit too big to stick everywhere, `Span`
/// is a form that only takes up 8 bytes, with less space for the length and /// is a form that only takes up 8 bytes, with less space for the length and
/// context. The vast majority (99.9%+) of `SpanData` instances will fit within /// context. The vast majority (99.9%+) of `SpanData` instances will fit within
/// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are /// those 8 bytes; any `SpanData` whose fields don't fit into a `Span` are
@ -42,13 +42,11 @@ use rustc_data_structures::fx::FxIndexSet;
/// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base` /// - `base` is 32 bits in both `Span` and `SpanData`, which means that `base`
/// values never cause interning. The number of bits needed for `base` /// values never cause interning. The number of bits needed for `base`
/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate. /// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
/// `script-servo` is the largest crate in `rustc-perf`, requiring 26 bits
/// for some spans.
/// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits /// - `len` is 15 bits in `Span` (a u16, minus 1 bit for the tag) and 32 bits
/// in `SpanData`, which means that large `len` values will cause interning. /// in `SpanData`, which means that large `len` values will cause interning.
/// The number of bits needed for `len` does not depend on the crate size. /// The number of bits needed for `len` does not depend on the crate size.
/// The most common number of bits for `len` are 0--7, with a peak usually at /// The most common numbers of bits for `len` are from 0 to 7, with a peak usually
/// 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough /// at 3 or 4, and then it drops off quickly from 8 onwards. 15 bits is enough
/// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur /// for 99.99%+ of cases, but larger values (sometimes 20+ bits) might occur
/// dozens of times in a typical crate. /// dozens of times in a typical crate.
/// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that /// - `ctxt` is 16 bits in `Span` and 32 bits in `SpanData`, which means that