Shrink `Token`.
From 72 bytes to 12 bytes (on x86-64). There are two parts to this:

- Changing various source code offsets from 64-bit to 32-bit. This is not a problem because the rest of rustc also uses 32-bit source code offsets. This means `Token` is no longer `Copy`, but that causes no problems.
- Removing the `RawStrError` from `LiteralKind`. Raw string literal invalidity is now indicated by a `None` value within `RawStr`/`RawByteStr`, and the new `validate_raw_str` function can be used to re-lex an invalid raw string literal to get the `RawStrError`.

There is one very small change in behaviour. Previously, if a raw string literal matched both the `InvalidStarter` and `TooManyDelimiters` cases, the latter would override the former. This has now changed, because `raw_double_quoted_string` now uses `?` and so returns immediately upon detecting the `InvalidStarter` case. I think reporting the earlier-detected error is a slight improvement, and it explains the change in the `test_too_many_hashes` test.

The commit also removes a couple of comments that refer to #77629 and say that the sizes of these types don't affect performance. Those comments are wrong, though the performance effect is small.
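For intuition about where the savings come from, here is a self-contained sketch with simplified stand-ins (these are not the rustc definitions; the real fields are nested inside `TokenKind` and `LiteralKind`, which is how the old `Token` reached 72 bytes):

```rust
use std::mem::size_of;

// The error type that used to ride along inside every raw-string token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RawStrError {
    InvalidStarter { bad_char: char },
    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
    TooManyDelimiters { found: usize },
}

// Before: a multi-word error payload plus 64-bit offsets in every token.
struct OldTokenFields {
    n_hashes: u8,
    err: Option<RawStrError>, // several words of payload for a rare case
    suffix_start: usize,
    len: usize,
}

// After: `None` means "invalid; re-lex with `validate_raw_str` for details".
// `Option<u8>` is 2 bytes thanks to the niche, and offsets are `u32`.
struct NewTokenFields {
    n_hashes: Option<u8>,
    suffix_start: u32,
    len: u32,
}

fn main() {
    // On x86-64 these stand-ins come out at 64 and 12 bytes; the shape of
    // the saving matches the commit's 72 -> 12 even though the exact old
    // number depends on how the real enums nest.
    println!("old: {} bytes", size_of::<OldTokenFields>());
    println!("new: {} bytes", size_of::<NewTokenFields>());
}
```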
This commit is contained in: parent e6b9fccfb1, commit 99f5c79d64

9 changed files with 111 additions and 103 deletions
```diff
@@ -194,7 +194,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
     }

     for token in rustc_lexer::tokenize(&text[pos..]) {
-        let token_text = &text[pos..pos + token.len];
+        let token_text = &text[pos..pos + token.len as usize];
         match token.kind {
             rustc_lexer::TokenKind::Whitespace => {
                 if let Some(mut idx) = token_text.find('\n') {
@@ -211,8 +211,10 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
             }
             rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
                 if doc_style.is_none() {
-                    let code_to_the_right =
-                        !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n'));
+                    let code_to_the_right = !matches!(
+                        text[pos + token.len as usize..].chars().next(),
+                        Some('\r' | '\n')
+                    );
                     let style = match (code_to_the_left, code_to_the_right) {
                         (_, true) => CommentStyle::Mixed,
                         (false, false) => CommentStyle::Isolated,
@@ -246,7 +248,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
                 code_to_the_left = true;
             }
         }
-        pos += token.len;
+        pos += token.len as usize;
    }

    comments
```
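Since `Token::len` is now a `u32`, callers that slice source text must cast back to `usize`, which is exactly what the hunk above does at each use site. A minimal consumer-side sketch of the same pattern (a hypothetical helper, not part of the commit):

```rust
// Walk a source string with rustc_lexer's public API after this change,
// slicing out each token's text. The `as usize` cast mirrors the diff above.
fn print_tokens(src: &str) {
    let mut pos = 0usize;
    for token in rustc_lexer::tokenize(src) {
        let end = pos + token.len as usize; // `len` is a u32 byte length
        println!("{:?}: {:?}", token.kind, &src[pos..end]);
        pos = end;
    }
}
```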
```diff
@@ -61,8 +61,8 @@ impl<'a> Cursor<'a> {
     }

     /// Returns amount of already consumed symbols.
-    pub(crate) fn len_consumed(&self) -> usize {
-        self.initial_len - self.chars.as_str().len()
+    pub(crate) fn len_consumed(&self) -> u32 {
+        (self.initial_len - self.chars.as_str().len()) as u32
     }

     /// Resets the number of bytes consumed to 0.
@@ -38,18 +38,17 @@ use std::convert::TryFrom;
 #[derive(Debug)]
 pub struct Token {
     pub kind: TokenKind,
-    pub len: usize,
+    pub len: u32,
 }

 impl Token {
-    fn new(kind: TokenKind, len: usize) -> Token {
+    fn new(kind: TokenKind, len: u32) -> Token {
         Token { kind, len }
     }
 }

 /// Enum representing common lexeme types.
-// perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum TokenKind {
     // Multi-char tokens:
     /// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
     /// tokens.
     UnknownPrefix,
     /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
-    Literal { kind: LiteralKind, suffix_start: usize },
+    Literal { kind: LiteralKind, suffix_start: u32 },
     /// "'a"
     Lifetime { starts_with_number: bool },

@@ -160,26 +159,24 @@ pub enum LiteralKind {
     Str { terminated: bool },
     /// "b"abc"", "b"abc"
     ByteStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
-    RawStr { n_hashes: u8, err: Option<RawStrError> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
-    RawByteStr { n_hashes: u8, err: Option<RawStrError> },
+    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
+    /// an invalid literal.
+    RawStr { n_hashes: Option<u8> },
+    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
+    /// indicates an invalid literal.
+    RawByteStr { n_hashes: Option<u8> },
 }

-/// Error produced validating a raw string. Represents cases like:
-/// - `r##~"abcde"##`: `InvalidStarter`
-/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
-/// - Too many `#`s (>255): `TooManyDelimiters`
-// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum RawStrError {
-    /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
+    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
     InvalidStarter { bad_char: char },
-    /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
-    /// may have intended to terminate it.
-    NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
+    /// The string was not terminated, e.g. `r###"abcde"##`.
+    /// `possible_terminator_offset` is the number of characters after `r` or
+    /// `br` where they may have intended to terminate it.
+    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
     /// More than 255 `#`s exist.
-    TooManyDelimiters { found: usize },
+    TooManyDelimiters { found: u32 },
 }

 /// Base of numeric literal encoding according to its prefix.
@@ -227,6 +224,19 @@ pub fn first_token(input: &str) -> Token {
     Cursor::new(input).advance_token()
 }

+/// Validates a raw string literal. Used for getting more information about a
+/// problem with a `RawStr`/`RawByteStr` with a `None` field.
+#[inline]
+pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
+    debug_assert!(!input.is_empty());
+    let mut cursor = Cursor::new(input);
+    // Move past the leading `r` or `br`.
+    for _ in 0..prefix_len {
+        cursor.bump().unwrap();
+    }
+    cursor.raw_double_quoted_string(prefix_len).map(|_| ())
+}
+
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
```
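This is the heart of the change: the hot path records only `n_hashes: None` for an invalid raw string, and the details are recovered by re-lexing on the cold error path. A hedged sketch of how a consumer might drive `validate_raw_str` (hypothetical standalone code, not from the commit):

```rust
use rustc_lexer::{first_token, validate_raw_str, LiteralKind, RawStrError, TokenKind};

// Given source text starting with a raw string literal such as `r#"a`,
// explain why it failed to lex. `prefix_len` is 1 for `r...` literals
// and 2 for `br...` literals.
fn explain_invalid_raw_str(src: &str) {
    let token = first_token(src);
    if let TokenKind::Literal { kind: LiteralKind::RawStr { n_hashes: None }, .. } = token.kind {
        // Re-lexing duplicates work, but only when a diagnostic is about to
        // be emitted anyway, so the common case stays cheap.
        match validate_raw_str(src, 1) {
            Err(RawStrError::InvalidStarter { bad_char }) => {
                eprintln!("bad character after `r`: {:?}", bad_char);
            }
            Err(err) => eprintln!("invalid raw string: {:?}", err),
            Ok(()) => unreachable!("the lexer said this literal was invalid"),
        }
    }
}
```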
```diff
@@ -316,12 +326,12 @@ impl Cursor<'_> {
             'r' => match (self.first(), self.second()) {
                 ('#', c1) if is_id_start(c1) => self.raw_ident(),
                 ('#', _) | ('"', _) => {
-                    let (n_hashes, err) = self.raw_double_quoted_string(1);
+                    let res = self.raw_double_quoted_string(1);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawStr { n_hashes, err };
+                    let kind = RawStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -351,12 +361,12 @@ impl Cursor<'_> {
                 }
                 ('r', '"') | ('r', '#') => {
                     self.bump();
-                    let (n_hashes, err) = self.raw_double_quoted_string(2);
+                    let res = self.raw_double_quoted_string(2);
                     let suffix_start = self.len_consumed();
-                    if err.is_none() {
+                    if res.is_ok() {
                         self.eat_literal_suffix();
                     }
-                    let kind = RawByteStr { n_hashes, err };
+                    let kind = RawByteStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
                 _ => self.ident_or_unknown_prefix(),
@@ -699,19 +709,18 @@ impl Cursor<'_> {
     }

     /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
-    fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u8, Option<RawStrError>) {
+    fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
         // Wrap the actual function to handle the error with too many hashes.
         // This way, it eats the whole raw string.
-        let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
+        let n_hashes = self.raw_string_unvalidated(prefix_len)?;
         // Only up to 255 `#`s are allowed in raw strings
         match u8::try_from(n_hashes) {
-            Ok(num) => (num, err),
-            // We lie about the number of hashes here :P
-            Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
+            Ok(num) => Ok(num),
+            Err(_) => Err(RawStrError::TooManyDelimiters { found: n_hashes }),
         }
     }

-    fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
+    fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
         debug_assert!(self.prev() == 'r');
         let start_pos = self.len_consumed();
         let mut possible_terminator_offset = None;
@@ -730,7 +739,7 @@ impl Cursor<'_> {
             Some('"') => (),
             c => {
                 let c = c.unwrap_or(EOF_CHAR);
-                return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
+                return Err(RawStrError::InvalidStarter { bad_char: c });
             }
         }

@@ -740,14 +749,11 @@ impl Cursor<'_> {
         self.eat_while(|c| c != '"');

         if self.is_eof() {
-            return (
-                n_start_hashes,
-                Some(RawStrError::NoTerminator {
-                    expected: n_start_hashes,
-                    found: max_hashes,
-                    possible_terminator_offset,
-                }),
-            );
+            return Err(RawStrError::NoTerminator {
+                expected: n_start_hashes,
+                found: max_hashes,
+                possible_terminator_offset,
+            });
         }

         // Eat closing double quote.
@@ -765,7 +771,7 @@ impl Cursor<'_> {
         }

         if n_end_hashes == n_start_hashes {
-            return (n_start_hashes, None);
+            return Ok(n_start_hashes);
         } else if n_end_hashes > max_hashes {
             // Keep track of possible terminators to give a hint about
             // where there might be a missing terminator
```
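The behaviour change is easiest to see with an input that is invalid in two ways at once. A hedged sketch in the style of the unit tests that follow (it uses the crate-private `Cursor` methods, so it only makes sense inside `rustc_lexer`'s own test module):

```rust
#[test]
fn invalid_starter_beats_too_many_hashes() {
    // 256 leading hashes (one too many) *and* a bad starter character `~`.
    let body = format!("{}~\"abc\"", "#".repeat(256));
    let mut cursor = Cursor::new(&format!("r{}", body));
    cursor.bump(); // step over the `r`, as `check_raw_str` does

    // Previously `TooManyDelimiters` overrode the starter error; with the
    // `?`-based early return, the earlier-detected `InvalidStarter` wins.
    assert_eq!(
        cursor.raw_double_quoted_string(0),
        Err(RawStrError::InvalidStarter { bad_char: '~' })
    );
}
```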
```diff
@@ -2,42 +2,39 @@ use super::*;

 use expect_test::{expect, Expect};

-fn check_raw_str(s: &str, expected_hashes: u8, expected_err: Option<RawStrError>) {
+fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
     let s = &format!("r{}", s);
     let mut cursor = Cursor::new(s);
     cursor.bump();
-    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
-    assert_eq!(n_hashes, expected_hashes);
-    assert_eq!(err, expected_err);
+    let res = cursor.raw_double_quoted_string(0);
+    assert_eq!(res, expected);
 }

 #[test]
 fn test_naked_raw_str() {
-    check_raw_str(r#""abc""#, 0, None);
+    check_raw_str(r#""abc""#, Ok(0));
 }

 #[test]
 fn test_raw_no_start() {
-    check_raw_str(r##""abc"#"##, 0, None);
+    check_raw_str(r##""abc"#"##, Ok(0));
 }

 #[test]
 fn test_too_many_terminators() {
     // this error is handled in the parser later
-    check_raw_str(r###"#"abc"##"###, 1, None);
+    check_raw_str(r###"#"abc"##"###, Ok(1));
 }

 #[test]
 fn test_unterminated() {
     check_raw_str(
         r#"#"abc"#,
-        1,
-        Some(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
     );
     check_raw_str(
         r###"##"abc"#"###,
-        2,
-        Some(RawStrError::NoTerminator {
+        Err(RawStrError::NoTerminator {
             expected: 2,
             found: 1,
             possible_terminator_offset: Some(7),
@@ -46,14 +43,13 @@ fn test_unterminated() {
     // We're looking for "# not just any #
     check_raw_str(
         r###"##"abc#"###,
-        2,
-        Some(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
     )
 }

 #[test]
 fn test_invalid_start() {
-    check_raw_str(r##"#~"abc"#"##, 1, Some(RawStrError::InvalidStarter { bad_char: '~' }));
+    check_raw_str(r##"#~"abc"#"##, Err(RawStrError::InvalidStarter { bad_char: '~' }));
 }

 #[test]
@@ -61,26 +57,24 @@ fn test_unterminated_no_pound() {
     // https://github.com/rust-lang/rust/issues/70677
     check_raw_str(
         r#"""#,
-        0,
-        Some(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
+        Err(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
     );
 }

 #[test]
 fn test_too_many_hashes() {
     let max_count = u8::MAX;
-    let mut hashes: String = "#".repeat(max_count.into());
+    let hashes1 = "#".repeat(max_count as usize);
+    let hashes2 = "#".repeat(max_count as usize + 1);
+    let middle = "\"abc\"";
+    let s1 = [&hashes1, middle, &hashes1].join("");
+    let s2 = [&hashes2, middle, &hashes2].join("");

-    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX), but invalid string.
-    check_raw_str(&hashes, max_count, Some(RawStrError::InvalidStarter { bad_char: '\u{0}' }));
+    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX).
+    check_raw_str(&s1, Ok(255));

     // One more hash sign (256 = 2^8) becomes too many.
-    hashes.push('#');
-    check_raw_str(
-        &hashes,
-        0,
-        Some(RawStrError::TooManyDelimiters { found: usize::from(max_count) + 1 }),
-    );
+    check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
 }

 #[test]
@@ -251,7 +245,7 @@ fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
         expect![[r#"
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
         "#]],
     )
 }
@@ -295,9 +289,9 @@ br###"raw"###suffix
             Token { kind: Whitespace, len: 1 }
             Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 12 }, len: 18 }
             Token { kind: Whitespace, len: 1 }
-            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: Some(3) }, suffix_start: 13 }, len: 19 }
             Token { kind: Whitespace, len: 1 }
         "#]],
     )
```
```diff
@@ -27,7 +27,7 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
 // This assertion is in this crate, rather than in `rustc_lexer`, because that
 // crate cannot depend on `rustc_data_structures`.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(rustc_lexer::Token, 72);
+rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);

 #[derive(Clone, Debug)]
 pub struct UnmatchedBrace {
```
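The updated assertion locks in the new size: if `Token` ever grows past 12 bytes on x86-64, compilation fails. For code that cannot use `rustc_data_structures`, the same guard can be written by hand; a small sketch with a stand-in type (`MyToken` is hypothetical):

```rust
// A do-it-yourself static size assertion: the const evaluates at compile
// time, so a size regression becomes a build error, not a silent slowdown.
struct MyToken {
    kind: u64,
    len: u32,
}

const _: () = assert!(std::mem::size_of::<MyToken>() == 16);
```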
```diff
@@ -88,7 +88,7 @@ impl<'a> StringReader<'a> {
         let token = rustc_lexer::first_token(text);

         let start = self.pos;
-        self.pos = self.pos + BytePos::from_usize(token.len);
+        self.pos = self.pos + BytePos(token.len);

         debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));

```
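`BytePos` in `rustc_span` is a newtype around `u32` (these are the 32-bit source offsets the commit message leans on), so a `u32` token length can be wrapped directly and the `from_usize` shim disappears. A sketch of the relationship with a local stand-in newtype:

```rust
// Stand-in for rustc_span's `pub struct BytePos(pub u32);`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BytePos(u32);

impl std::ops::Add for BytePos {
    type Output = BytePos;
    fn add(self, rhs: BytePos) -> BytePos {
        BytePos(self.0 + rhs.0)
    }
}

fn advance(pos: BytePos, token_len: u32) -> BytePos {
    // Before this commit: pos + BytePos::from_usize(token_len as usize)
    pos + BytePos(token_len)
}
```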
```diff
@@ -240,7 +240,7 @@ impl<'a> StringReader<'a> {
                 token::Ident(sym, false)
             }
             rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
-                let suffix_start = start + BytePos(suffix_start as u32);
+                let suffix_start = start + BytePos(suffix_start);
                 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
                 let suffix = if suffix_start < self.pos {
                     let string = self.str_from(suffix_start);
@@ -405,15 +405,21 @@ impl<'a> StringReader<'a> {
                 }
                 (token::ByteStr, Mode::ByteStr, 2, 1) // b" "
             }
-            rustc_lexer::LiteralKind::RawStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "##
+                } else {
+                    self.report_raw_str_error(start, 1);
+                }
             }
-            rustc_lexer::LiteralKind::RawByteStr { n_hashes, err } => {
-                self.report_raw_str_error(start, err);
-                let n = u32::from(n_hashes);
-                (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+            rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
             }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 return if empty_int {
@@ -484,17 +490,17 @@ impl<'a> StringReader<'a> {
         &self.src[self.src_index(start)..self.src_index(end)]
     }

-    fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) {
-        match opt_err {
-            Some(RawStrError::InvalidStarter { bad_char }) => {
+    fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
+        match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
+            Err(RawStrError::InvalidStarter { bad_char }) => {
                 self.report_non_started_raw_string(start, bad_char)
             }
-            Some(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
+            Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
                 .report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
-            Some(RawStrError::TooManyDelimiters { found }) => {
+            Err(RawStrError::TooManyDelimiters { found }) => {
                 self.report_too_many_hashes(start, found)
             }
-            None => (),
+            Ok(()) => panic!("no error found for supposedly invalid raw string literal"),
         }
     }

@@ -511,9 +517,9 @@ impl<'a> StringReader<'a> {
     fn report_unterminated_raw_string(
         &self,
         start: BytePos,
-        n_hashes: usize,
-        possible_offset: Option<usize>,
-        found_terminators: usize,
+        n_hashes: u32,
+        possible_offset: Option<u32>,
+        found_terminators: u32,
     ) -> ! {
         let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code(
             self.mk_sp(start, start),
@@ -526,7 +532,7 @@ impl<'a> StringReader<'a> {
         if n_hashes > 0 {
             err.note(&format!(
                 "this raw string should be terminated with `\"{}`",
-                "#".repeat(n_hashes)
+                "#".repeat(n_hashes as usize)
             ));
         }

@@ -537,7 +543,7 @@ impl<'a> StringReader<'a> {
             err.span_suggestion(
                 span,
                 "consider terminating the string here",
-                "#".repeat(n_hashes),
+                "#".repeat(n_hashes as usize),
                 Applicability::MaybeIncorrect,
             );
         }
@@ -638,7 +644,7 @@ impl<'a> StringReader<'a> {
         }
     }

-    fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
+    fn report_too_many_hashes(&self, start: BytePos, found: u32) -> ! {
         self.fatal_span_(
             start,
             self.pos,
@@ -213,7 +213,7 @@ impl<'a> Iterator for TokenIter<'a> {
             return None;
         }
         let token = rustc_lexer::first_token(self.src);
-        let (text, rest) = self.src.split_at(token.len);
+        let (text, rest) = self.src.split_at(token.len as usize);
         self.src = rest;
         Some((token.kind, text))
     }
@@ -1112,7 +1112,7 @@ fn span_contains_cfg(cx: &LateContext<'_>, s: Span) -> bool {
     let mut pos = 0usize;
     let mut iter = tokenize(&snip).map(|t| {
         let start = pos;
-        pos += t.len;
+        pos += t.len as usize;
         (t.kind, start..pos)
     });

@@ -345,7 +345,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) ->
     if line.starts_with("/*") {
         let src = src[line_start..line_starts.last().unwrap().to_usize() - offset].trim_start();
         let mut tokens = tokenize(src);
-        return src[..tokens.next().unwrap().len]
+        return src[..tokens.next().unwrap().len as usize]
             .to_ascii_uppercase()
             .contains("SAFETY:")
             && tokens.all(|t| t.kind == TokenKind::Whitespace);
@@ -141,7 +141,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut left_pos = 0;
         let left = tokenize(&left)
             .map(|t| {
-                let end = left_pos + t.len;
+                let end = left_pos + t.len as usize;
                 let s = &left[left_pos..end];
                 left_pos = end;
                 (t, s)
@@ -156,7 +156,7 @@ impl HirEqInterExpr<'_, '_, '_> {
         let mut right_pos = 0;
         let right = tokenize(&right)
             .map(|t| {
-                let end = right_pos + t.len;
+                let end = right_pos + t.len as usize;
                 let s = &right[right_pos..end];
                 right_pos = end;
                 (t, s)
```