Inline and remove `cook_lexer_token`.
This is a small performance win, alas.
This commit is contained in:
parent
da84f0f4c3
commit
fb4dba0a17
1 changed file with 175 additions and 172 deletions
|
@ -86,88 +86,16 @@ impl<'a> StringReader<'a> {
|
||||||
|
|
||||||
debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
|
debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
|
||||||
|
|
||||||
match self.cook_lexer_token(token.kind, start) {
|
// Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
|
||||||
Some(kind) => {
|
// rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
|
||||||
let span = self.mk_sp(start, self.pos);
|
// additional validation.
|
||||||
return (Token::new(kind, span), preceded_by_whitespace);
|
let kind = match token.kind {
|
||||||
}
|
|
||||||
None => preceded_by_whitespace = true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a fatal lexical error with a given span.
|
|
||||||
fn fatal_span(&self, sp: Span, m: &str) -> ! {
|
|
||||||
self.sess.span_diagnostic.span_fatal(sp, m)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a lexical error with a given span.
|
|
||||||
fn err_span(&self, sp: Span, m: &str) {
|
|
||||||
self.sess.span_diagnostic.struct_span_err(sp, m).emit();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a fatal error spanning [`from_pos`, `to_pos`).
|
|
||||||
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
|
|
||||||
self.fatal_span(self.mk_sp(from_pos, to_pos), m)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Report a lexical error spanning [`from_pos`, `to_pos`).
|
|
||||||
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
|
|
||||||
self.err_span(self.mk_sp(from_pos, to_pos), m)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn struct_fatal_span_char(
|
|
||||||
&self,
|
|
||||||
from_pos: BytePos,
|
|
||||||
to_pos: BytePos,
|
|
||||||
m: &str,
|
|
||||||
c: char,
|
|
||||||
) -> DiagnosticBuilder<'a, !> {
|
|
||||||
self.sess
|
|
||||||
.span_diagnostic
|
|
||||||
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn struct_err_span_char(
|
|
||||||
&self,
|
|
||||||
from_pos: BytePos,
|
|
||||||
to_pos: BytePos,
|
|
||||||
m: &str,
|
|
||||||
c: char,
|
|
||||||
) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
|
|
||||||
self.sess
|
|
||||||
.span_diagnostic
|
|
||||||
.struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
|
|
||||||
/// complain about it.
|
|
||||||
fn lint_unicode_text_flow(&self, start: BytePos) {
|
|
||||||
// Opening delimiter of the length 2 is not included into the comment text.
|
|
||||||
let content_start = start + BytePos(2);
|
|
||||||
let content = self.str_from(content_start);
|
|
||||||
if contains_text_flow_control_chars(content) {
|
|
||||||
let span = self.mk_sp(start, self.pos);
|
|
||||||
self.sess.buffer_lint_with_diagnostic(
|
|
||||||
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
|
||||||
span,
|
|
||||||
ast::CRATE_NODE_ID,
|
|
||||||
"unicode codepoint changing visible direction of text present in comment",
|
|
||||||
BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
|
|
||||||
/// `rustc_ast::TokenKind`. This turns strings into interned
|
|
||||||
/// symbols and runs additional validation.
|
|
||||||
fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> {
|
|
||||||
Some(match token {
|
|
||||||
rustc_lexer::TokenKind::LineComment { doc_style } => {
|
rustc_lexer::TokenKind::LineComment { doc_style } => {
|
||||||
// Skip non-doc comments
|
// Skip non-doc comments
|
||||||
let Some(doc_style) = doc_style else {
|
let Some(doc_style) = doc_style else {
|
||||||
self.lint_unicode_text_flow(start);
|
self.lint_unicode_text_flow(start);
|
||||||
return None;
|
preceded_by_whitespace = true;
|
||||||
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Opening delimiter of the length 3 is not included into the symbol.
|
// Opening delimiter of the length 3 is not included into the symbol.
|
||||||
|
@ -183,7 +111,8 @@ impl<'a> StringReader<'a> {
|
||||||
// Skip non-doc comments
|
// Skip non-doc comments
|
||||||
let Some(doc_style) = doc_style else {
|
let Some(doc_style) = doc_style else {
|
||||||
self.lint_unicode_text_flow(start);
|
self.lint_unicode_text_flow(start);
|
||||||
return None;
|
preceded_by_whitespace = true;
|
||||||
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Opening delimiter of the length 3 and closing delimiter of the length 2
|
// Opening delimiter of the length 3 and closing delimiter of the length 2
|
||||||
|
@ -193,7 +122,10 @@ impl<'a> StringReader<'a> {
|
||||||
let content = self.str_from_to(content_start, content_end);
|
let content = self.str_from_to(content_start, content_end);
|
||||||
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
|
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::Whitespace => return None,
|
rustc_lexer::TokenKind::Whitespace => {
|
||||||
|
preceded_by_whitespace = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
rustc_lexer::TokenKind::Ident => {
|
rustc_lexer::TokenKind::Ident => {
|
||||||
let sym = nfc_normalize(self.str_from(start));
|
let sym = nfc_normalize(self.str_from(start));
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
|
@ -225,12 +157,12 @@ impl<'a> StringReader<'a> {
|
||||||
.any(|&(c, _, _)| {
|
.any(|&(c, _, _)| {
|
||||||
let sym = self.str_from(start);
|
let sym = self.str_from(start);
|
||||||
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
|
sym.chars().count() == 1 && c == sym.chars().next().unwrap()
|
||||||
})
|
}) =>
|
||||||
=>
|
|
||||||
{
|
{
|
||||||
let sym = nfc_normalize(self.str_from(start));
|
let sym = nfc_normalize(self.str_from(start));
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
|
self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
|
||||||
|
.push(span);
|
||||||
token::Ident(sym, false)
|
token::Ident(sym, false)
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
|
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
|
||||||
|
@ -308,20 +240,91 @@ impl<'a> StringReader<'a> {
|
||||||
let c = self.str_from(start).chars().next().unwrap();
|
let c = self.str_from(start).chars().next().unwrap();
|
||||||
let mut err =
|
let mut err =
|
||||||
self.struct_err_span_char(start, self.pos, "unknown start of token", c);
|
self.struct_err_span_char(start, self.pos, "unknown start of token", c);
|
||||||
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
|
// FIXME: the lexer could be used to turn the ASCII version of unicode
|
||||||
// instead of keeping a table in `check_for_substitution`into the token. Ideally,
|
// homoglyphs, instead of keeping a table in `check_for_substitution`into the
|
||||||
// this should be inside `rustc_lexer`. However, we should first remove compound
|
// token. Ideally, this should be inside `rustc_lexer`. However, we should
|
||||||
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
|
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
|
||||||
// as there will be less overall work to do this way.
|
// fancier error recovery to it, as there will be less overall work to do this
|
||||||
|
// way.
|
||||||
let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
|
let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
|
||||||
if c == '\x00' {
|
if c == '\x00' {
|
||||||
err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
|
err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
|
||||||
}
|
}
|
||||||
err.emit();
|
err.emit();
|
||||||
token?
|
if let Some(token) = token {
|
||||||
|
token
|
||||||
|
} else {
|
||||||
|
preceded_by_whitespace = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
rustc_lexer::TokenKind::Eof => token::Eof,
|
rustc_lexer::TokenKind::Eof => token::Eof,
|
||||||
})
|
};
|
||||||
|
let span = self.mk_sp(start, self.pos);
|
||||||
|
return (Token::new(kind, span), preceded_by_whitespace);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report a fatal lexical error with a given span.
|
||||||
|
fn fatal_span(&self, sp: Span, m: &str) -> ! {
|
||||||
|
self.sess.span_diagnostic.span_fatal(sp, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report a lexical error with a given span.
|
||||||
|
fn err_span(&self, sp: Span, m: &str) {
|
||||||
|
self.sess.span_diagnostic.struct_span_err(sp, m).emit();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report a fatal error spanning [`from_pos`, `to_pos`).
|
||||||
|
fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
|
||||||
|
self.fatal_span(self.mk_sp(from_pos, to_pos), m)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report a lexical error spanning [`from_pos`, `to_pos`).
|
||||||
|
fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
|
||||||
|
self.err_span(self.mk_sp(from_pos, to_pos), m)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn struct_fatal_span_char(
|
||||||
|
&self,
|
||||||
|
from_pos: BytePos,
|
||||||
|
to_pos: BytePos,
|
||||||
|
m: &str,
|
||||||
|
c: char,
|
||||||
|
) -> DiagnosticBuilder<'a, !> {
|
||||||
|
self.sess
|
||||||
|
.span_diagnostic
|
||||||
|
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn struct_err_span_char(
|
||||||
|
&self,
|
||||||
|
from_pos: BytePos,
|
||||||
|
to_pos: BytePos,
|
||||||
|
m: &str,
|
||||||
|
c: char,
|
||||||
|
) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
|
||||||
|
self.sess
|
||||||
|
.span_diagnostic
|
||||||
|
.struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
|
||||||
|
/// complain about it.
|
||||||
|
fn lint_unicode_text_flow(&self, start: BytePos) {
|
||||||
|
// Opening delimiter of the length 2 is not included into the comment text.
|
||||||
|
let content_start = start + BytePos(2);
|
||||||
|
let content = self.str_from(content_start);
|
||||||
|
if contains_text_flow_control_chars(content) {
|
||||||
|
let span = self.mk_sp(start, self.pos);
|
||||||
|
self.sess.buffer_lint_with_diagnostic(
|
||||||
|
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
||||||
|
span,
|
||||||
|
ast::CRATE_NODE_ID,
|
||||||
|
"unicode codepoint changing visible direction of text present in comment",
|
||||||
|
BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cook_doc_comment(
|
fn cook_doc_comment(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue