Lint against RTL unicode codepoints in literals and comments
Address CVE-2021-42574.
This commit is contained in:
parent
38b01d9065
commit
c0b134582a
12 changed files with 535 additions and 10 deletions
|
@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
|
|||
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
|
||||
use rustc_lexer::unescape::{self, Mode};
|
||||
use rustc_lexer::{Base, DocStyle, RawStrError};
|
||||
use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
|
||||
use rustc_session::lint::builtin::{
|
||||
TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
|
||||
};
|
||||
use rustc_session::lint::BuiltinLintDiagnostics;
|
||||
use rustc_session::parse::ParseSess;
|
||||
use rustc_span::symbol::{sym, Symbol};
|
||||
|
@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
|
|||
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
|
||||
}
|
||||
|
||||
/// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
|
||||
/// complain about it.
|
||||
fn lint_unicode_text_flow(&self, start: BytePos) {
|
||||
// Opening delimiter of the length 2 is not included into the comment text.
|
||||
let content_start = start + BytePos(2);
|
||||
let content = self.str_from(content_start);
|
||||
let span = self.mk_sp(start, self.pos);
|
||||
const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
|
||||
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
|
||||
'\u{202C}', '\u{2069}',
|
||||
];
|
||||
if content.contains(UNICODE_TEXT_FLOW_CHARS) {
|
||||
self.sess.buffer_lint_with_diagnostic(
|
||||
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
||||
span,
|
||||
ast::CRATE_NODE_ID,
|
||||
"unicode codepoint changing visible direction of text present in comment",
|
||||
BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
|
||||
/// `rustc_ast::TokenKind`. This turns strings into interned
|
||||
/// symbols and runs additional validation.
|
||||
|
@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
|
|||
Some(match token {
|
||||
rustc_lexer::TokenKind::LineComment { doc_style } => {
|
||||
// Skip non-doc comments
|
||||
let doc_style = doc_style?;
|
||||
let doc_style = if let Some(doc_style) = doc_style {
|
||||
doc_style
|
||||
} else {
|
||||
self.lint_unicode_text_flow(start);
|
||||
return None;
|
||||
};
|
||||
|
||||
// Opening delimiter of the length 3 is not included into the symbol.
|
||||
let content_start = start + BytePos(3);
|
||||
|
@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
|
|||
}
|
||||
|
||||
// Skip non-doc comments
|
||||
let doc_style = doc_style?;
|
||||
let doc_style = if let Some(doc_style) = doc_style {
|
||||
doc_style
|
||||
} else {
|
||||
self.lint_unicode_text_flow(start);
|
||||
return None;
|
||||
};
|
||||
|
||||
// Opening delimiter of the length 3 and closing delimiter of the length 2
|
||||
// are not included into the symbol.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue