Lint against RTL unicode codepoints in literals and comments

Address CVE-2021-42574.
This commit is contained in:
Esteban Küber 2021-08-19 11:40:00 -07:00 committed by Pietro Albini
parent 38b01d9065
commit c0b134582a
No known key found for this signature in database
GPG key ID: 3E06ABE80BAAF19C
12 changed files with 535 additions and 10 deletions

View file

@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
use rustc_session::lint::builtin::{
TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
};
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
/// Lints a non-doc comment for Unicode codepoints that change the direction in which text is
/// displayed, so that source which renders differently from what the compiler lexes is
/// reported loudly (CVE-2021-42574).
///
/// `start` is the position of the comment's opening delimiter. The reported span covers
/// `start..self.pos`; the text that is scanned (and attached to the diagnostic) is whatever
/// `self.str_from` returns for the position just past the two-character opening delimiter.
fn lint_unicode_text_flow(&self, start: BytePos) {
    // Bidirectional control codepoints: the embeddings (U+202A, U+202B), the overrides
    // (U+202D, U+202E), the isolates (U+2066, U+2067, U+2068) and their terminators
    // (U+202C, U+2069).
    const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
        '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
        '\u{202C}', '\u{2069}',
    ];

    // The opening delimiter of length 2 is not part of the comment text.
    let content = self.str_from(start + BytePos(2));
    if !content.contains(UNICODE_TEXT_FLOW_CHARS) {
        return;
    }

    // This runs for every non-doc comment, so the span is only built once we know that a
    // diagnostic will actually be emitted.
    let span = self.mk_sp(start, self.pos);
    self.sess.buffer_lint_with_diagnostic(
        &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
        span,
        ast::CRATE_NODE_ID,
        "unicode codepoint changing visible direction of text present in comment",
        BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
    );
}
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
/// `rustc_ast::TokenKind`. This turns strings into interned
/// symbols and runs additional validation.
@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
Some(match token {
rustc_lexer::TokenKind::LineComment { doc_style } => {
// Skip non-doc comments
let doc_style = doc_style?;
let doc_style = if let Some(doc_style) = doc_style {
doc_style
} else {
self.lint_unicode_text_flow(start);
return None;
};
// The opening delimiter of length 3 is not included in the symbol.
let content_start = start + BytePos(3);
@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
}
// Skip non-doc comments
let doc_style = doc_style?;
let doc_style = if let Some(doc_style) = doc_style {
doc_style
} else {
self.lint_unicode_text_flow(start);
return None;
};
// The opening delimiter of length 3 and the closing delimiter of length 2
// are not included in the symbol.