1
Fork 0

Lint against RTL unicode codepoints in literals and comments

Address CVE-2021-42574.
This commit is contained in:
Esteban Küber 2021-08-19 11:40:00 -07:00 committed by Pietro Albini
parent 38b01d9065
commit c0b134582a
No known key found for this signature in database
GPG key ID: 3E06ABE80BAAF19C
12 changed files with 535 additions and 10 deletions

View file

@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream};
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX;
use rustc_session::lint::builtin::{
TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
};
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{sym, Symbol};
@ -129,6 +131,28 @@ impl<'a> StringReader<'a> {
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}
/// Buffers the `TEXT_DIRECTION_CODEPOINT_IN_COMMENT` lint when the text of a non-doc comment
/// contains a Unicode bidirectional formatting codepoint.
///
/// `start` is the position of the comment's opening delimiter. The reported span runs from
/// `start` to the reader's current position (`self.pos`).
fn lint_unicode_text_flow(&self, start: BytePos) {
    // Bidirectional embedding/override/isolate controls (LRE, RLE, LRO, RLO, LRI, RLI, FSI)
    // followed by their terminators (PDF, PDI).
    const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
        '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
        '\u{202C}', '\u{2069}',
    ];

    // The comment text begins after the two-byte opening delimiter.
    let comment_text = self.str_from(start + BytePos(2));
    // The span, unlike the text, includes the opening delimiter.
    let comment_span = self.mk_sp(start, self.pos);

    // Nothing to report unless at least one of the listed codepoints is present.
    if !comment_text.contains(UNICODE_TEXT_FLOW_CHARS) {
        return;
    }

    self.sess.buffer_lint_with_diagnostic(
        &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
        comment_span,
        ast::CRATE_NODE_ID,
        "unicode codepoint changing visible direction of text present in comment",
        BuiltinLintDiagnostics::UnicodeTextFlow(comment_span, comment_text.to_string()),
    );
}
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
/// `rustc_ast::TokenKind`. This turns strings into interned
/// symbols and runs additional validation.
@ -136,7 +160,12 @@ impl<'a> StringReader<'a> {
Some(match token {
rustc_lexer::TokenKind::LineComment { doc_style } => {
// Skip non-doc comments
let doc_style = doc_style?;
let doc_style = if let Some(doc_style) = doc_style {
doc_style
} else {
self.lint_unicode_text_flow(start);
return None;
};
// Opening delimiter of the length 3 is not included into the symbol.
let content_start = start + BytePos(3);
@ -158,7 +187,12 @@ impl<'a> StringReader<'a> {
}
// Skip non-doc comments
let doc_style = doc_style?;
let doc_style = if let Some(doc_style) = doc_style {
doc_style
} else {
self.lint_unicode_text_flow(start);
return None;
};
// Opening delimiter of the length 3 and closing delimiter of the length 2
// are not included into the symbol.

View file

@ -187,12 +187,17 @@ pub(crate) fn emit_unescape_error(
assert!(mode.is_bytes());
let (c, span) = last_char();
let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
err.span_label(span, "byte constant must be ASCII");
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
format!(" but is {:?}", c)
} else {
String::new()
};
err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
if (c as u32) <= 0xFF {
err.span_suggestion(
span,
&format!(
"if you meant to use the unicode code point for '{}', use a \\xHH escape",
"if you meant to use the unicode code point for {:?}, use a \\xHH escape",
c
),
format!("\\x{:X}", c as u32),
@ -206,7 +211,7 @@ pub(crate) fn emit_unescape_error(
err.span_suggestion(
span,
&format!(
"if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes",
"if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
c
),
utf8.as_bytes()
@ -220,10 +225,15 @@ pub(crate) fn emit_unescape_error(
}
EscapeError::NonAsciiCharInByteString => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
let (c, span) = last_char();
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
format!(" but is {:?}", c)
} else {
String::new()
};
handler
.struct_span_err(span, "raw byte string must be ASCII")
.span_label(span, "must be ASCII")
.span_label(span, &format!("must be ASCII{}", postfix))
.emit();
}
EscapeError::OutOfRangeHexEscape => {