Optimize literal, doc comment lint as well, extract function.
This commit is contained in:
parent
a5b25a2cfa
commit
7885233df0
5 changed files with 54 additions and 49 deletions
|
@ -16,6 +16,7 @@
|
||||||
#![feature(nll)]
|
#![feature(nll)]
|
||||||
#![feature(min_specialization)]
|
#![feature(min_specialization)]
|
||||||
#![recursion_limit = "256"]
|
#![recursion_limit = "256"]
|
||||||
|
#![feature(slice_internals)]
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate rustc_macros;
|
extern crate rustc_macros;
|
||||||
|
@ -25,6 +26,7 @@ pub mod util {
|
||||||
pub mod comments;
|
pub mod comments;
|
||||||
pub mod literal;
|
pub mod literal;
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
|
pub mod unicode;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
|
|
44
compiler/rustc_ast/src/util/unicode.rs
Normal file
44
compiler/rustc_ast/src/util/unicode.rs
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
/// Unicode bidirectional formatting characters that can change the visual
/// order of text without being visible themselves.
///
/// Each of these encodes to exactly three UTF-8 bytes with the lead byte
/// `0xE2`. The element order is kept as-is in case any caller indexes into
/// the slice.
pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{2069}', // POP DIRECTIONAL ISOLATE
];
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn contains_text_flow_control_chars(s: &str) -> bool {
|
||||||
|
// Char - UTF-8
|
||||||
|
// U+202A - E2 80 AA
|
||||||
|
// U+202B - E2 80 AB
|
||||||
|
// U+202C - E2 80 AC
|
||||||
|
// U+202D - E2 80 AD
|
||||||
|
// U+202E - E2 80 AE
|
||||||
|
// U+2066 - E2 81 A6
|
||||||
|
// U+2067 - E2 81 A7
|
||||||
|
// U+2068 - E2 81 A8
|
||||||
|
// U+2069 - E2 81 A9
|
||||||
|
let mut bytes = s.as_bytes();
|
||||||
|
loop {
|
||||||
|
match core::slice::memchr::memchr(0xE2, &bytes) {
|
||||||
|
Some(idx) => {
|
||||||
|
// bytes are valid UTF-8 -> E2 must be followed by two bytes
|
||||||
|
let ch = &bytes[idx..idx + 3];
|
||||||
|
match ch[1] {
|
||||||
|
0x80 => {
|
||||||
|
if (0xAA..=0xAE).contains(&ch[2]) {
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
0x81 => {
|
||||||
|
if (0xA6..=0xA9).contains(&ch[2]) {
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
bytes = &bytes[idx + 3..];
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
break false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,9 +16,9 @@
|
||||||
|
|
||||||
use self::TargetLint::*;
|
use self::TargetLint::*;
|
||||||
|
|
||||||
use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;
|
|
||||||
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
|
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
|
||||||
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
|
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
|
||||||
|
use ast::util::unicode::TEXT_FLOW_CONTROL_CHARS;
|
||||||
use rustc_ast as ast;
|
use rustc_ast as ast;
|
||||||
use rustc_data_structures::fx::FxHashMap;
|
use rustc_data_structures::fx::FxHashMap;
|
||||||
use rustc_data_structures::sync;
|
use rustc_data_structures::sync;
|
||||||
|
@ -602,7 +602,7 @@ pub trait LintContext: Sized {
|
||||||
let spans: Vec<_> = content
|
let spans: Vec<_> = content
|
||||||
.char_indices()
|
.char_indices()
|
||||||
.filter_map(|(i, c)| {
|
.filter_map(|(i, c)| {
|
||||||
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
|
TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
|
||||||
let lo = span.lo() + BytePos(2 + i as u32);
|
let lo = span.lo() + BytePos(2 + i as u32);
|
||||||
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
|
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use crate::{EarlyContext, EarlyLintPass, LintContext};
|
use crate::{EarlyContext, EarlyLintPass, LintContext};
|
||||||
|
use ast::util::unicode::{contains_text_flow_control_chars, TEXT_FLOW_CONTROL_CHARS};
|
||||||
use rustc_ast as ast;
|
use rustc_ast as ast;
|
||||||
use rustc_errors::{Applicability, SuggestionStyle};
|
use rustc_errors::{Applicability, SuggestionStyle};
|
||||||
use rustc_span::{BytePos, Span, Symbol};
|
use rustc_span::{BytePos, Span, Symbol};
|
||||||
|
@ -37,11 +38,6 @@ declare_lint! {
|
||||||
|
|
||||||
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
|
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
|
||||||
|
|
||||||
crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
|
|
||||||
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
|
|
||||||
'\u{2069}',
|
|
||||||
];
|
|
||||||
|
|
||||||
impl HiddenUnicodeCodepoints {
|
impl HiddenUnicodeCodepoints {
|
||||||
fn lint_text_direction_codepoint(
|
fn lint_text_direction_codepoint(
|
||||||
&self,
|
&self,
|
||||||
|
@ -57,7 +53,7 @@ impl HiddenUnicodeCodepoints {
|
||||||
.as_str()
|
.as_str()
|
||||||
.char_indices()
|
.char_indices()
|
||||||
.filter_map(|(i, c)| {
|
.filter_map(|(i, c)| {
|
||||||
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
|
TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
|
||||||
let lo = span.lo() + BytePos(i as u32 + padding);
|
let lo = span.lo() + BytePos(i as u32 + padding);
|
||||||
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
|
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
|
||||||
})
|
})
|
||||||
|
@ -131,7 +127,7 @@ impl HiddenUnicodeCodepoints {
|
||||||
impl EarlyLintPass for HiddenUnicodeCodepoints {
|
impl EarlyLintPass for HiddenUnicodeCodepoints {
|
||||||
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
|
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
|
||||||
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
|
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
|
||||||
if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
|
if contains_text_flow_control_chars(&comment.as_str()) {
|
||||||
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
|
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -142,7 +138,7 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
|
||||||
let (text, span, padding) = match &expr.kind {
|
let (text, span, padding) = match &expr.kind {
|
||||||
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
|
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
|
||||||
let text = token.symbol;
|
let text = token.symbol;
|
||||||
if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
|
if !contains_text_flow_control_chars(&text.as_str()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let padding = match kind {
|
let padding = match kind {
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use rustc_ast::ast::{self, AttrStyle};
|
use rustc_ast::ast::{self, AttrStyle};
|
||||||
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
|
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
|
||||||
use rustc_ast::tokenstream::{Spacing, TokenStream};
|
use rustc_ast::tokenstream::{Spacing, TokenStream};
|
||||||
|
use rustc_ast::util::unicode::contains_text_flow_control_chars;
|
||||||
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
|
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
|
||||||
use rustc_lexer::unescape::{self, Mode};
|
use rustc_lexer::unescape::{self, Mode};
|
||||||
use rustc_lexer::{Base, DocStyle, RawStrError};
|
use rustc_lexer::{Base, DocStyle, RawStrError};
|
||||||
|
@ -137,45 +138,7 @@ impl<'a> StringReader<'a> {
|
||||||
// Opening delimiter of the length 2 is not included into the comment text.
|
// Opening delimiter of the length 2 is not included into the comment text.
|
||||||
let content_start = start + BytePos(2);
|
let content_start = start + BytePos(2);
|
||||||
let content = self.str_from(content_start);
|
let content = self.str_from(content_start);
|
||||||
|
if contains_text_flow_control_chars(content) {
|
||||||
// Char - UTF-8
|
|
||||||
// U+202A - E2 80 AA
|
|
||||||
// U+202B - E2 80 AB
|
|
||||||
// U+202C - E2 80 AC
|
|
||||||
// U+202D - E2 80 AD
|
|
||||||
// U+202E - E2 80 AE
|
|
||||||
// U+2066 - E2 81 A6
|
|
||||||
// U+2067 - E2 81 A7
|
|
||||||
// U+2068 - E2 81 A8
|
|
||||||
// U+2069 - E2 81 A9
|
|
||||||
let mut bytes = content.as_bytes();
|
|
||||||
let contains_flow_control_chars = loop {
|
|
||||||
match core::slice::memchr::memchr(0xE2, &bytes) {
|
|
||||||
Some(idx) => {
|
|
||||||
// bytes are valid UTF-8 -> E2 must be followed by two bytes
|
|
||||||
let ch = &bytes[idx..idx + 3];
|
|
||||||
match ch[1] {
|
|
||||||
0x80 => {
|
|
||||||
if (0xAA..=0xAE).contains(&ch[2]) {
|
|
||||||
break true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
0x81 => {
|
|
||||||
if (0xA6..=0xA9).contains(&ch[2]) {
|
|
||||||
break true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
bytes = &bytes[idx + 3..];
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
break false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if contains_flow_control_chars {
|
|
||||||
let span = self.mk_sp(start, self.pos);
|
let span = self.mk_sp(start, self.pos);
|
||||||
self.sess.buffer_lint_with_diagnostic(
|
self.sess.buffer_lint_with_diagnostic(
|
||||||
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
&TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue