1
Fork 0

Account for C string literals in HiddenUnicodeCodepoints lint

This commit is contained in:
Michael Goulet 2024-12-31 04:53:00 +00:00
parent 4e5fec2f1e
commit 54e33bbdec
3 changed files with 76 additions and 23 deletions

View file

@@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
                 if !contains_text_flow_control_chars(text.as_str()) {
                     return;
                 }
-                let padding = match token_lit.kind {
+                let (padding, point_at_inner_spans) = match token_lit.kind {
                     // account for `"` or `'`
-                    ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
+                    ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
+                    // account for `c"`
+                    ast::token::LitKind::CStr => (2, true),
                     // account for `r###"`
-                    ast::token::LitKind::StrRaw(n) => n as u32 + 2,
-                    _ => return,
+                    ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
+                    // account for `cr###"`
+                    ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
+                    // suppress bad literals.
+                    ast::token::LitKind::Err(_) => return,
+                    // Be conservative just in case new literals do support these.
+                    _ => (0, false),
                 };
-                self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
+                self.lint_text_direction_codepoint(
+                    cx,
+                    text,
+                    expr.span,
+                    padding,
+                    point_at_inner_spans,
+                    "literal",
+                );
             }
             _ => {}
         };

View file

@ -1,3 +1,5 @@
//@ edition: 2021
fn main() {
// if access_level != "user" { // Check if admin
//~^ ERROR unicode codepoint changing visible direction of text present in comment
@ -25,6 +27,11 @@ fn main() {
//~| ERROR non-ASCII character in raw byte string literal
println!("{:?}", '');
//~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = c"";
//~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = cr#""#;
//~^ ERROR unicode codepoint changing visible direction of text present in literal
}
//"/* } if isAdmin begin admins only */"

View file

@ -1,5 +1,5 @@
error: unicode escape in byte string
--> $DIR/unicode-control-codepoints.rs:6:26
--> $DIR/unicode-control-codepoints.rs:8:26
|
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
| ^^^^^^^^ unicode escape in byte string
@ -7,7 +7,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
= help: unicode escape sequences cannot be used as a byte or in a byte string
error: unicode escape in byte string
--> $DIR/unicode-control-codepoints.rs:6:35
--> $DIR/unicode-control-codepoints.rs:8:35
|
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
| ^^^^^^^^ unicode escape in byte string
@ -15,7 +15,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
= help: unicode escape sequences cannot be used as a byte or in a byte string
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:26
--> $DIR/unicode-control-codepoints.rs:18:26
|
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{202e}'
@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } <20>if isAdmin<69> <20> begin admins o
| ~~~~~~~~~~~~
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:30
--> $DIR/unicode-control-codepoints.rs:18:30
|
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2066}'
@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*<2A> } \xE2\x81\xA6if isAdmin<69> <20> begin admins o
| ~~~~~~~~~~~~
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:41
--> $DIR/unicode-control-codepoints.rs:18:41
|
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2069}'
@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*<2A> } <20>if isAdmin\xE2\x81\xA9 <20> begin admins o
| ~~~~~~~~~~~~
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:43
--> $DIR/unicode-control-codepoints.rs:18:43
|
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2066}'
@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> \xE2\x81\xA6 begin admins o
| ~~~~~~~~~~~~
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:29
--> $DIR/unicode-control-codepoints.rs:23:29
|
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{202e}'
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:33
--> $DIR/unicode-control-codepoints.rs:23:33
|
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2066}'
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:44
--> $DIR/unicode-control-codepoints.rs:23:44
|
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2069}'
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:46
--> $DIR/unicode-control-codepoints.rs:23:46
|
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2066}'
error: unicode codepoint changing visible direction of text present in comment
--> $DIR/unicode-control-codepoints.rs:2:5
--> $DIR/unicode-control-codepoints.rs:4:5
|
LL | // if access_level != "us<75>e<EFBFBD>r" { // Check if admin
| ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -97,7 +97,7 @@ LL | // if access_level != "us<75>e<EFBFBD>r" { // Check if admin
= help: if their presence wasn't intentional, you can remove them
error: unicode codepoint changing visible direction of text present in comment
--> $DIR/unicode-control-codepoints.rs:30:1
--> $DIR/unicode-control-codepoints.rs:37:1
|
LL | //"/*<2A> } <20>if isAdmin<69> <20> begin admins only */"
| ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -112,7 +112,7 @@ LL | //"/*<2A> } <20>if isAdmin<69> <20> begin admins only */"
= help: if their presence wasn't intentional, you can remove them
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:11:22
--> $DIR/unicode-control-codepoints.rs:13:22
|
LL | println!("{:?}", "/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^
@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:14:22
--> $DIR/unicode-control-codepoints.rs:16:22
|
LL | println!("{:?}", r##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:26:22
--> $DIR/unicode-control-codepoints.rs:28:22
|
LL | println!("{:?}", '<27>');
| ^-^
@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca
LL | println!("{:?}", '\u{202e}');
| ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:31:13
|
LL | let _ = c"<22>";
| ^^-^
| | |
| | '\u{202e}'
| this literal contains an invisible unicode text flow control codepoint
|
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
= help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
|
LL | let _ = c"\u{202e}";
| ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:33:13
|
LL | let _ = cr#"<22>"#;
| ^^^^-^^
| | |
| | '\u{202e}'
| this literal contains an invisible unicode text flow control codepoint
|
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
= help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
|
LL | let _ = cr#"\u{202e}"#;
| ~~~~~~~~
error: unicode codepoint changing visible direction of text present in doc comment
--> $DIR/unicode-control-codepoints.rs:33:1
--> $DIR/unicode-control-codepoints.rs:40:1
|
LL | /** '<27>'); */fn foo() {}
| ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@ -177,7 +209,7 @@ LL | /** '<27>'); */fn foo() {}
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
error: unicode codepoint changing visible direction of text present in doc comment
--> $DIR/unicode-control-codepoints.rs:36:1
--> $DIR/unicode-control-codepoints.rs:43:1
|
LL | / /**
LL | | *
@ -188,5 +220,5 @@ LL | | * '<27>'); */fn bar() {}
= note: if their presence wasn't intentional, you can remove them
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
error: aborting due to 17 previous errors
error: aborting due to 19 previous errors