1
Fork 0

Account for C string literals in HiddenUnicodeCodepoints lint

This commit is contained in:
Michael Goulet 2024-12-31 04:53:00 +00:00
parent 4e5fec2f1e
commit 54e33bbdec
3 changed files with 76 additions and 23 deletions

View file

@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
if !contains_text_flow_control_chars(text.as_str()) { if !contains_text_flow_control_chars(text.as_str()) {
return; return;
} }
let padding = match token_lit.kind { let (padding, point_at_inner_spans) = match token_lit.kind {
// account for `"` or `'` // account for `"` or `'`
ast::token::LitKind::Str | ast::token::LitKind::Char => 1, ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
// account for `c"`
ast::token::LitKind::CStr => (2, true),
// account for `r###"` // account for `r###"`
ast::token::LitKind::StrRaw(n) => n as u32 + 2, ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
_ => return, // account for `cr###"`
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
// suppress bad literals.
ast::token::LitKind::Err(_) => return,
// Be conservative just in case new literals do support these.
_ => (0, false),
}; };
self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal"); self.lint_text_direction_codepoint(
cx,
text,
expr.span,
padding,
point_at_inner_spans,
"literal",
);
} }
_ => {} _ => {}
}; };

View file

@ -1,3 +1,5 @@
//@ edition: 2021
fn main() { fn main() {
// if access_level != "user" { // Check if admin // if access_level != "user" { // Check if admin
//~^ ERROR unicode codepoint changing visible direction of text present in comment //~^ ERROR unicode codepoint changing visible direction of text present in comment
@ -25,6 +27,11 @@ fn main() {
//~| ERROR non-ASCII character in raw byte string literal //~| ERROR non-ASCII character in raw byte string literal
println!("{:?}", '‮'); println!("{:?}", '‮');
//~^ ERROR unicode codepoint changing visible direction of text present in literal //~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = c"‮";
//~^ ERROR unicode codepoint changing visible direction of text present in literal
let _ = cr#"‮"#;
//~^ ERROR unicode codepoint changing visible direction of text present in literal
} }
//"/* } if isAdmin begin admins only */" //"/* } if isAdmin begin admins only */"

View file

@ -1,5 +1,5 @@
error: unicode escape in byte string error: unicode escape in byte string
--> $DIR/unicode-control-codepoints.rs:6:26 --> $DIR/unicode-control-codepoints.rs:8:26
| |
LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
| ^^^^^^^^ unicode escape in byte string | ^^^^^^^^ unicode escape in byte string
@ -7,7 +7,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
= help: unicode escape sequences cannot be used as a byte or in a byte string = help: unicode escape sequences cannot be used as a byte or in a byte string
error: unicode escape in byte string error: unicode escape in byte string
--> $DIR/unicode-control-codepoints.rs:6:35 --> $DIR/unicode-control-codepoints.rs:8:35
| |
LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
| ^^^^^^^^ unicode escape in byte string | ^^^^^^^^ unicode escape in byte string
@ -15,7 +15,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
= help: unicode escape sequences cannot be used as a byte or in a byte string = help: unicode escape sequences cannot be used as a byte or in a byte string
error: non-ASCII character in byte string literal error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:26 --> $DIR/unicode-control-codepoints.rs:18:26
| |
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only "); LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{202e}' | ^ must be ASCII but is '\u{202e}'
@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } <20>if isAdmin<69> <20> begin admins o
| ~~~~~~~~~~~~ | ~~~~~~~~~~~~
error: non-ASCII character in byte string literal error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:30 --> $DIR/unicode-control-codepoints.rs:18:30
| |
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only "); LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2066}' | ^ must be ASCII but is '\u{2066}'
@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*<2A> } \xE2\x81\xA6if isAdmin<69> <20> begin admins o
| ~~~~~~~~~~~~ | ~~~~~~~~~~~~
error: non-ASCII character in byte string literal error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:41 --> $DIR/unicode-control-codepoints.rs:18:41
| |
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only "); LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2069}' | ^ must be ASCII but is '\u{2069}'
@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*<2A> } <20>if isAdmin\xE2\x81\xA9 <20> begin admins o
| ~~~~~~~~~~~~ | ~~~~~~~~~~~~
error: non-ASCII character in byte string literal error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:43 --> $DIR/unicode-control-codepoints.rs:18:43
| |
LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only "); LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^ must be ASCII but is '\u{2066}' | ^ must be ASCII but is '\u{2066}'
@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*<2A> } <20>if isAdmin<69> \xE2\x81\xA6 begin admins o
| ~~~~~~~~~~~~ | ~~~~~~~~~~~~
error: non-ASCII character in raw byte string literal error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:29 --> $DIR/unicode-control-codepoints.rs:23:29
| |
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##); LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{202e}' | ^ must be ASCII but is '\u{202e}'
error: non-ASCII character in raw byte string literal error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:33 --> $DIR/unicode-control-codepoints.rs:23:33
| |
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##); LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2066}' | ^ must be ASCII but is '\u{2066}'
error: non-ASCII character in raw byte string literal error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:44 --> $DIR/unicode-control-codepoints.rs:23:44
| |
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##); LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2069}' | ^ must be ASCII but is '\u{2069}'
error: non-ASCII character in raw byte string literal error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:46 --> $DIR/unicode-control-codepoints.rs:23:46
| |
LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##); LL | println!("{:?}", br##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^ must be ASCII but is '\u{2066}' | ^ must be ASCII but is '\u{2066}'
error: unicode codepoint changing visible direction of text present in comment error: unicode codepoint changing visible direction of text present in comment
--> $DIR/unicode-control-codepoints.rs:2:5 --> $DIR/unicode-control-codepoints.rs:4:5
| |
LL | // if access_level != "us<75>e<EFBFBD>r" { // Check if admin LL | // if access_level != "us<75>e<EFBFBD>r" { // Check if admin
| ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -97,7 +97,7 @@ LL | // if access_level != "us<75>e<EFBFBD>r" { // Check if admin
= help: if their presence wasn't intentional, you can remove them = help: if their presence wasn't intentional, you can remove them
error: unicode codepoint changing visible direction of text present in comment error: unicode codepoint changing visible direction of text present in comment
--> $DIR/unicode-control-codepoints.rs:30:1 --> $DIR/unicode-control-codepoints.rs:37:1
| |
LL | //"/*<2A> } <20>if isAdmin<69> <20> begin admins only */" LL | //"/*<2A> } <20>if isAdmin<69> <20> begin admins only */"
| ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -112,7 +112,7 @@ LL | //"/*<2A> } <20>if isAdmin<69> <20> begin admins only */"
= help: if their presence wasn't intentional, you can remove them = help: if their presence wasn't intentional, you can remove them
error: unicode codepoint changing visible direction of text present in literal error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:11:22 --> $DIR/unicode-control-codepoints.rs:13:22
| |
LL | println!("{:?}", "/*<2A> } <20>if isAdmin<69> <20> begin admins only "); LL | println!("{:?}", "/*<2A> } <20>if isAdmin<69> <20> begin admins only ");
| ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^ | ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^
@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:14:22 --> $DIR/unicode-control-codepoints.rs:16:22
| |
LL | println!("{:?}", r##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##); LL | println!("{:?}", r##"/*<2A> } <20>if isAdmin<69> <20> begin admins only "##);
| ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:26:22 --> $DIR/unicode-control-codepoints.rs:28:22
| |
LL | println!("{:?}", '‮'); LL | println!("{:?}", '‮');
| ^-^ | ^-^
@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca
LL | println!("{:?}", '\u{202e}'); LL | println!("{:?}", '\u{202e}');
| ~~~~~~~~ | ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:31:13
|
LL | let _ = c"‮";
| ^^-^
| | |
| | '\u{202e}'
| this literal contains an invisible unicode text flow control codepoint
|
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
= help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
|
LL | let _ = c"\u{202e}";
| ~~~~~~~~
error: unicode codepoint changing visible direction of text present in literal
--> $DIR/unicode-control-codepoints.rs:33:13
|
LL | let _ = cr#"‮"#;
| ^^^^-^^
| | |
| | '\u{202e}'
| this literal contains an invisible unicode text flow control codepoint
|
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
= help: if their presence wasn't intentional, you can remove them
help: if you want to keep them but make them visible in your source code, you can escape them
|
LL | let _ = cr#"\u{202e}"#;
| ~~~~~~~~
error: unicode codepoint changing visible direction of text present in doc comment error: unicode codepoint changing visible direction of text present in doc comment
--> $DIR/unicode-control-codepoints.rs:33:1 --> $DIR/unicode-control-codepoints.rs:40:1
| |
LL | /** '‮'); */fn foo() {} LL | /** '‮'); */fn foo() {}
| ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@ -177,7 +209,7 @@ LL | /** '<27>'); */fn foo() {}
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
error: unicode codepoint changing visible direction of text present in doc comment error: unicode codepoint changing visible direction of text present in doc comment
--> $DIR/unicode-control-codepoints.rs:36:1 --> $DIR/unicode-control-codepoints.rs:43:1
| |
LL | / /** LL | / /**
LL | | * LL | | *
@ -188,5 +220,5 @@ LL | | * '<27>'); */fn bar() {}
= note: if their presence wasn't intentional, you can remove them = note: if their presence wasn't intentional, you can remove them
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
error: aborting due to 17 previous errors error: aborting due to 19 previous errors