1
Fork 0

Rollup merge of #120840 - HTGAzureX1212:HTGAzureX1212/unicode-identifier-types, r=fmease,Manishearth

Split Diagnostics for Uncommon Codepoints: Add Individual Identifier Types

This pull request further modifies the `uncommon_codepoints` lint, adding the individual identifier types of `Technical`, `Not_NFKC`, `Exclusion` and `Limited_Use` to the diagnostic message.

Example rendered diagnostic:
```
error: identifier contains a Unicode codepoint that is not used in normalized strings: 'ij'
  --> $DIR/lint-uncommon-codepoints.rs:6:4
   |
LL | fn dijkstra() {}
   |    ^^^^^^^
   = note: this character is included in the Not_NFKC Unicode general security profile
```

Second step of #120228.
This commit is contained in:
Guillaume Gomez 2024-02-26 10:27:41 +01:00 committed by GitHub
commit 91d337dfa8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 79 additions and 19 deletions

View file

@ -244,9 +244,28 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
[one] an uncommon Unicode codepoint
*[other] uncommon Unicode codepoints
[one] { $identifier_type ->
[Exclusion] a character from an archaic script
[Technical] a character that is for non-linguistic, specialized usage
[Limited_Use] a character from a script in limited use
[Not_NFKC] a non normalized (NFKC) character
*[other] an uncommon character
}
*[other] { $identifier_type ->
[Exclusion] {$codepoints_len} characters from archaic scripts
[Technical] {$codepoints_len} characters that are for non-linguistic, specialized usage
[Limited_Use] {$codepoints_len} characters from scripts in limited use
[Not_NFKC] {$codepoints_len} non normalized (NFKC) characters
*[other] uncommon characters
}
}: {$codepoints}
.note = {$codepoints_len ->
[one] this character is
*[other] these characters are
} included in the{$identifier_type ->
[Restricted] {""}
*[other] {" "}{$identifier_type}
} Unicode general security profile
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level

View file

@ -31,6 +31,7 @@
#![feature(array_windows)]
#![feature(box_patterns)]
#![feature(control_flow_enum)]
#![feature(extract_if)]
#![feature(generic_nonzero)]
#![feature(if_let_guard)]
#![feature(iter_order_by)]

View file

@ -1129,9 +1129,11 @@ pub struct IdentifierNonAsciiChar;
#[derive(LintDiagnostic)]
#[diag(lint_identifier_uncommon_codepoints)]
#[note]
pub struct IdentifierUncommonCodepoints {
pub codepoints: Vec<char>,
pub codepoints_len: usize,
pub identifier_type: &'static str,
}
#[derive(LintDiagnostic)]

View file

@ -7,6 +7,7 @@ use rustc_ast as ast;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::unord::UnordMap;
use rustc_span::symbol::Symbol;
use unicode_security::general_security_profile::IdentifierType;
declare_lint! {
/// The `non_ascii_idents` lint detects non-ASCII identifiers.
@ -189,17 +190,47 @@ impl EarlyLintPass for NonAsciiIdents {
if check_uncommon_codepoints
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
{
let codepoints: Vec<_> = symbol_str
let mut chars: Vec<_> = symbol_str
.chars()
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
.map(|c| (c, GeneralSecurityProfile::identifier_type(c)))
.collect();
let codepoints_len = codepoints.len();
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints { codepoints, codepoints_len },
);
for (id_ty, id_ty_descr) in [
(IdentifierType::Exclusion, "Exclusion"),
(IdentifierType::Technical, "Technical"),
(IdentifierType::Limited_Use, "Limited_Use"),
(IdentifierType::Not_NFKC, "Not_NFKC"),
] {
let codepoints: Vec<_> =
chars.extract_if(|(_, ty)| *ty == Some(id_ty)).collect();
if codepoints.is_empty() {
continue;
}
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints {
codepoints_len: codepoints.len(),
codepoints: codepoints.into_iter().map(|(c, _)| c).collect(),
identifier_type: id_ty_descr,
},
);
}
let remaining = chars
.extract_if(|(c, _)| !GeneralSecurityProfile::identifier_allowed(*c))
.collect::<Vec<_>>();
if !remaining.is_empty() {
cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints {
codepoints_len: remaining.len(),
codepoints: remaining.into_iter().map(|(c, _)| c).collect(),
identifier_type: "Restricted",
},
);
}
}
}