Auto merge of #90559 - rusticstuff:optimize-bidi-detection, r=davidtwco
Optimize bidi character detection. Should fix most of the performance regression of the bidi character detection (#90514), to be confirmed with a perf run.
This commit is contained in:
commit
5ec7d1dad6
5 changed files with 46 additions and 16 deletions
|
@ -16,6 +16,7 @@
|
|||
#![feature(nll)]
|
||||
#![feature(min_specialization)]
|
||||
#![recursion_limit = "256"]
|
||||
#![feature(slice_internals)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate rustc_macros;
|
||||
|
@ -25,6 +26,7 @@ pub mod util {
|
|||
pub mod comments;
|
||||
pub mod literal;
|
||||
pub mod parser;
|
||||
pub mod unicode;
|
||||
}
|
||||
|
||||
pub mod ast;
|
||||
|
|
35
compiler/rustc_ast/src/util/unicode.rs
Normal file
35
compiler/rustc_ast/src/util/unicode.rs
Normal file
|
@ -0,0 +1,35 @@
|
|||
/// Unicode bidirectional formatting code points, i.e. characters that alter
/// the direction in which surrounding text is laid out.
///
/// The element order is kept as originally declared.
pub const TEXT_FLOW_CONTROL_CHARS: &[char] = &[
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{2069}', // POP DIRECTIONAL ISOLATE
];
|
||||
|
||||
#[inline]
|
||||
pub fn contains_text_flow_control_chars(s: &str) -> bool {
|
||||
// Char - UTF-8
|
||||
// U+202A - E2 80 AA
|
||||
// U+202B - E2 80 AB
|
||||
// U+202C - E2 80 AC
|
||||
// U+202D - E2 80 AD
|
||||
// U+202E - E2 80 AE
|
||||
// U+2066 - E2 81 A6
|
||||
// U+2067 - E2 81 A7
|
||||
// U+2068 - E2 81 A8
|
||||
// U+2069 - E2 81 A9
|
||||
let mut bytes = s.as_bytes();
|
||||
loop {
|
||||
match core::slice::memchr::memchr(0xE2, &bytes) {
|
||||
Some(idx) => {
|
||||
// bytes are valid UTF-8 -> E2 must be followed by two bytes
|
||||
let ch = &bytes[idx..idx + 3];
|
||||
match ch {
|
||||
[_, 0x80, 0xAA..=0xAE] | [_, 0x81, 0xA6..=0xA9] => break true,
|
||||
_ => {}
|
||||
}
|
||||
bytes = &bytes[idx + 3..];
|
||||
}
|
||||
None => {
|
||||
break false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue