
Use as_chunks in analyze_source_file_sse2

real-eren 2025-02-20 16:04:02 -05:00
parent b522e7c5ea
commit 62f5a5501f
2 changed files with 5 additions and 7 deletions
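
For context on the API being adopted: `slice::as_chunks::<N>()` splits a slice into a slice of `[T; N]` arrays plus a remainder slice, replacing the manual `chunk_count` arithmetic removed below. A minimal sketch of its behavior, assuming a nightly toolchain with the same `slice_as_chunks` gate this commit adds:

    #![feature(slice_as_chunks)]

    fn main() {
        // 17 bytes: one full 16-byte chunk plus a 1-byte tail.
        let src = "hello, world!!!!!";
        let (chunks, tail) = src.as_bytes().as_chunks::<16>();
        assert_eq!(chunks.len(), 1);
        assert_eq!(tail, b"!");
        // The full chunks end exactly where the tail begins, which is why
        // the new code can compute `tail_start` as `src.len() - tail.len()`.
        assert_eq!(src.len() - tail.len(), chunks.len() * 16);
    }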


@@ -68,9 +68,7 @@ cfg_match! {
         const CHUNK_SIZE: usize = 16;
-        let src_bytes = src.as_bytes();
-
-        let chunk_count = src.len() / CHUNK_SIZE;
+        let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
         // This variable keeps track of where we should start decoding a
         // chunk. If a multi-byte character spans across chunk boundaries,
@@ -78,11 +76,10 @@ cfg_match! {
         // handled it.
         let mut intra_chunk_offset = 0;
-        for chunk_index in 0..chunk_count {
-            let ptr = src_bytes.as_ptr() as *const __m128i;
+        for (chunk_index, chunk) in chunks.iter().enumerate() {
             // We don't know if the pointer is aligned to 16 bytes, so we
             // use `loadu`, which supports unaligned loading.
-            let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
+            let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
             // For character in the chunk, see if its byte value is < 0, which
             // indicates that it's part of a UTF-8 char.
@@ -123,7 +120,7 @@ cfg_match! {
         }
         // There might still be a tail left to analyze
-        let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
+        let tail_start = src.len() - tail.len() + intra_chunk_offset;
         if tail_start < src.len() {
             analyze_source_file_generic(
                 &src[tail_start..],

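One subtlety the change preserves: iterating over `&[u8; 16]` chunks does not improve alignment, because `[u8; 16]` is only 1-byte aligned, so the unaligned `loadu` load is still the right intrinsic. A minimal sketch of that load pattern, under the same assumption (x86-64 only, helper name illustrative):

    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::{__m128i, _mm_loadu_si128};

    // Loads one 16-byte chunk with an unaligned load; the aligned
    // `_mm_load_si128` would require 16-byte alignment, which a
    // `&[u8; 16]` does not guarantee.
    #[cfg(target_arch = "x86_64")]
    fn load_chunk(chunk: &[u8; 16]) -> __m128i {
        // SAFETY: `chunk` points to 16 readable bytes, and `loadu`
        // has no alignment requirement.
        unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) }
    }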

@@ -31,6 +31,7 @@
 #![feature(round_char_boundary)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![feature(slice_as_chunks)]
 #![warn(unreachable_pub)]
 // tidy-alphabetical-end
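
The new gate slots in alphabetically, matching the `tidy-alphabetical-end` marker that suggests tidy keeps this attribute block sorted. Without the gate, any use of the then-unstable method is rejected on nightly with E0658; a minimal sketch of what it unlocks (helper name illustrative):

    #![feature(slice_as_chunks)] // removing this line makes the call below fail with E0658

    // Illustrative helper: the first full 4-byte chunk, if any.
    fn first_chunk4(bytes: &[u8]) -> Option<&[u8; 4]> {
        bytes.as_chunks::<4>().0.first()
    }

    fn main() {
        assert_eq!(first_chunk4(b"abcdefg"), Some(b"abcd"));
    }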