1
Fork 0
This commit is contained in:
Michael Goulet 2024-07-14 13:25:06 -04:00
parent cae4a84146
commit 71eb49c318

View file

@ -35,25 +35,25 @@ pub fn analyze_source_file(
cfg_match! { cfg_match! {
cfg(any(target_arch = "x86", target_arch = "x86_64")) => { cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
fn analyze_source_file_dispatch(src: &str, fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>, lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>, multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) { non_narrow_chars: &mut Vec<NonNarrowChar>,
) {
if is_x86_feature_detected!("sse2") { if is_x86_feature_detected!("sse2") {
unsafe { unsafe {
analyze_source_file_sse2(src, analyze_source_file_sse2(src, lines, multi_byte_chars, non_narrow_chars);
lines,
multi_byte_chars,
non_narrow_chars);
} }
} else { } else {
analyze_source_file_generic(src, analyze_source_file_generic(
src,
src.len(), src.len(),
RelativeBytePos::from_u32(0), RelativeBytePos::from_u32(0),
lines, lines,
multi_byte_chars, multi_byte_chars,
non_narrow_chars); non_narrow_chars,
);
} }
} }
@ -62,10 +62,12 @@ cfg_match! {
/// function falls back to the generic implementation. Otherwise it uses /// function falls back to the generic implementation. Otherwise it uses
/// SSE2 intrinsics to quickly find all newlines. /// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")] #[target_feature(enable = "sse2")]
unsafe fn analyze_source_file_sse2(src: &str, unsafe fn analyze_source_file_sse2(
src: &str,
lines: &mut Vec<RelativeBytePos>, lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>, multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) { non_narrow_chars: &mut Vec<NonNarrowChar>,
) {
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86")]
use std::arch::x86::*; use std::arch::x86::*;
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]
@ -83,7 +85,7 @@ cfg_match! {
// handled it. // handled it.
let mut intra_chunk_offset = 0; let mut intra_chunk_offset = 0;
for chunk_index in 0 .. chunk_count { for chunk_index in 0..chunk_count {
let ptr = src_bytes.as_ptr() as *const __m128i; let ptr = src_bytes.as_ptr() as *const __m128i;
// We don't know if the pointer is aligned to 16 bytes, so we // We don't know if the pointer is aligned to 16 bytes, so we
// use `loadu`, which supports unaligned loading. // use `loadu`, which supports unaligned loading.
@ -126,7 +128,7 @@ cfg_match! {
if index >= CHUNK_SIZE as u32 { if index >= CHUNK_SIZE as u32 {
// We have arrived at the end of the chunk. // We have arrived at the end of the chunk.
break break;
} }
lines.push(RelativeBytePos(index) + output_offset); lines.push(RelativeBytePos(index) + output_offset);
@ -137,14 +139,14 @@ cfg_match! {
// We are done for this chunk. All control characters were // We are done for this chunk. All control characters were
// newlines and we took care of those. // newlines and we took care of those.
continue continue;
} else { } else {
// Some of the control characters are not newlines, // Some of the control characters are not newlines,
// fall through to the slow path below. // fall through to the slow path below.
} }
} else { } else {
// No control characters, nothing to record for this chunk // No control characters, nothing to record for this chunk
continue continue;
} }
} }
@ -152,43 +154,48 @@ cfg_match! {
// There are control chars in here, fallback to generic decoding. // There are control chars in here, fallback to generic decoding.
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
intra_chunk_offset = analyze_source_file_generic( intra_chunk_offset = analyze_source_file_generic(
&src[scan_start .. ], &src[scan_start..],
CHUNK_SIZE - intra_chunk_offset, CHUNK_SIZE - intra_chunk_offset,
RelativeBytePos::from_usize(scan_start), RelativeBytePos::from_usize(scan_start),
lines, lines,
multi_byte_chars, multi_byte_chars,
non_narrow_chars non_narrow_chars,
); );
} }
// There might still be a tail left to analyze // There might still be a tail left to analyze
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset; let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
if tail_start < src.len() { if tail_start < src.len() {
analyze_source_file_generic(&src[tail_start ..], analyze_source_file_generic(
&src[tail_start..],
src.len() - tail_start, src.len() - tail_start,
RelativeBytePos::from_usize(tail_start), RelativeBytePos::from_usize(tail_start),
lines, lines,
multi_byte_chars, multi_byte_chars,
non_narrow_chars); non_narrow_chars,
);
} }
} }
} }
_ => { _ => {
// The target (or compiler version) does not support SSE2 ... // The target (or compiler version) does not support SSE2 ...
fn analyze_source_file_dispatch(src: &str, fn analyze_source_file_dispatch(
src: &str,
lines: &mut Vec<RelativeBytePos>, lines: &mut Vec<RelativeBytePos>,
multi_byte_chars: &mut Vec<MultiByteChar>, multi_byte_chars: &mut Vec<MultiByteChar>,
non_narrow_chars: &mut Vec<NonNarrowChar>) { non_narrow_chars: &mut Vec<NonNarrowChar>,
analyze_source_file_generic(src, ) {
analyze_source_file_generic(
src,
src.len(), src.len(),
RelativeBytePos::from_u32(0), RelativeBytePos::from_u32(0),
lines, lines,
multi_byte_chars, multi_byte_chars,
non_narrow_chars); non_narrow_chars,
);
} }
} }
} }
// `scan_len` determines the number of bytes in `src` to scan. Note that the // `scan_len` determines the number of bytes in `src` to scan. Note that the
// function can read past `scan_len` if a multi-byte character starts within the // function can read past `scan_len` if a multi-byte character starts within the
// range but extends past it. The overflow is returned by the function. // range but extends past it. The overflow is returned by the function.