Auto merge of #136831 - ehuss:update-stdarch, r=Amanieu
Update stdarch Updates stdarch - core_arch: Add LoongArch basic intrinsics: https://github.com/rust-lang/stdarch/pull/1688 - New ARM intrinsic generator: https://github.com/rust-lang/stdarch/pull/1693 - Fix the bug in CMPINT intrinsics with IMM3=7: https://github.com/rust-lang/stdarch/pull/1694 - Expand feature detection on AArch64 Darwin: https://github.com/rust-lang/stdarch/pull/1695 - Tidying x86 `as_*` functions: https://github.com/rust-lang/stdarch/pull/1696 - Fix typo and prettify comment: https://github.com/rust-lang/stdarch/pull/1697 - add is_s390x_feature_detected: https://github.com/rust-lang/stdarch/pull/1699 - add vec_add for s390x: https://github.com/rust-lang/stdarch/pull/1703 - s390x: add vec_sub, vec_mul, vec_min, vec_max, vec_abs and vec_splats: https://github.com/rust-lang/stdarch/pull/1704 - Fix build and CLI behaviour for stdarch-gen-arm. https://github.com/rust-lang/stdarch/pull/1705 - Fix some test naming, and refactor stdarch-verify in general: https://github.com/rust-lang/stdarch/pull/1707 - Update all stdarch crates to Rust 2024: https://github.com/rust-lang/stdarch/pull/1710 - Add keylocker (kl and widekl) intrinsics and runtime feature detection: https://github.com/rust-lang/stdarch/pull/1706 - S390x vector bitwise operations: https://github.com/rust-lang/stdarch/pull/1709 - Update CI to FreeBSD 13.4: https://github.com/rust-lang/stdarch/pull/1715 - Update wasm sub sat intrinsics for LLVM 20: https://github.com/rust-lang/stdarch/pull/1719 - powerpc: use more target-independent llvm intrinsics (min, max, round, countlz): https://github.com/rust-lang/stdarch/pull/1713 - S390x float rounding: https://github.com/rust-lang/stdarch/pull/1712 - mark riscv intrinsics as safe: https://github.com/rust-lang/stdarch/pull/1717 - change redundant transmutations of sign to cast_unsigned: https://github.com/rust-lang/stdarch/pull/1711 - Fix - AArch64 Big Endian Intrinsics: https://github.com/rust-lang/stdarch/pull/1708 - mark x86 intrinsics as safe: 
https://github.com/rust-lang/stdarch/pull/1714 - AArch64: Add NEON fp16 intrinsics: https://github.com/rust-lang/stdarch/pull/1726 - wasm: use simd_as for float to integer conversions: https://github.com/rust-lang/stdarch/pull/1724 - nvptx: use simd_fmin and simd_fmax for minnum and maxnum: https://github.com/rust-lang/stdarch/pull/1725 - powerpc: use simd_ceil and simd_floor: https://github.com/rust-lang/stdarch/pull/1723 - Changed altivec.rs to new intrinsic declaration: https://github.com/rust-lang/stdarch/pull/1722 - Remove some allow(unsafe_op_in_unsafe_fn)s and use target_feature 1.1 in examples: https://github.com/rust-lang/stdarch/pull/1727 - fix - neon type signed unsigned conversions: https://github.com/rust-lang/stdarch/pull/1729 - s390x_is_feature_detected!: detect more features: https://github.com/rust-lang/stdarch/pull/1720 - Fix doctests failing due to unused_unsafe: https://github.com/rust-lang/stdarch/pull/1731 - fix compilation on armebv7r-none-eabi: https://github.com/rust-lang/stdarch/pull/1733 - wasm: update for rintf intrinsic rename: https://github.com/rust-lang/stdarch/pull/1721 - powerpc: use the simd_fma intrinsic for vec_madd: https://github.com/rust-lang/stdarch/pull/1734 - powerpc: use llvm.fshl for vec_rl: https://github.com/rust-lang/stdarch/pull/1735 - s390x: add more intrinsics: https://github.com/rust-lang/stdarch/pull/1728 - make _mm256_zero{upper,all} safe: https://github.com/rust-lang/stdarch/pull/1736 - fix unnecessary unsafe error in doctest: https://github.com/rust-lang/stdarch/pull/1739 - Feat - Aarch64 FEAT_FAMINMAX: https://github.com/rust-lang/stdarch/pull/1732 - feat - FEAT_LUT neon intrinsics: https://github.com/rust-lang/stdarch/pull/1741
This commit is contained in:
commit
b74da9613a
5 changed files with 25 additions and 14 deletions
|
@ -83,17 +83,17 @@ cfg_match! {
|
|||
|
||||
// For each character in the chunk, see if its byte value is < 0, which
|
||||
// indicates that it's part of a UTF-8 char.
|
||||
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
|
||||
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
|
||||
// Create a bit mask from the comparison results.
|
||||
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
|
||||
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
|
||||
|
||||
// If the bit mask is all zero, we only have ASCII chars here:
|
||||
if multibyte_mask == 0 {
|
||||
assert!(intra_chunk_offset == 0);
|
||||
|
||||
// Check for newlines in the chunk
|
||||
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
|
||||
let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
|
||||
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
|
||||
let mut newlines_mask = _mm_movemask_epi8(newlines_test);
|
||||
|
||||
let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ check-cfg = [
|
|||
'cfg(bootstrap)',
|
||||
'cfg(no_fp_fmt_parse)',
|
||||
'cfg(stdarch_intel_sde)',
|
||||
# #[cfg(bootstrap)]
|
||||
'cfg(target_feature, values("vector-enhancements-1"))',
|
||||
# core uses #[path] imports to portable-simd `core_simd` crate
|
||||
# and to stdarch `core_arch` crate, which messes up Cargo's list
|
||||
# of declared features, we therefore expect any feature cfg
|
||||
|
|
|
@ -202,14 +202,17 @@
|
|||
//
|
||||
// Target features:
|
||||
// tidy-alphabetical-start
|
||||
#![feature(aarch64_unstable_target_feature)]
|
||||
#![feature(arm_target_feature)]
|
||||
#![feature(avx512_target_feature)]
|
||||
#![feature(hexagon_target_feature)]
|
||||
#![feature(keylocker_x86)]
|
||||
#![feature(loongarch_target_feature)]
|
||||
#![feature(mips_target_feature)]
|
||||
#![feature(powerpc_target_feature)]
|
||||
#![feature(riscv_target_feature)]
|
||||
#![feature(rtm_target_feature)]
|
||||
#![feature(s390x_target_feature)]
|
||||
#![feature(sha512_sm_x86)]
|
||||
#![feature(sse4a_target_feature)]
|
||||
#![feature(tbm_target_feature)]
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 684de0d6fef708cae08214fef9643dd9ec7296e1
|
||||
Subproject commit 9426bb56586c6ae4095a2dcbd66c570253e6fb32
|
|
@ -257,6 +257,8 @@ fn analyze_source_file_dispatch(
|
|||
/// SSE2 intrinsics to quickly find all newlines.
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
|
||||
#[allow(unsafe_op_in_unsafe_fn)]
|
||||
unsafe fn analyze_source_file_sse2(
|
||||
src: &str,
|
||||
lines: &mut Vec<TextSize>,
|
||||
|
@ -287,17 +289,17 @@ unsafe fn analyze_source_file_sse2(
|
|||
|
||||
// For each character in the chunk, see if its byte value is < 0, which
|
||||
// indicates that it's part of a UTF-8 char.
|
||||
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
|
||||
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
|
||||
// Create a bit mask from the comparison results.
|
||||
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
|
||||
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
|
||||
|
||||
// If the bit mask is all zero, we only have ASCII chars here:
|
||||
if multibyte_mask == 0 {
|
||||
assert!(intra_chunk_offset == 0);
|
||||
|
||||
// Check for newlines in the chunk
|
||||
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
|
||||
let newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
|
||||
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
|
||||
let newlines_mask = _mm_movemask_epi8(newlines_test);
|
||||
|
||||
if newlines_mask != 0 {
|
||||
// All control characters are newlines, record them
|
||||
|
@ -354,15 +356,19 @@ unsafe fn analyze_source_file_sse2(
|
|||
// The mask is a 64-bit integer, where each 4-bit group corresponds to a u8 in the
|
||||
// input vector. The least significant 4 bits correspond to the first byte in
|
||||
// the vector.
|
||||
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
|
||||
#[allow(unsafe_op_in_unsafe_fn)]
|
||||
unsafe fn move_mask(v: std::arch::aarch64::uint8x16_t) -> u64 {
|
||||
use std::arch::aarch64::*;
|
||||
|
||||
let nibble_mask = unsafe { vshrn_n_u16(vreinterpretq_u16_u8(v), 4) };
|
||||
unsafe { vget_lane_u64(vreinterpret_u64_u8(nibble_mask), 0) }
|
||||
let nibble_mask = vshrn_n_u16(vreinterpretq_u16_u8(v), 4);
|
||||
vget_lane_u64(vreinterpret_u64_u8(nibble_mask), 0)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
|
||||
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
|
||||
#[allow(unsafe_op_in_unsafe_fn)]
|
||||
unsafe fn analyze_source_file_neon(
|
||||
src: &str,
|
||||
lines: &mut Vec<TextSize>,
|
||||
|
@ -376,7 +382,7 @@ unsafe fn analyze_source_file_neon(
|
|||
|
||||
let chunk_count = src.len() / CHUNK_SIZE;
|
||||
|
||||
let newline = unsafe { vdupq_n_s8(b'\n' as i8) };
|
||||
let newline = vdupq_n_s8(b'\n' as i8);
|
||||
|
||||
// This variable keeps track of where we should start decoding a
|
||||
// chunk. If a multi-byte character spans across chunk boundaries,
|
||||
|
@ -390,7 +396,7 @@ unsafe fn analyze_source_file_neon(
|
|||
|
||||
// For each character in the chunk, see if its byte value is < 0, which
|
||||
// indicates that it's part of a UTF-8 char.
|
||||
let multibyte_test = unsafe { vcltzq_s8(chunk) };
|
||||
let multibyte_test = vcltzq_s8(chunk);
|
||||
// Create a bit mask from the comparison results.
|
||||
let multibyte_mask = unsafe { move_mask(multibyte_test) };
|
||||
|
||||
|
@ -399,7 +405,7 @@ unsafe fn analyze_source_file_neon(
|
|||
assert!(intra_chunk_offset == 0);
|
||||
|
||||
// Check for newlines in the chunk
|
||||
let newlines_test = unsafe { vceqq_s8(chunk, newline) };
|
||||
let newlines_test = vceqq_s8(chunk, newline);
|
||||
let mut newlines_mask = unsafe { move_mask(newlines_test) };
|
||||
|
||||
// If the bit mask is not all zero, there are newlines in this chunk.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue