1
Fork 0

Auto merge of #136831 - ehuss:update-stdarch, r=Amanieu

Update stdarch

Updates stdarch

- core_arch: Add LoongArch basic intrinsics: https://github.com/rust-lang/stdarch/pull/1688
- New ARM intrinsic generator: https://github.com/rust-lang/stdarch/pull/1693
- Fix the bug in CMPINT intrinsics with IMM3=7: https://github.com/rust-lang/stdarch/pull/1694
- Expand feature detection on AArch64 Darwin: https://github.com/rust-lang/stdarch/pull/1695
- Tidying x86 `as_*` functions: https://github.com/rust-lang/stdarch/pull/1696
- Fix typo and prettify comment: https://github.com/rust-lang/stdarch/pull/1697
- add is_s390x_feature_detected: https://github.com/rust-lang/stdarch/pull/1699
- add vec_add for s390x: https://github.com/rust-lang/stdarch/pull/1703
- s390x: add vec_sub, vec_mul, vec_min, vec_max, vec_abs and vec_splats: https://github.com/rust-lang/stdarch/pull/1704
- Fix build and CLI behaviour for stdarch-gen-arm: https://github.com/rust-lang/stdarch/pull/1705
- Fix some test naming, and refactor stdarch-verify in general: https://github.com/rust-lang/stdarch/pull/1707
- Update all stdarch crates to Rust 2024: https://github.com/rust-lang/stdarch/pull/1710
- Add keylocker (kl and widekl) intrinsics and runtime feature detection: https://github.com/rust-lang/stdarch/pull/1706
- S390x vector bitwise operations: https://github.com/rust-lang/stdarch/pull/1709
- Update CI to FreeBSD 13.4: https://github.com/rust-lang/stdarch/pull/1715
- Update wasm sub sat intrinsics for LLVM 20: https://github.com/rust-lang/stdarch/pull/1719
- powerpc: use more target-independent llvm intrinsics (min, max, round, countlz): https://github.com/rust-lang/stdarch/pull/1713
- S390x float rounding: https://github.com/rust-lang/stdarch/pull/1712
- mark riscv intrinsics as safe: https://github.com/rust-lang/stdarch/pull/1717
- change redundant transmutations of sign to cast_unsigned: https://github.com/rust-lang/stdarch/pull/1711
- Fix - AArch64 Big Endian Intrinsics: https://github.com/rust-lang/stdarch/pull/1708
- mark x86 intrinsics as safe: https://github.com/rust-lang/stdarch/pull/1714
- AArch64: Add NEON fp16 intrinsics: https://github.com/rust-lang/stdarch/pull/1726
- wasm: use simd_as for float to integer conversions: https://github.com/rust-lang/stdarch/pull/1724
- nvptx: use simd_fmin and simd_fmax for minnum and maxnum: https://github.com/rust-lang/stdarch/pull/1725
- powerpc: use simd_ceil and simd_floor: https://github.com/rust-lang/stdarch/pull/1723
- Changed altivec.rs to new intrinsic declaration: https://github.com/rust-lang/stdarch/pull/1722
- Remove some allow(unsafe_op_in_unsafe_fn)s and use target_feature 1.1 in examples: https://github.com/rust-lang/stdarch/pull/1727
- fix - neon type signed unsigned conversions: https://github.com/rust-lang/stdarch/pull/1729
- s390x_is_feature_detected!: detect more features: https://github.com/rust-lang/stdarch/pull/1720
- Fix doctests failing due to unused_unsafe: https://github.com/rust-lang/stdarch/pull/1731
- fix compilation on armebv7r-none-eabi: https://github.com/rust-lang/stdarch/pull/1733
- wasm: update for rintf intrinsic rename: https://github.com/rust-lang/stdarch/pull/1721
- powerpc: use the simd_fma intrinsic for vec_madd: https://github.com/rust-lang/stdarch/pull/1734
- powerpc: use llvm.fshl for vec_rl: https://github.com/rust-lang/stdarch/pull/1735
- s390x: add more intrinsics: https://github.com/rust-lang/stdarch/pull/1728
- make _mm256_zero{upper,all} safe: https://github.com/rust-lang/stdarch/pull/1736
- fix unnecessary unsafe error in doctest: https://github.com/rust-lang/stdarch/pull/1739
- Feat - Aarch64 FEAT_FAMINMAX: https://github.com/rust-lang/stdarch/pull/1732
- feat - FEAT_LUT neon intrinsics: https://github.com/rust-lang/stdarch/pull/1741
This commit is contained in:
bors 2025-03-06 20:34:20 +00:00
commit b74da9613a
5 changed files with 25 additions and 14 deletions

View file

@ -83,17 +83,17 @@ cfg_match! {
// For each character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a UTF-8 char.
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
// Create a bit mask from the comparison results.
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);
// Check for newlines in the chunk
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
let mut newlines_mask = _mm_movemask_epi8(newlines_test);
let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);

View file

@ -32,6 +32,8 @@ check-cfg = [
'cfg(bootstrap)',
'cfg(no_fp_fmt_parse)',
'cfg(stdarch_intel_sde)',
# #[cfg(bootstrap)]
'cfg(target_feature, values("vector-enhancements-1"))',
# core uses #[path] imports to the portable-simd `core_simd` crate
# and to the stdarch `core_arch` crate, which messes up Cargo's list
# of declared features; we therefore expect any feature cfg

View file

@ -202,14 +202,17 @@
//
// Target features:
// tidy-alphabetical-start
#![feature(aarch64_unstable_target_feature)]
#![feature(arm_target_feature)]
#![feature(avx512_target_feature)]
#![feature(hexagon_target_feature)]
#![feature(keylocker_x86)]
#![feature(loongarch_target_feature)]
#![feature(mips_target_feature)]
#![feature(powerpc_target_feature)]
#![feature(riscv_target_feature)]
#![feature(rtm_target_feature)]
#![feature(s390x_target_feature)]
#![feature(sha512_sm_x86)]
#![feature(sse4a_target_feature)]
#![feature(tbm_target_feature)]

@ -1 +1 @@
Subproject commit 684de0d6fef708cae08214fef9643dd9ec7296e1
Subproject commit 9426bb56586c6ae4095a2dcbd66c570253e6fb32

View file

@ -257,6 +257,8 @@ fn analyze_source_file_dispatch(
/// SSE2 intrinsics to quickly find all newlines.
#[target_feature(enable = "sse2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn analyze_source_file_sse2(
src: &str,
lines: &mut Vec<TextSize>,
@ -287,17 +289,17 @@ unsafe fn analyze_source_file_sse2(
// For each character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a UTF-8 char.
let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
// Create a bit mask from the comparison results.
let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
// If the bit mask is all zero, we only have ASCII chars here:
if multibyte_mask == 0 {
assert!(intra_chunk_offset == 0);
// Check for newlines in the chunk
let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
let newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
let newlines_mask = _mm_movemask_epi8(newlines_test);
if newlines_mask != 0 {
// All control characters are newlines, record them
@ -354,15 +356,19 @@ unsafe fn analyze_source_file_sse2(
// The mask is a 64-bit integer, where each group of 4 bits corresponds to a u8 in the
// input vector. The least significant 4 bits correspond to the first byte in
// the vector.
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn move_mask(v: std::arch::aarch64::uint8x16_t) -> u64 {
use std::arch::aarch64::*;
let nibble_mask = unsafe { vshrn_n_u16(vreinterpretq_u16_u8(v), 4) };
unsafe { vget_lane_u64(vreinterpret_u64_u8(nibble_mask), 0) }
let nibble_mask = vshrn_n_u16(vreinterpretq_u16_u8(v), 4);
vget_lane_u64(vreinterpret_u64_u8(nibble_mask), 0)
}
#[target_feature(enable = "neon")]
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
// This can be removed once 1.87 is stable due to some intrinsics switching to safe.
#[allow(unsafe_op_in_unsafe_fn)]
unsafe fn analyze_source_file_neon(
src: &str,
lines: &mut Vec<TextSize>,
@ -376,7 +382,7 @@ unsafe fn analyze_source_file_neon(
let chunk_count = src.len() / CHUNK_SIZE;
let newline = unsafe { vdupq_n_s8(b'\n' as i8) };
let newline = vdupq_n_s8(b'\n' as i8);
// This variable keeps track of where we should start decoding a
// chunk. If a multi-byte character spans across chunk boundaries,
@ -390,7 +396,7 @@ unsafe fn analyze_source_file_neon(
// For each character in the chunk, see if its byte value is < 0, which
// indicates that it's part of a UTF-8 char.
let multibyte_test = unsafe { vcltzq_s8(chunk) };
let multibyte_test = vcltzq_s8(chunk);
// Create a bit mask from the comparison results.
let multibyte_mask = unsafe { move_mask(multibyte_test) };
@ -399,7 +405,7 @@ unsafe fn analyze_source_file_neon(
assert!(intra_chunk_offset == 0);
// Check for newlines in the chunk
let newlines_test = unsafe { vceqq_s8(chunk, newline) };
let newlines_test = vceqq_s8(chunk, newline);
let mut newlines_mask = unsafe { move_mask(newlines_test) };
// If the bit mask is not all zero, there are newlines in this chunk.