rust/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs

//

//@ compile-flags: -C no-prepopulate-passes

#![crate_type = "lib"]
#![feature(repr_simd, core_intrinsics)]
#![allow(non_camel_case_types)]

use std::intrinsics::simd::simd_scatter;

/// Generic two-lane SIMD vector: a `#[repr(simd)]` newtype over `[T; 2]`.
///
/// The functions in this file instantiate it with raw pointers (scatter destinations),
/// `i32`/`u32` (masks), and `f32` or `*const f32` (values to store).
#[repr(simd)]
#[derive(Copy, Clone, PartialEq, Debug)]
pub struct Vec2<T>(pub [T; 2]);

/// Generic four-lane SIMD vector: a `#[repr(simd)]` newtype over `[T; 4]`.
///
/// NOTE(review): no function visible in this part of the file uses it — confirm whether it
/// is still needed.
#[repr(simd)]
#[derive(Copy, Clone, PartialEq, Debug)]
pub struct Vec4<T>(pub [T; 4]);

// CHECK-LABEL: @scatter_f32x2
/// Codegen check for `simd_scatter` with two `f32` lanes and a signed (`i32`) mask.
///
/// The FileCheck directives in the body expect the mask to be shifted right by 31 and
/// truncated to `<2 x i1>` (so only each lane's most significant bit is kept) before it is
/// handed to `@llvm.masked.scatter.v2f32.v2p0`.
///
/// # Safety
///
/// `pointers` is forwarded to `simd_scatter` without any validation.
/// NOTE(review): the intrinsic's exact safety contract is not visible in this file — see its
/// documentation before calling this outside of a codegen test.
#[no_mangle]
pub unsafe fn scatter_f32x2(pointers: Vec2<*mut f32>, mask: Vec2<i32>, values: Vec2<f32>) {
    // Most-significant-bit extraction; the alternation accepts both spellings of the constant
    // shift amount (`<i32 31, i32 31>` and `splat (i32 31)`).
    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
    // The shifted mask `A` is narrowed to one bit per lane and captured as `B`.
    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
    // The masked scatter must receive exactly that `<2 x i1>` value `B` as its mask operand.
    // CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
    simd_scatter(values, pointers, mask)
}

// CHECK-LABEL: @scatter_f32x2_unsigned
/// Codegen check for `simd_scatter` with two `f32` lanes and an unsigned (`u32`) mask.
///
/// The FileCheck directives are the same as for the signed-mask variant: shift right by 31,
/// truncate to `<2 x i1>`, then call `@llvm.masked.scatter.v2f32.v2p0` with that mask.
///
/// # Safety
///
/// `pointers` is forwarded to `simd_scatter` without any validation.
/// NOTE(review): the intrinsic's exact safety contract is not visible in this file — see its
/// documentation before calling this outside of a codegen test.
#[no_mangle]
pub unsafe fn scatter_f32x2_unsigned(pointers: Vec2<*mut f32>, mask: Vec2<u32>, values: Vec2<f32>) {
    // Most-significant-bit extraction; the alternation accepts both spellings of the constant
    // shift amount (`<i32 31, i32 31>` and `splat (i32 31)`).
    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
    // The shifted mask `A` is narrowed to one bit per lane and captured as `B`.
    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
    // The masked scatter must receive exactly that `<2 x i1>` value `B` as its mask operand.
    // CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
    simd_scatter(values, pointers, mask)
}

// CHECK-LABEL: @scatter_pf32x2
/// Codegen check for `simd_scatter` where the stored lanes are themselves pointers
/// (`*const f32`), written through `*mut *const f32` destinations, with an `i32` mask.
///
/// The FileCheck directives expect the same mask handling as the `f32` variants (shift right
/// by 31, truncate to `<2 x i1>`), but the call targets `@llvm.masked.scatter.v2p0.v2p0` and
/// the value operand is `<2 x ptr>`.
///
/// # Safety
///
/// `pointers` is forwarded to `simd_scatter` without any validation.
/// NOTE(review): the intrinsic's exact safety contract is not visible in this file — see its
/// documentation before calling this outside of a codegen test.
#[no_mangle]
pub unsafe fn scatter_pf32x2(
    pointers: Vec2<*mut *const f32>,
    mask: Vec2<i32>,
    values: Vec2<*const f32>,
) {
    // Most-significant-bit extraction; the alternation accepts both spellings of the constant
    // shift amount (`<i32 31, i32 31>` and `splat (i32 31)`).
    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
    // The shifted mask `A` is narrowed to one bit per lane and captured as `B`.
    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
    // Both the value vector and the destination vector are `<2 x ptr>` here.
    // CHECK: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
    simd_scatter(values, pointers, mask)
}
Remove redundant `ignore-tidy-linelength` annotations This is step 2 towards fixing #77548. In the codegen and codegen-units test suites, the `//` comment markers were kept in order not to affect any source locations. This is because these tests cannot be automatically `--bless`ed. 2021-04-03 13:05:11 +02:00			`//`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00
			`//@ compile-flags: -C no-prepopulate-passes`

			`#![crate_type = "lib"]`
remove most `simd_` intrinsic declaration in tests instead, we can just import the intrinsics from core 2025-02-24 17:26:56 +01:00			`#![feature(repr_simd, core_intrinsics)]`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00			`#![allow(non_camel_case_types)]`

remove most `simd_` intrinsic declaration in tests instead, we can just import the intrinsics from core 2025-02-24 17:26:56 +01:00			`use std::intrinsics::simd::simd_scatter;`

add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00			`#[repr(simd)]`
			`#[derive(Copy, Clone, PartialEq, Debug)]`
Ban non-array SIMD 2024-08-22 01:28:20 -07:00			`pub struct Vec2<T>(pub [T; 2]);`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00
			`#[repr(simd)]`
			`#[derive(Copy, Clone, PartialEq, Debug)]`
Ban non-array SIMD 2024-08-22 01:28:20 -07:00			`pub struct Vec4<T>(pub [T; 4]);`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00
			`// CHECK-LABEL: @scatter_f32x2`
			`#[no_mangle]`
remove most `simd_` intrinsic declaration in tests instead, we can just import the intrinsics from core 2025-02-24 17:26:56 +01:00			`pub unsafe fn scatter_f32x2(pointers: Vec2<*mut f32>, mask: Vec2<i32>, values: Vec2<f32>) {`
Fix SIMD codegen tests on LLVM 20 The splat contents are printed differently on LLVM 20. 2025-01-27 15:10:42 +01:00			`// CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}`
Consistently use the most significant bit of vector masks This improves the codegen for vector `select`, `gather`, `scatter` and boolean reduction intrinsics and fixes rust-lang/portable-simd#316. The current behavior of most mask operations during llvm codegen is to truncate the mask vector to <N x i1>, telling llvm to use the least significant bit. The exception is the `simd_bitmask` intrinsics, which already used the most significant bit. Since sse/avx instructions are defined to use the most significant bit, truncating means that llvm has to insert a left shift to move the bit into the most significant position, before the mask can actually be used. Similarly on aarch64, mask operations like blend work bit by bit, repeating the least significant bit across the whole lane involves shifting it into the sign position and then comparing against zero. By shifting before truncating to <N x i1>, we tell llvm that we only consider the most significant bit, removing the need for additional shift instructions in the assembly. 2023-01-04 23:55:40 +01:00			`// CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>`
			`// CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00			`simd_scatter(values, pointers, mask)`
			`}`

simd intrinsics with mask: accept unsigned integer masks 2025-03-03 18:18:33 +01:00			`// CHECK-LABEL: @scatter_f32x2_unsigned`
			`#[no_mangle]`
			`pub unsafe fn scatter_f32x2_unsigned(pointers: Vec2<*mut f32>, mask: Vec2<u32>, values: Vec2<f32>) {`
			`// CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}`
			`// CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>`
			`// CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]`
			`simd_scatter(values, pointers, mask)`
			`}`

add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00			`// CHECK-LABEL: @scatter_pf32x2`
			`#[no_mangle]`
remove most `simd_` intrinsic declaration in tests instead, we can just import the intrinsics from core 2025-02-24 17:26:56 +01:00			`pub unsafe fn scatter_pf32x2(`
			`pointers: Vec2<*mut *const f32>,`
			`mask: Vec2<i32>,`
			`values: Vec2<*const f32>,`
			`) {`
Fix SIMD codegen tests on LLVM 20 The splat contents are printed differently on LLVM 20. 2025-01-27 15:10:42 +01:00			`// CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}`
Consistently use the most significant bit of vector masks This improves the codegen for vector `select`, `gather`, `scatter` and boolean reduction intrinsics and fixes rust-lang/portable-simd#316. The current behavior of most mask operations during llvm codegen is to truncate the mask vector to <N x i1>, telling llvm to use the least significant bit. The exception is the `simd_bitmask` intrinsics, which already used the most significant bit. Since sse/avx instructions are defined to use the most significant bit, truncating means that llvm has to insert a left shift to move the bit into the most significant position, before the mask can actually be used. Similarly on aarch64, mask operations like blend work bit by bit, repeating the least significant bit across the whole lane involves shifting it into the sign position and then comparing against zero. By shifting before truncating to <N x i1>, we tell llvm that we only consider the most significant bit, removing the need for additional shift instructions in the assembly. 2023-01-04 23:55:40 +01:00			`// CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>`
			`// CHECK: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]`
add simd float intrinsics and gather/scatter 2018-05-04 20:07:35 +02:00			`simd_scatter(values, pointers, mask)`
			`}`