remove simd_reduce_{min,max}_nanless
This commit is contained in:
parent
3dc631a61a
commit
07b6240947
10 changed files with 10 additions and 69 deletions
|
@ -743,7 +743,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
||||||
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
|
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
|
||||||
}
|
}
|
||||||
|
|
||||||
sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
|
sym::simd_reduce_min => {
|
||||||
intrinsic_args!(fx, args => (v); intrinsic);
|
intrinsic_args!(fx, args => (v); intrinsic);
|
||||||
|
|
||||||
if !v.layout().ty.is_simd() {
|
if !v.layout().ty.is_simd() {
|
||||||
|
@ -762,7 +762,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
|
sym::simd_reduce_max => {
|
||||||
intrinsic_args!(fx, args => (v); intrinsic);
|
intrinsic_args!(fx, args => (v); intrinsic);
|
||||||
|
|
||||||
if !v.layout().ty.is_simd() {
|
if !v.layout().ty.is_simd() {
|
||||||
|
|
|
@ -1041,9 +1041,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
|
||||||
|
|
||||||
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
||||||
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
||||||
// TODO(sadlerap): revisit these intrinsics to generate more optimal reductions
|
|
||||||
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin);
|
|
||||||
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax);
|
|
||||||
|
|
||||||
macro_rules! bitwise_red {
|
macro_rules! bitwise_red {
|
||||||
($name:ident : $op:expr, $boolean:expr) => {
|
($name:ident : $op:expr, $boolean:expr) => {
|
||||||
|
|
|
@ -1406,22 +1406,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
|
||||||
llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ false)
|
llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn vector_reduce_fmin_fast(&mut self, src: &'ll Value) -> &'ll Value {
|
|
||||||
unsafe {
|
|
||||||
let instr =
|
|
||||||
llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, /*NoNaNs:*/ true);
|
|
||||||
llvm::LLVMRustSetFastMath(instr);
|
|
||||||
instr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn vector_reduce_fmax_fast(&mut self, src: &'ll Value) -> &'ll Value {
|
|
||||||
unsafe {
|
|
||||||
let instr =
|
|
||||||
llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ true);
|
|
||||||
llvm::LLVMRustSetFastMath(instr);
|
|
||||||
instr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn vector_reduce_min(&mut self, src: &'ll Value, is_signed: bool) -> &'ll Value {
|
pub fn vector_reduce_min(&mut self, src: &'ll Value, is_signed: bool) -> &'ll Value {
|
||||||
unsafe { llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed) }
|
unsafe { llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed) }
|
||||||
}
|
}
|
||||||
|
|
|
@ -1920,9 +1920,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
|
||||||
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
|
||||||
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
|
||||||
|
|
||||||
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin_fast);
|
|
||||||
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax_fast);
|
|
||||||
|
|
||||||
macro_rules! bitwise_red {
|
macro_rules! bitwise_red {
|
||||||
($name:ident : $red:ident, $boolean:expr) => {
|
($name:ident : $red:ident, $boolean:expr) => {
|
||||||
if name == sym::$name {
|
if name == sym::$name {
|
||||||
|
|
|
@ -606,9 +606,7 @@ pub fn check_platform_intrinsic_type(
|
||||||
| sym::simd_reduce_or
|
| sym::simd_reduce_or
|
||||||
| sym::simd_reduce_xor
|
| sym::simd_reduce_xor
|
||||||
| sym::simd_reduce_min
|
| sym::simd_reduce_min
|
||||||
| sym::simd_reduce_max
|
| sym::simd_reduce_max => (2, 0, vec![param(0)], param(1)),
|
||||||
| sym::simd_reduce_min_nanless
|
|
||||||
| sym::simd_reduce_max_nanless => (2, 0, vec![param(0)], param(1)),
|
|
||||||
sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)),
|
sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)),
|
||||||
sym::simd_shuffle_generic => (2, 1, vec![param(0), param(0)], param(1)),
|
sym::simd_shuffle_generic => (2, 1, vec![param(0), param(0)], param(1)),
|
||||||
_ => {
|
_ => {
|
||||||
|
|
|
@ -418,17 +418,11 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
// Enable all fast-math flags, including those which will cause floating-point operations
|
// Enable all fast-math flags, including those which will cause floating-point operations
|
||||||
// to return poison for some well-defined inputs. This function can only be used to build
|
// to return poison for some well-defined inputs. This function can only be used to build
|
||||||
// unsafe Rust intrinsics. That unsafety does permit additional optimizations, but at the
|
// unsafe Rust intrinsics. That unsafety does permit additional optimizations, but at the
|
||||||
// time of writing, their value is not well-understood relative to those enabled by
|
// time of writing, their value is not well-understood relative to those enabled by
|
||||||
// LLVMRustSetAlgebraicMath.
|
// LLVMRustSetAlgebraicMath.
|
||||||
||||||| parent of 019019d83e2 (make simd_reduce_{mul,add}_unordered use only the 'reassoc' flag, not all fast-math flags)
|
|
||||||
// Enable a fast-math flag
|
|
||||||
=======
|
|
||||||
// Enable all fast-math flags
|
|
||||||
>>>>>>> 019019d83e2 (make simd_reduce_{mul,add}_unordered use only the 'reassoc' flag, not all fast-math flags)
|
|
||||||
//
|
//
|
||||||
// https://llvm.org/docs/LangRef.html#fast-math-flags
|
// https://llvm.org/docs/LangRef.html#fast-math-flags
|
||||||
extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
|
extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
|
||||||
|
@ -456,7 +450,12 @@ extern "C" void LLVMRustSetAlgebraicMath(LLVMValueRef V) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable the reassoc fast-math flag
|
// Enable the reassoc fast-math flag, allowing transformations that pretend
|
||||||
|
// floating-point addition and multiplication are associative.
|
||||||
|
//
|
||||||
|
// Note that this does NOT enable any flags which can cause a floating-point operation on
|
||||||
|
// well-defined inputs to return poison, and therefore this function can be used to build
|
||||||
|
// safe Rust intrinsics (such as fadd_algebraic).
|
||||||
//
|
//
|
||||||
// https://llvm.org/docs/LangRef.html#fast-math-flags
|
// https://llvm.org/docs/LangRef.html#fast-math-flags
|
||||||
extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
|
extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
|
||||||
|
|
|
@ -1553,9 +1553,7 @@ symbols! {
|
||||||
simd_reduce_and,
|
simd_reduce_and,
|
||||||
simd_reduce_any,
|
simd_reduce_any,
|
||||||
simd_reduce_max,
|
simd_reduce_max,
|
||||||
simd_reduce_max_nanless,
|
|
||||||
simd_reduce_min,
|
simd_reduce_min,
|
||||||
simd_reduce_min_nanless,
|
|
||||||
simd_reduce_mul_ordered,
|
simd_reduce_mul_ordered,
|
||||||
simd_reduce_mul_unordered,
|
simd_reduce_mul_unordered,
|
||||||
simd_reduce_or,
|
simd_reduce_or,
|
||||||
|
|
|
@ -385,19 +385,6 @@ extern "platform-intrinsic" {
|
||||||
/// For floating-point values, uses IEEE-754 `maxNum`.
|
/// For floating-point values, uses IEEE-754 `maxNum`.
|
||||||
pub fn simd_reduce_max<T, U>(x: T) -> U;
|
pub fn simd_reduce_max<T, U>(x: T) -> U;
|
||||||
|
|
||||||
/// Return the maximum element of a vector.
|
|
||||||
///
|
|
||||||
/// `T` must be a vector of integer or floating-point primitive types.
|
|
||||||
///
|
|
||||||
/// `U` must be the element type of `T`.
|
|
||||||
///
|
|
||||||
/// For floating-point values, uses IEEE-754 `maxNum`.
|
|
||||||
///
|
|
||||||
/// # Safety
|
|
||||||
///
|
|
||||||
/// All input elements must be finite (i.e., not NAN and not +/- INF).
|
|
||||||
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
|
|
||||||
|
|
||||||
/// Return the minimum element of a vector.
|
/// Return the minimum element of a vector.
|
||||||
///
|
///
|
||||||
/// `T` must be a vector of integer or floating-point primitive types.
|
/// `T` must be a vector of integer or floating-point primitive types.
|
||||||
|
@ -407,19 +394,6 @@ extern "platform-intrinsic" {
|
||||||
/// For floating-point values, uses IEEE-754 `minNum`.
|
/// For floating-point values, uses IEEE-754 `minNum`.
|
||||||
pub fn simd_reduce_min<T, U>(x: T) -> U;
|
pub fn simd_reduce_min<T, U>(x: T) -> U;
|
||||||
|
|
||||||
/// Return the minimum element of a vector.
|
|
||||||
///
|
|
||||||
/// `T` must be a vector of integer or floating-point primitive types.
|
|
||||||
///
|
|
||||||
/// `U` must be the element type of `T`.
|
|
||||||
///
|
|
||||||
/// For floating-point values, uses IEEE-754 `minNum`.
|
|
||||||
///
|
|
||||||
/// # Safety
|
|
||||||
///
|
|
||||||
/// All input elements must be finite (i.e., not NAN and not +/- INF).
|
|
||||||
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
|
|
||||||
|
|
||||||
/// Logical "and" all elements together.
|
/// Logical "and" all elements together.
|
||||||
///
|
///
|
||||||
/// `T` must be a vector of integer or floating-point primitive types.
|
/// `T` must be a vector of integer or floating-point primitive types.
|
||||||
|
|
|
@ -12,7 +12,7 @@ use std::arch::x86_64::*;
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
#[target_feature(enable = "avx512f")] // Function-level target feature mismatches inhibit inlining
|
#[target_feature(enable = "avx512f")] // Function-level target feature mismatches inhibit inlining
|
||||||
pub unsafe fn demo() -> bool {
|
pub unsafe fn demo() -> bool {
|
||||||
// CHECK: %0 = tail call reassoc nsz arcp contract double @llvm.vector.reduce.fadd.v8f64(
|
// CHECK: %0 = tail call reassoc double @llvm.vector.reduce.fadd.v8f64(
|
||||||
// CHECK: %_0.i = fcmp uno double %0, 0.000000e+00
|
// CHECK: %_0.i = fcmp uno double %0, 0.000000e+00
|
||||||
// CHECK: ret i1 %_0.i
|
// CHECK: ret i1 %_0.i
|
||||||
let res = unsafe {
|
let res = unsafe {
|
||||||
|
|
|
@ -31,8 +31,6 @@ extern "platform-intrinsic" {
|
||||||
fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
|
fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
|
||||||
fn simd_reduce_min<T, U>(x: T) -> U;
|
fn simd_reduce_min<T, U>(x: T) -> U;
|
||||||
fn simd_reduce_max<T, U>(x: T) -> U;
|
fn simd_reduce_max<T, U>(x: T) -> U;
|
||||||
fn simd_reduce_min_nanless<T, U>(x: T) -> U;
|
|
||||||
fn simd_reduce_max_nanless<T, U>(x: T) -> U;
|
|
||||||
fn simd_reduce_and<T, U>(x: T) -> U;
|
fn simd_reduce_and<T, U>(x: T) -> U;
|
||||||
fn simd_reduce_or<T, U>(x: T) -> U;
|
fn simd_reduce_or<T, U>(x: T) -> U;
|
||||||
fn simd_reduce_xor<T, U>(x: T) -> U;
|
fn simd_reduce_xor<T, U>(x: T) -> U;
|
||||||
|
@ -127,10 +125,6 @@ fn main() {
|
||||||
assert_eq!(r, -2_f32);
|
assert_eq!(r, -2_f32);
|
||||||
let r: f32 = simd_reduce_max(x);
|
let r: f32 = simd_reduce_max(x);
|
||||||
assert_eq!(r, 4_f32);
|
assert_eq!(r, 4_f32);
|
||||||
let r: f32 = simd_reduce_min_nanless(x);
|
|
||||||
assert_eq!(r, -2_f32);
|
|
||||||
let r: f32 = simd_reduce_max_nanless(x);
|
|
||||||
assert_eq!(r, 4_f32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue