Implement rotate using funnel shift on LLVM >= 7
Implement the rotate_left and rotate_right operations using llvm.fshl and llvm.fshr if they are available (LLVM >= 7). Originally I wanted to expose the funnel_shift_left and funnel_shift_right intrinsics and implement rotate_left and rotate_right on top of them. However, emulation of funnel shifts requires emitting a conditional to check for zero shift amount, which is not necessary for rotates. I was uncomfortable doing that here, as I don't want to rely on LLVM to optimize away that conditional (and for variable rotates, I'm not sure it can). We should revisit that question when we raise our minimum version requirement to LLVM 7 and don't need emulation code anymore.
This commit is contained in:
parent
2ad8c7b350
commit
4c40ff6a24
10 changed files with 93 additions and 8 deletions
|
@ -1465,6 +1465,20 @@ extern "rust-intrinsic" {
|
|||
/// y < 0 or y >= N, where N is the width of T in bits.
|
||||
pub fn unchecked_shr<T>(x: T, y: T) -> T;
|
||||
|
||||
/// Performs rotate left: rotates the bits of `x` to the left by `y` positions.
///
/// Both the value `x` and the rotation amount `y` have the same integer type `T`.
///
|
||||
/// The stabilized versions of this intrinsic are available on the integer
|
||||
/// primitives via the `rotate_left` method. For example,
|
||||
/// [`std::u32::rotate_left`](../../std/primitive.u32.html#method.rotate_left)
|
||||
// Only declared when not building with the `stage0` cfg.
#[cfg(not(stage0))]
|
||||
pub fn rotate_left<T>(x: T, y: T) -> T;
|
||||
|
||||
/// Performs rotate right: rotates the bits of `x` to the right by `y` positions.
///
/// Both the value `x` and the rotation amount `y` have the same integer type `T`.
///
|
||||
/// The stabilized versions of this intrinsic are available on the integer
|
||||
/// primitives via the `rotate_right` method. For example,
|
||||
/// [`std::u32::rotate_right`](../../std/primitive.u32.html#method.rotate_right)
|
||||
// Only declared when not building with the `stage0` cfg.
#[cfg(not(stage0))]
|
||||
pub fn rotate_right<T>(x: T, y: T) -> T;
|
||||
|
||||
/// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits.
|
||||
/// The stabilized versions of this intrinsic are available on the integer
|
||||
/// primitives via the `wrapping_add` method. For example,
|
||||
|
|
|
@ -2301,9 +2301,14 @@ assert_eq!(n.rotate_left(", $rot, "), m);
|
|||
#[rustc_const_unstable(feature = "const_int_rotate")]
|
||||
#[inline]
|
||||
pub const fn rotate_left(self, n: u32) -> Self {
// Rotates the bits of `self` left by `n`. Exactly one of the two cfg-gated
// blocks below is compiled in and becomes the function's value.
|
||||
// Not `stage0`: delegate to the `rotate_left` intrinsic. The intrinsic takes
// both arguments with the same type, hence the cast of `n` to `$SelfT`.
#[cfg(not(stage0))] {
|
||||
unsafe { intrinsics::rotate_left(self, n as $SelfT) }
|
||||
}
|
||||
// `stage0`: the intrinsic is not declared under this cfg, so the rotate is
// open-coded as two shifts OR-ed together. The outer `% $BITS` maps the
// complementary shift of `$BITS` (reached when `n % $BITS == 0`) to 0, so
// both shift amounts stay strictly below `$BITS`.
#[cfg(stage0)] {
|
||||
(self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
doc_comment! {
|
||||
concat!("Shifts the bits to the right by a specified amount, `n`,
|
||||
|
@ -2326,9 +2331,14 @@ assert_eq!(n.rotate_right(", $rot, "), m);
|
|||
#[rustc_const_unstable(feature = "const_int_rotate")]
|
||||
#[inline]
|
||||
pub const fn rotate_right(self, n: u32) -> Self {
// Rotates the bits of `self` right by `n`. Exactly one of the two cfg-gated
// blocks below is compiled in and becomes the function's value.
|
||||
// Not `stage0`: delegate to the `rotate_right` intrinsic. The intrinsic takes
// both arguments with the same type, hence the cast of `n` to `$SelfT`.
#[cfg(not(stage0))] {
|
||||
unsafe { intrinsics::rotate_right(self, n as $SelfT) }
|
||||
}
|
||||
// `stage0`: the intrinsic is not declared under this cfg, so the rotate is
// open-coded as two shifts OR-ed together. The outer `% $BITS` maps the
// complementary shift of `$BITS` (reached when `n % $BITS == 0`) to 0, so
// both shift amounts stay strictly below `$BITS`.
#[cfg(stage0)] {
|
||||
(self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
doc_comment! {
|
||||
concat!("
|
||||
|
|
|
@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> {
|
|||
ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
|
||||
ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
|
||||
|
||||
ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
|
||||
ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
|
||||
ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
|
||||
ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
|
||||
ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
|
||||
|
||||
ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
|
||||
ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
|
||||
ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
|
||||
ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
|
||||
ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
|
||||
|
||||
ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1});
|
||||
ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1});
|
||||
ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1});
|
||||
|
|
|
@ -23,6 +23,7 @@ use value::Value;
|
|||
use llvm;
|
||||
use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor,
|
||||
DICompositeType, DILexicalBlock, DIFlags};
|
||||
use llvm_util;
|
||||
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc::hir::CodegenFnAttrFlags;
|
||||
|
@ -1169,9 +1170,8 @@ fn prepare_union_metadata(
|
|||
fn use_enum_fallback(cx: &CodegenCx) -> bool {
|
||||
// On MSVC we have to use the fallback mode, because LLVM doesn't
|
||||
// lower variant parts to PDB.
|
||||
return cx.sess().target.target.options.is_like_msvc || unsafe {
|
||||
llvm::LLVMRustVersionMajor() < 7
|
||||
};
|
||||
return cx.sess().target.target.options.is_like_msvc
|
||||
|| llvm_util::get_major_version() < 7;
|
||||
}
|
||||
|
||||
// Describes the members of an enum value: An enum is described as a union of
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
use attributes;
|
||||
use intrinsics::{self, Intrinsic};
|
||||
use llvm::{self, TypeKind};
|
||||
use llvm_util;
|
||||
use abi::{Abi, FnType, LlvmType, PassMode};
|
||||
use mir::place::PlaceRef;
|
||||
use mir::operand::{OperandRef, OperandValue};
|
||||
|
@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call(
|
|||
"ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" |
|
||||
"bitreverse" | "add_with_overflow" | "sub_with_overflow" |
|
||||
"mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" |
|
||||
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" => {
|
||||
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" |
|
||||
"rotate_left" | "rotate_right" => {
|
||||
let ty = arg_tys[0];
|
||||
match int_type_width_signed(ty, cx) {
|
||||
Some((width, signed)) =>
|
||||
|
@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call(
|
|||
} else {
|
||||
bx.lshr(args[0].immediate(), args[1].immediate())
|
||||
},
|
||||
"rotate_left" | "rotate_right" => {
|
||||
let is_left = name == "rotate_left";
|
||||
let val = args[0].immediate();
|
||||
let raw_shift = args[1].immediate();
|
||||
if llvm_util::get_major_version() >= 7 {
|
||||
// rotate = funnel shift with first two args the same
|
||||
let llvm_name = &format!("llvm.fsh{}.i{}",
|
||||
if is_left { 'l' } else { 'r' }, width);
|
||||
let llfn = cx.get_intrinsic(llvm_name);
|
||||
bx.call(llfn, &[val, val, raw_shift], None)
|
||||
} else {
|
||||
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
|
||||
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
|
||||
let width = C_uint(Type::ix(cx, width), width);
|
||||
let shift = bx.urem(raw_shift, width);
|
||||
let inv_shift = bx.urem(bx.sub(width, raw_shift), width);
|
||||
let shift1 = bx.shl(val, if is_left { shift } else { inv_shift });
|
||||
let shift2 = bx.lshr(val, if !is_left { shift } else { inv_shift });
|
||||
bx.or(shift1, shift2)
|
||||
}
|
||||
},
|
||||
_ => bug!(),
|
||||
},
|
||||
None => {
|
||||
|
|
|
@ -256,6 +256,10 @@ pub fn print_version() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the LLVM major version as reported by `llvm::LLVMRustVersionMajor`.
///
/// Safe wrapper around the FFI call, so callers can compare against a version
/// number without writing their own `unsafe` block.
pub fn get_major_version() -> u32 {
|
||||
// NOTE(review): presumably the FFI call has no preconditions (it takes no
// arguments and only returns a number) — confirm against the C++ wrapper.
unsafe { llvm::LLVMRustVersionMajor() }
|
||||
}
|
||||
|
||||
pub fn print_passes() {
|
||||
// Can be called without initializing LLVM
|
||||
unsafe { llvm::LLVMRustPrintPasses(); }
|
||||
|
|
|
@ -12,6 +12,7 @@ use common::{C_i32, C_null};
|
|||
use libc::c_uint;
|
||||
use llvm::{self, BasicBlock};
|
||||
use llvm::debuginfo::DIScope;
|
||||
use llvm_util;
|
||||
use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts};
|
||||
use rustc::ty::layout::{LayoutOf, TyLayout};
|
||||
use rustc::mir::{self, Mir};
|
||||
|
@ -612,7 +613,7 @@ fn arg_local_refs(
|
|||
// doesn't actually strip the offset when splitting the closure
|
||||
// environment into its components so it ends up out of bounds.
|
||||
// (cuviper) It seems to be fine without the alloca on LLVM 6 and later.
|
||||
let env_alloca = !env_ref && unsafe { llvm::LLVMRustVersionMajor() < 6 };
|
||||
let env_alloca = !env_ref && llvm_util::get_major_version() < 6;
|
||||
let env_ptr = if env_alloca {
|
||||
let scratch = PlaceRef::alloca(bx,
|
||||
bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)),
|
||||
|
|
|
@ -150,6 +150,24 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M>
|
|||
}
|
||||
self.write_scalar(val, dest)?;
|
||||
}
|
||||
"rotate_left" | "rotate_right" => {
|
||||
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
|
||||
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
|
||||
let layout = self.layout_of(substs.type_at(0))?;
|
||||
let val_bits = self.read_scalar(args[0])?.to_bits(layout.size)?;
|
||||
let raw_shift_bits = self.read_scalar(args[1])?.to_bits(layout.size)?;
|
||||
let width_bits = layout.size.bits() as u128;
|
||||
let shift_bits = raw_shift_bits % width_bits;
|
||||
let inv_shift_bits = (width_bits - raw_shift_bits) % width_bits;
|
||||
let result_bits = if intrinsic_name == "rotate_left" {
|
||||
(val_bits << shift_bits) | (val_bits >> inv_shift_bits)
|
||||
} else {
|
||||
(val_bits >> shift_bits) | (val_bits << inv_shift_bits)
|
||||
};
|
||||
let truncated_bits = self.truncate(result_bits, layout);
|
||||
let result = Scalar::from_uint(truncated_bits, layout.size);
|
||||
self.write_scalar(result, dest)?;
|
||||
}
|
||||
"transmute" => {
|
||||
self.copy_op_transmute(args[0], dest)?;
|
||||
}
|
||||
|
|
|
@ -869,6 +869,8 @@ impl<'a, 'tcx> Visitor<'tcx> for Qualifier<'a, 'tcx, 'tcx> {
|
|||
| "overflowing_mul"
|
||||
| "unchecked_shl"
|
||||
| "unchecked_shr"
|
||||
| "rotate_left"
|
||||
| "rotate_right"
|
||||
| "add_with_overflow"
|
||||
| "sub_with_overflow"
|
||||
| "mul_with_overflow"
|
||||
|
|
|
@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
|
|||
|
||||
"unchecked_div" | "unchecked_rem" | "exact_div" =>
|
||||
(1, vec![param(0), param(0)], param(0)),
|
||||
"unchecked_shl" | "unchecked_shr" =>
|
||||
"unchecked_shl" | "unchecked_shr" |
|
||||
"rotate_left" | "rotate_right" =>
|
||||
(1, vec![param(0), param(0)], param(0)),
|
||||
|
||||
"overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue