1
Fork 0

Implement rotate using funnel shift on LLVM >= 7

Implement the rotate_left and rotate_right operations using
llvm.fshl and llvm.fshr if they are available (LLVM >= 7).

Originally I wanted to expose the funnel_shift_left and
funnel_shift_right intrinsics and implement rotate_left and
rotate_right on top of them. However, emulation of funnel
shifts requires emitting a conditional to check for zero shift
amount, which is not necessary for rotates. I was uncomfortable
doing that here, as I don't want to rely on LLVM to optimize
away that conditional (and for variable rotates, I'm not sure it
can). We should revisit that question when we raise our minimum
version requirement to LLVM 7 and don't need emulation code
anymore.
This commit is contained in:
Nikita Popov 2018-11-03 15:48:29 +01:00
parent 2ad8c7b350
commit 4c40ff6a24
10 changed files with 93 additions and 8 deletions

View file

@ -1465,6 +1465,20 @@ extern "rust-intrinsic" {
/// y < 0 or y >= N, where N is the width of T in bits.
pub fn unchecked_shr<T>(x: T, y: T) -> T;
/// Performs rotate left.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_left` method. For example,
/// [`std::u32::rotate_left`](../../std/primitive.u32.html#method.rotate_left)
#[cfg(not(stage0))]
pub fn rotate_left<T>(x: T, y: T) -> T;
/// Performs rotate right.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_right` method. For example,
/// [`std::u32::rotate_right`](../../std/primitive.u32.html#method.rotate_right)
#[cfg(not(stage0))]
pub fn rotate_right<T>(x: T, y: T) -> T;
/// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `wrapping_add` method. For example,

View file

@ -2301,7 +2301,12 @@ assert_eq!(n.rotate_left(", $rot, "), m);
#[rustc_const_unstable(feature = "const_int_rotate")]
#[inline]
pub const fn rotate_left(self, n: u32) -> Self {
(self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
#[cfg(not(stage0))] {
unsafe { intrinsics::rotate_left(self, n as $SelfT) }
}
#[cfg(stage0)] {
(self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
}
}
}
@ -2326,7 +2331,12 @@ assert_eq!(n.rotate_right(", $rot, "), m);
#[rustc_const_unstable(feature = "const_int_rotate")]
#[inline]
pub const fn rotate_right(self, n: u32) -> Self {
(self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
#[cfg(not(stage0))] {
unsafe { intrinsics::rotate_right(self, n as $SelfT) }
}
#[cfg(stage0)] {
(self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
}
}
}

View file

@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> {
ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1});
ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1});
ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1});

View file

@ -23,6 +23,7 @@ use value::Value;
use llvm;
use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor,
DICompositeType, DILexicalBlock, DIFlags};
use llvm_util;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc::hir::CodegenFnAttrFlags;
@ -1169,9 +1170,8 @@ fn prepare_union_metadata(
fn use_enum_fallback(cx: &CodegenCx) -> bool {
// On MSVC we have to use the fallback mode, because LLVM doesn't
// lower variant parts to PDB.
return cx.sess().target.target.options.is_like_msvc || unsafe {
llvm::LLVMRustVersionMajor() < 7
};
return cx.sess().target.target.options.is_like_msvc
|| llvm_util::get_major_version() < 7;
}
// Describes the members of an enum value: An enum is described as a union of

View file

@ -13,6 +13,7 @@
use attributes;
use intrinsics::{self, Intrinsic};
use llvm::{self, TypeKind};
use llvm_util;
use abi::{Abi, FnType, LlvmType, PassMode};
use mir::place::PlaceRef;
use mir::operand::{OperandRef, OperandValue};
@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call(
"ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" |
"bitreverse" | "add_with_overflow" | "sub_with_overflow" |
"mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" |
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" => {
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" |
"rotate_left" | "rotate_right" => {
let ty = arg_tys[0];
match int_type_width_signed(ty, cx) {
Some((width, signed)) =>
@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call(
} else {
bx.lshr(args[0].immediate(), args[1].immediate())
},
"rotate_left" | "rotate_right" => {
let is_left = name == "rotate_left";
let val = args[0].immediate();
let raw_shift = args[1].immediate();
if llvm_util::get_major_version() >= 7 {
// rotate = funnel shift with first two args the same
let llvm_name = &format!("llvm.fsh{}.i{}",
if is_left { 'l' } else { 'r' }, width);
let llfn = cx.get_intrinsic(llvm_name);
bx.call(llfn, &[val, val, raw_shift], None)
} else {
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
let width = C_uint(Type::ix(cx, width), width);
let shift = bx.urem(raw_shift, width);
let inv_shift = bx.urem(bx.sub(width, raw_shift), width);
let shift1 = bx.shl(val, if is_left { shift } else { inv_shift });
let shift2 = bx.lshr(val, if !is_left { shift } else { inv_shift });
bx.or(shift1, shift2)
}
},
_ => bug!(),
},
None => {

View file

@ -256,6 +256,10 @@ pub fn print_version() {
}
}
pub fn get_major_version() -> u32 {
unsafe { llvm::LLVMRustVersionMajor() }
}
pub fn print_passes() {
// Can be called without initializing LLVM
unsafe { llvm::LLVMRustPrintPasses(); }

View file

@ -12,6 +12,7 @@ use common::{C_i32, C_null};
use libc::c_uint;
use llvm::{self, BasicBlock};
use llvm::debuginfo::DIScope;
use llvm_util;
use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts};
use rustc::ty::layout::{LayoutOf, TyLayout};
use rustc::mir::{self, Mir};
@ -612,7 +613,7 @@ fn arg_local_refs(
// doesn't actually strip the offset when splitting the closure
// environment into its components so it ends up out of bounds.
// (cuviper) It seems to be fine without the alloca on LLVM 6 and later.
let env_alloca = !env_ref && unsafe { llvm::LLVMRustVersionMajor() < 6 };
let env_alloca = !env_ref && llvm_util::get_major_version() < 6;
let env_ptr = if env_alloca {
let scratch = PlaceRef::alloca(bx,
bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)),

View file

@ -150,6 +150,24 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M>
}
self.write_scalar(val, dest)?;
}
"rotate_left" | "rotate_right" => {
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
let layout = self.layout_of(substs.type_at(0))?;
let val_bits = self.read_scalar(args[0])?.to_bits(layout.size)?;
let raw_shift_bits = self.read_scalar(args[1])?.to_bits(layout.size)?;
let width_bits = layout.size.bits() as u128;
let shift_bits = raw_shift_bits % width_bits;
let inv_shift_bits = (width_bits - raw_shift_bits) % width_bits;
let result_bits = if intrinsic_name == "rotate_left" {
(val_bits << shift_bits) | (val_bits >> inv_shift_bits)
} else {
(val_bits >> shift_bits) | (val_bits << inv_shift_bits)
};
let truncated_bits = self.truncate(result_bits, layout);
let result = Scalar::from_uint(truncated_bits, layout.size);
self.write_scalar(result, dest)?;
}
"transmute" => {
self.copy_op_transmute(args[0], dest)?;
}

View file

@ -869,6 +869,8 @@ impl<'a, 'tcx> Visitor<'tcx> for Qualifier<'a, 'tcx, 'tcx> {
| "overflowing_mul"
| "unchecked_shl"
| "unchecked_shr"
| "rotate_left"
| "rotate_right"
| "add_with_overflow"
| "sub_with_overflow"
| "mul_with_overflow"

View file

@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"unchecked_div" | "unchecked_rem" | "exact_div" =>
(1, vec![param(0), param(0)], param(0)),
"unchecked_shl" | "unchecked_shr" =>
"unchecked_shl" | "unchecked_shr" |
"rotate_left" | "rotate_right" =>
(1, vec![param(0), param(0)], param(0)),
"overflowing_add" | "overflowing_sub" | "overflowing_mul" =>