1
Fork 0

Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk

Stop generating `alloca`s & `memcmp` for simple short array equality

Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool { x == y }
```

Before:
```llvm
define zeroext i1 `@_ZN10playground4demo17h48537f7eac23948fE(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %y = alloca [6 x i16], align 8
  %x = alloca [6 x i16], align 8
  %.0..sroa_cast = bitcast [6 x i16]* %x to i96*
  store i96 %0, i96* %.0..sroa_cast, align 8
  %.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
  store i96 %1, i96* %.0..sroa_cast3, align 8
  %_11.i.i.i = bitcast [6 x i16]* %x to i8*
  %_14.i.i.i = bitcast [6 x i16]* %y to i8*
  %bcmp.i.i.i = call i32 `@bcmp(i8*` nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
  %2 = icmp eq i32 %bcmp.i.i.i, 0
  ret i1 %2
}
```
```x86
playground::demo: # `@playground::demo`
	sub	rsp, 32
	mov	qword ptr [rsp], rdi
	mov	dword ptr [rsp + 8], esi
	mov	qword ptr [rsp + 16], rdx
	mov	dword ptr [rsp + 24], ecx
	xor	rdi, rdx
	xor	esi, ecx
	or	rsi, rdi
	sete	al
	add	rsp, 32
	ret
```

After:
```llvm
define zeroext i1 `@_ZN4mini4demo17h7a8994aaa314c981E(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %2 = icmp eq i96 %0, %1
  ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
	xor	rcx, r8
	xor	edx, r9d
	or	rdx, rcx
	sete	al
	ret
```
This commit is contained in:
bors 2021-07-09 09:16:27 +00:00
commit ee86f96ba1
15 changed files with 410 additions and 114 deletions

View file

@ -1115,6 +1115,40 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
);
ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
};
raw_eq, <T>(v lhs_ref, v rhs_ref) {
fn type_by_size(size: Size) -> Option<Type> {
Type::int(size.bits().try_into().ok()?)
}
let size = fx.layout_of(T).layout.size;
let is_eq_value =
if size == Size::ZERO {
// No bytes means they're trivially equal
fx.bcx.ins().iconst(types::I8, 1)
} else if let Some(clty) = type_by_size(size) {
// Can't use `trusted` for these loads; they could be unaligned.
let mut flags = MemFlags::new();
flags.set_notrap();
let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0);
let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0);
let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val);
fx.bcx.ins().bint(types::I8, eq)
} else {
// Just call `memcmp` (like slices do in core) when the
// size is too large or it's not a power-of-two.
let ptr_ty = pointer_ty(fx.tcx);
let signed_bytes = i64::try_from(size.bytes()).unwrap();
let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes);
let params = vec![AbiParam::new(ptr_ty); 3];
let returns = vec![AbiParam::new(types::I32)];
let args = &[lhs_ref, rhs_ref, bytes_val];
let cmp = fx.lib_call("memcmp", params, returns, args)[0];
let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0);
fx.bcx.ins().bint(types::I8, eq)
};
ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
};
}
if let Some((_, dest)) = destination {

View file

@ -453,6 +453,10 @@ impl<'tcx> CPlace<'tcx> {
ptr.store(fx, data, MemFlags::trusted());
ptr.load(fx, dst_ty, MemFlags::trusted())
}
// `CValue`s should never contain SSA-only types, so if you ended
// up here having seen an error like `B1 -> I8`, then before
// calling `write_cvalue` you need to add a `bint` instruction.
_ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty),
};
//fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index()));

View file

@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> {
let t_i32 = self.type_i32();
let t_i64 = self.type_i64();
let t_i128 = self.type_i128();
let t_isize = self.type_isize();
let t_f32 = self.type_f32();
let t_f64 = self.type_f64();
@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> {
ifn!("llvm.assume", fn(i1) -> void);
ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void);
// This isn't an "LLVM intrinsic", but LLVM's optimization passes
// recognize it like one and we assume it exists in `core::slice::cmp`
ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32);
// variadic intrinsics
ifn!("llvm.va_start", fn(i8p) -> void);
ifn!("llvm.va_end", fn(i8p) -> void);

View file

@ -296,6 +296,44 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
}
}
sym::raw_eq => {
use abi::Abi::*;
let tp_ty = substs.type_at(0);
let layout = self.layout_of(tp_ty).layout;
let use_integer_compare = match layout.abi {
Scalar(_) | ScalarPair(_, _) => true,
Uninhabited | Vector { .. } => false,
Aggregate { .. } => {
// For rusty ABIs, small aggregates are actually passed
// as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
// so we re-use that same threshold here.
layout.size <= self.data_layout().pointer_size * 2
}
};
let a = args[0].immediate();
let b = args[1].immediate();
if layout.size.bytes() == 0 {
self.const_bool(true)
} else if use_integer_compare {
let integer_ty = self.type_ix(layout.size.bits());
let ptr_ty = self.type_ptr_to(integer_ty);
let a_ptr = self.bitcast(a, ptr_ty);
let a_val = self.load(a_ptr, layout.align.abi);
let b_ptr = self.bitcast(b, ptr_ty);
let b_val = self.load(b_ptr, layout.align.abi);
self.icmp(IntPredicate::IntEQ, a_val, b_val)
} else {
let i8p_ty = self.type_i8p();
let a_ptr = self.bitcast(a, i8p_ty);
let b_ptr = self.bitcast(b, i8p_ty);
let n = self.const_usize(layout.size.bytes());
let llfn = self.get_intrinsic("memcmp");
let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None);
self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
}
}
_ if name_str.starts_with("simd_") => {
match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
Ok(llval) => llval,

View file

@ -472,6 +472,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
throw_ub_format!("`assume` intrinsic called with `false`");
}
}
sym::raw_eq => {
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
self.write_scalar(result, dest)?;
}
_ => return Ok(false),
}
@ -559,4 +563,19 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
self.memory.copy(src, align, dst, align, size, nonoverlapping)
}
pub(crate) fn raw_eq_intrinsic(
&mut self,
lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
rhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>,
) -> InterpResult<'tcx, Scalar<M::PointerTag>> {
let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?;
assert!(!layout.is_unsized());
let lhs = self.read_scalar(lhs)?.check_init()?;
let rhs = self.read_scalar(rhs)?.check_init()?;
let lhs_bytes = self.memory.read_bytes(lhs, layout.size)?;
let rhs_bytes = self.memory.read_bytes(rhs, layout.size)?;
Ok(Scalar::from_bool(lhs_bytes == rhs_bytes))
}
}

View file

@ -934,6 +934,7 @@ symbols! {
quote,
range_inclusive_new,
raw_dylib,
raw_eq,
raw_identifiers,
raw_ref_op,
re_rebalance_coherence,

View file

@ -380,6 +380,13 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()),
sym::raw_eq => {
let br = ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) };
let param_ty =
tcx.mk_imm_ref(tcx.mk_region(ty::ReLateBound(ty::INNERMOST, br)), param(0));
(1, vec![param_ty; 2], tcx.types.bool)
}
other => {
tcx.sess.emit_err(UnrecognizedIntrinsicFunction { span: it.span, name: other });
return;