Merge commit '54cbb6e753' into sync_cg_clif-2024-03-08

commit 8fb8b08716

19 changed files with 308 additions and 121 deletions

@@ -392,18 +392,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
     }
 
     pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
-        if align <= 16 {
+        let abi_align = if self.tcx.sess.target.arch == "s390x" { 8 } else { 16 };
+        if align <= abi_align {
             let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
                 kind: StackSlotKind::ExplicitSlot,
-                // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-                // specify stack slot alignment.
-                size: (size + 15) / 16 * 16,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + abi_align - 1) / abi_align * abi_align,
             });
             Pointer::stack_slot(stack_slot)
         } else {
             // Alignment is too big to handle using the above hack. Dynamically realign a stack slot
             // instead. This wastes some space for the realignment.
-            let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + align) / abi_align * abi_align,
+            });
+            let base_ptr = self.bcx.ins().stack_addr(self.pointer_type, stack_slot, 0);
             let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
             let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
             Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
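
The dynamic realignment in the second branch over-allocates by `align` and then rounds the base address up to the next multiple of `align` via the emitted urem_imm/irsub_imm/iadd sequence. A minimal sketch of the same arithmetic on plain integers (the `realign` helper is illustrative, not part of the patch):

    // Round `base` up to a multiple of `align`, mirroring the CLIF sequence:
    // urem_imm computes base % align, irsub_imm computes align - rem.
    fn realign(base: u64, align: u64) -> u64 {
        assert!(align.is_power_of_two());
        let misalign_offset = base % align; // urem_imm
        let realign_offset = align - misalign_offset; // irsub_imm
        base + realign_offset // iadd
    }

    fn main() {
        // A base of 40 with 32-byte alignment lands on 64; an already aligned
        // base still moves up by a full `align`, which the `size + align`
        // over-allocation absorbs.
        assert_eq!(realign(40, 32), 64);
        assert_eq!(realign(64, 32), 96);
    }
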
@@ -372,7 +372,13 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
     }
 
     let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
-    data.define(bytes.into_boxed_slice());
+    if bytes.is_empty() {
+        // FIXME(bytecodealliance/wasmtime#7918) cranelift-jit has a bug where it causes UB on
+        // empty data objects
+        data.define(Box::new([0]));
+    } else {
+        data.define(bytes.into_boxed_slice());
+    }
 
     for &(offset, prov) in alloc.provenance().ptrs().iter() {
        let alloc_id = prov.alloc_id();
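
The JIT workaround simply guarantees that every data object is at least one byte long. The substitution in isolation (hypothetical `nonempty_bytes` helper, shown only to illustrate the branch):

    /// Pad an allocation's byte image so the backing data object is never
    /// zero-sized (see bytecodealliance/wasmtime#7918).
    fn nonempty_bytes(bytes: Vec<u8>) -> Box<[u8]> {
        if bytes.is_empty() { Box::new([0]) } else { bytes.into_boxed_slice() }
    }

    fn main() {
        assert_eq!(&*nonempty_bytes(vec![]), &[0u8][..]);
        assert_eq!(&*nonempty_bytes(vec![1, 2]), &[1u8, 2][..]);
    }
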
@@ -170,6 +170,65 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
 
+        "llvm.x86.sse.add.ss" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), ret.layout());
+            let layout = a.layout();
+
+            let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            let ret_lane_layout = fx.layout_of(lane_ty);
+
+            ret.write_cvalue(fx, a);
+
+            let a_lane = a.value_lane(fx, 0).load_scalar(fx);
+            let b_lane = b.value_lane(fx, 0).load_scalar(fx);
+
+            let res = fx.bcx.ins().fadd(a_lane, b_lane);
+
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
+        }
+
+        "llvm.x86.sse.sqrt.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().sqrt(lane)
+            });
+        }
+
+        "llvm.x86.sse.max.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmax(a_lane, b_lane),
+            );
+        }
+
+        "llvm.x86.sse.min.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps&ig_expand=4489
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmin(a_lane, b_lane),
+            );
+        }
+
         "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
             let (x, y, kind) = match args {
                 [x, y, kind] => (x, y, kind),
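
These handlers follow Intel's scalar-vs-packed split: `add.ss` computes lane 0 only and copies the remaining lanes from `a` (hence the `ret.write_cvalue(fx, a)` before the lane write), while the `.ps` intrinsics apply the operation to every lane. A plain-array model of the two shapes, assuming 4 x f32 vectors (illustrative only):

    // Scalar (.ss) shape: lane 0 is computed, lanes 1..4 pass through from `a`.
    fn add_ss(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
        [a[0] + b[0], a[1], a[2], a[3]]
    }

    // Packed (.ps) shape: the operation is applied to each lane.
    fn sqrt_ps(a: [f32; 4]) -> [f32; 4] {
        a.map(f32::sqrt)
    }

    fn main() {
        assert_eq!(add_ss([1.0, 2.0, 3.0, 4.0], [10.0, 0.0, 0.0, 0.0]), [11.0, 2.0, 3.0, 4.0]);
        assert_eq!(sqrt_ps([4.0, 9.0, 16.0, 25.0]), [2.0, 3.0, 4.0, 5.0]);
    }
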
@@ -1067,6 +1126,122 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        "llvm.x86.sha1rnds4" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32&ig_expand=5877
+            intrinsic_args!(fx, args => (a, b, _func); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let func = if let Some(func) =
+                crate::constant::mir_operand_get_const_val(fx, &args[2].node)
+            {
+                func
+            } else {
+                fx.tcx
+                    .dcx()
+                    .span_fatal(span, "Func argument for `_mm_sha1rnds4_epu32` is not a constant");
+            };
+
+            let func = func.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", func));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("sha1rnds4 xmm1, xmm2, {func}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32&ig_expand=5874
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32&ig_expand=5875
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1nexte" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32&ig_expand=5876
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1nexte xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
         "llvm.x86.sha256rnds2" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
             intrinsic_args!(fx, args => (a, b, k); intrinsic);
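
Because the SHA intrinsics are lowered to inline asm pinned to xmm1/xmm2, one way to sanity-check the lowering is against the stable wrappers in core::arch, which compile to the same instructions. A hedged usage sketch (x86_64 only, gated on runtime detection of the `sha` feature):

    #[cfg(target_arch = "x86_64")]
    fn main() {
        use std::arch::x86_64::{_mm_set_epi32, _mm_sha1msg1_epu32};

        if is_x86_feature_detected!("sha") {
            // SAFETY: the `sha` feature was verified at runtime above.
            let res = unsafe {
                let a = _mm_set_epi32(3, 2, 1, 0);
                let b = _mm_set_epi32(7, 6, 5, 4);
                _mm_sha1msg1_epu32(a, b) // intermediate SHA-1 message-schedule value
            };
            let _ = res;
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn main() {}
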
@@ -391,12 +391,15 @@ fn codegen_float_intrinsic_call<'tcx>(
         | sym::ceilf32
         | sym::ceilf64
         | sym::truncf32
-        | sym::truncf64 => {
+        | sym::truncf64
+        | sym::sqrtf32
+        | sym::sqrtf64 => {
             let val = match intrinsic {
                 sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
                 sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
                 sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
                 sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
+                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
                 _ => unreachable!(),
             };
 
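
With this change the whole family maps one intrinsic symbol to one CLIF instruction: fabs, floor, ceil, trunc, and now sqrt. The corresponding std float methods lower to exactly these intrinsics, so the routing can be exercised from ordinary code:

    fn main() {
        assert_eq!((-1.5f32).abs(), 1.5); // sym::fabsf32 -> fabs
        assert_eq!(1.7f64.floor(), 1.0); // sym::floorf64 -> floor
        assert_eq!(1.2f64.ceil(), 2.0); // sym::ceilf64 -> ceil
        assert_eq!((-1.7f32).trunc(), -1.0); // sym::truncf32 -> trunc
        assert_eq!(9.0f64.sqrt(), 3.0); // sym::sqrtf64 -> sqrt (newly handled here)
    }
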
@@ -853,7 +853,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         for lane in 0..lane_count {
-            let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
+            // The bit order of the mask depends on the byte endianness, LSB-first for
+            // little endian and MSB-first for big endian.
+            let mask_lane = match fx.tcx.sess.target.endian {
+                Endian::Big => lane_count - 1 - lane,
+                Endian::Little => lane,
+            };
+            let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(mask_lane) as i64);
             let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
             let a_lane = a.value_lane(fx, lane).load_scalar(fx);
             let b_lane = b.value_lane(fx, lane).load_scalar(fx);
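
The fix picks which bit of the mask gates each lane based on target endianness. A model of just that selection (hypothetical `mask_lane` helper mirroring the match above):

    fn mask_lane(lane: u64, lane_count: u64, big_endian: bool) -> u64 {
        if big_endian { lane_count - 1 - lane } else { lane }
    }

    fn main() {
        // For a 4-lane select with m = 0b0001: on little endian bit 0 gates
        // lane 0, while on big endian the same bit gates lane 3.
        let m: u64 = 0b0001;
        for lane in 0..4 {
            let le = (m >> mask_lane(lane, 4, false)) & 1;
            let be = (m >> mask_lane(lane, 4, true)) & 1;
            println!("lane {lane}: LE bit {le}, BE bit {be}");
        }
    }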