Merge commit '54cbb6e753' into sync_cg_clif-2024-03-08

bjorn3 2024-03-08 20:41:29 +00:00
commit 8fb8b08716
19 changed files with 308 additions and 121 deletions


@@ -392,18 +392,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
     }
     pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
-        if align <= 16 {
+        let abi_align = if self.tcx.sess.target.arch == "s390x" { 8 } else { 16 };
+        if align <= abi_align {
             let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
                 kind: StackSlotKind::ExplicitSlot,
-                // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-                // specify stack slot alignment.
-                size: (size + 15) / 16 * 16,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + abi_align - 1) / abi_align * abi_align,
             });
             Pointer::stack_slot(stack_slot)
         } else {
             // Alignment is too big to handle using the above hack. Dynamically realign a stack slot
             // instead. This wastes some space for the realignment.
-            let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + align) / abi_align * abi_align,
+            });
+            let base_ptr = self.bcx.ins().stack_addr(self.pointer_type, stack_slot, 0);
             let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
             let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
             Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
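
The over-aligned branch above allocates roughly size + align bytes and realigns the base pointer at runtime. A minimal sketch of the same arithmetic in plain Rust (hypothetical helper, not part of cg_clif; the Cranelift instructions used above are noted in the comments):

    fn realign(base_ptr: u64, align: u64) -> u64 {
        let misalign_offset = base_ptr % align;       // urem_imm(base_ptr, align)
        let realign_offset = align - misalign_offset; // irsub_imm(misalign_offset, align) = align - x
        base_ptr + realign_offset                     // iadd(base_ptr, realign_offset)
    }

Note that an already aligned base still gets bumped by a full `align`, which is why the slot is over-allocated ("wastes some space for the realignment").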


@@ -372,7 +372,13 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
         }
         let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
-        data.define(bytes.into_boxed_slice());
+        if bytes.is_empty() {
+            // FIXME(bytecodealliance/wasmtime#7918) cranelift-jit has a bug where it causes UB on
+            // empty data objects
+            data.define(Box::new([0]));
+        } else {
+            data.define(bytes.into_boxed_slice());
+        }
         for &(offset, prov) in alloc.provenance().ptrs().iter() {
             let alloc_id = prov.alloc_id();
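
The workaround reduces to never handing the JIT a zero-length data object; a one-byte placeholder is defined instead. A standalone sketch of the pattern (the helper name is hypothetical, for illustration only):

    fn ensure_non_empty(mut bytes: Vec<u8>) -> Box<[u8]> {
        if bytes.is_empty() {
            // cranelift-jit mishandles empty data objects (bytecodealliance/wasmtime#7918),
            // so substitute a single padding byte for empty allocations.
            bytes.push(0);
        }
        bytes.into_boxed_slice()
    }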


@@ -170,6 +170,65 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
+        "llvm.x86.sse.add.ss" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), ret.layout());
+            let layout = a.layout();
+            let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            let ret_lane_layout = fx.layout_of(lane_ty);
+            ret.write_cvalue(fx, a);
+            let a_lane = a.value_lane(fx, 0).load_scalar(fx);
+            let b_lane = b.value_lane(fx, 0).load_scalar(fx);
+            let res = fx.bcx.ins().fadd(a_lane, b_lane);
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
+        }
+        "llvm.x86.sse.sqrt.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
+            intrinsic_args!(fx, args => (a); intrinsic);
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().sqrt(lane)
+            });
+        }
+        "llvm.x86.sse.max.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmax(a_lane, b_lane),
+            );
+        }
+        "llvm.x86.sse.min.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps&ig_expand=4489
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmin(a_lane, b_lane),
+            );
+        }
         "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
             let (x, y, kind) = match args {
                 [x, y, kind] => (x, y, kind),
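
_mm_add_ss is a scalar operation on the lowest lane only, which is why the lowering above first copies `a` to the result and then overwrites lane 0. A plain-Rust reference of the intended semantics (hypothetical helper, for illustration):

    fn add_ss(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
        let mut r = a;      // corresponds to ret.write_cvalue(fx, a)
        r[0] = a[0] + b[0]; // fadd on lane 0 only; lanes 1..4 keep a's values
        r
    }

The packed variants (sqrt.ps, max.ps, min.ps) instead apply the operation to every lane, via simd_for_each_lane and simd_pair_for_each_lane.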
@@ -1067,6 +1126,122 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             );
         }
+        "llvm.x86.sha1rnds4" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32&ig_expand=5877
+            intrinsic_args!(fx, args => (a, b, _func); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            let func = if let Some(func) =
+                crate::constant::mir_operand_get_const_val(fx, &args[2].node)
+            {
+                func
+            } else {
+                fx.tcx
+                    .dcx()
+                    .span_fatal(span, "Func argument for `_mm_sha1rnds4_epu32` is not a constant");
+            };
+            let func = func.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", func));
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("sha1rnds4 xmm1, xmm2, {func}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha1msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32&ig_expand=5874
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha1msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32&ig_expand=5875
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.sha1nexte" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32&ig_expand=5876
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1nexte xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
         "llvm.x86.sha256rnds2" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
             intrinsic_args!(fx, args => (a, b, k); intrinsic);
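
These SHA arms are lowered through inline assembly with fixed xmm1/xmm2 operands because Cranelift does not expose the SHA-NI instructions directly. For sha1rnds4 the `func` argument selects which of the four SHA-1 round functions the instruction applies, and it is encoded as an immediate in the opcode, which is why a non-constant argument is a hard error above. A scalar sketch of that selection, assuming the standard SHA-1 round functions (illustration only, not cg_clif code):

    fn sha1_round_fn(func: u8, b: u32, c: u32, d: u32) -> u32 {
        match func & 0b11 {
            0 => (b & c) | (!b & d),          // Ch, rounds 0..20
            1 => b ^ c ^ d,                   // Parity, rounds 20..40
            2 => (b & c) | (b & d) | (c & d), // Maj, rounds 40..60
            _ => b ^ c ^ d,                   // Parity, rounds 60..80
        }
    }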


@@ -391,12 +391,15 @@ fn codegen_float_intrinsic_call<'tcx>(
         | sym::ceilf32
         | sym::ceilf64
         | sym::truncf32
-        | sym::truncf64 => {
+        | sym::truncf64
+        | sym::sqrtf32
+        | sym::sqrtf64 => {
             let val = match intrinsic {
                 sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
                 sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
                 sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
                 sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
+                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
                 _ => unreachable!(),
             };
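
With sqrtf32/sqrtf64 added to this arm, a plain `f32::sqrt` or `f64::sqrt` call is mapped to Cranelift's `sqrt` instruction just like the other intrinsics in the list. A small user-level example that exercises this path (hypothetical function, assuming `f32::sqrt` forwards to the `sqrtf32` intrinsic as it does in the standard library):

    fn hypot(x: f32, y: f32) -> f32 {
        // f32::sqrt lowers through the sqrtf32 intrinsic, which the match above
        // now turns into a single fx.bcx.ins().sqrt(..) instruction.
        (x * x + y * y).sqrt()
    }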


@@ -853,7 +853,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             };
             for lane in 0..lane_count {
-                let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
+                // The bit order of the mask depends on the byte endianness, LSB-first for
+                // little endian and MSB-first for big endian.
+                let mask_lane = match fx.tcx.sess.target.endian {
+                    Endian::Big => lane_count - 1 - lane,
+                    Endian::Little => lane,
+                };
+                let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(mask_lane) as i64);
                 let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
                 let a_lane = a.value_lane(fx, lane).load_scalar(fx);
                 let b_lane = b.value_lane(fx, lane).load_scalar(fx);
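
The select mask here is read one bit per lane, and the added code picks the bit index according to the target's byte endianness. A scalar sketch of the selection (hypothetical helper, for illustration):

    fn mask_bit(mask: u64, lane: u64, lane_count: u64, big_endian: bool) -> u64 {
        // LSB-first on little endian, MSB-first on big endian.
        let mask_lane = if big_endian { lane_count - 1 - lane } else { lane };
        (mask >> mask_lane) & 1 // ushr_imm followed by band_imm
    }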