From 3fa1d4726746a5bf35083a47ae9559682252f176 Mon Sep 17 00:00:00 2001 From: Folkert Date: Tue, 28 May 2024 16:28:23 +0200 Subject: [PATCH 1/2] add support for `pclmulqdq` --- src/tools/miri/src/shims/x86/mod.rs | 69 +++++++++++++++++++ .../shims/x86/intrinsics-x86-pclmulqdq.rs | 48 +++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 src/tools/miri/tests/pass/shims/x86/intrinsics-x86-pclmulqdq.rs diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 7c40f3de54d..0374cfedc5a 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -105,6 +105,13 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } } + "pclmulqdq" => { + let [left, right, imm] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + pclmulqdq(this, left, right, imm, dest)?; + } + name if name.starts_with("sse.") => { return sse::EvalContextExt::emulate_x86_sse_intrinsic( this, link_name, abi, args, dest, @@ -1133,6 +1140,68 @@ fn pmulhrsw<'tcx>( Ok(()) } +/// Perform a carry-less multiplication of two 64-bit integers, selected from left and right according to imm8, +/// and store the results in dst. +/// +/// Left and right are both vectors of type 2 x i64. Only bits 0 and 4 of imm8 matter; +/// they select the element of left and right, respectively. 
+/// +/// +fn pclmulqdq<'tcx>( + this: &mut crate::MiriInterpCx<'tcx>, + left: &OpTy<'tcx>, + right: &OpTy<'tcx>, + imm8: &OpTy<'tcx>, + dest: &MPlaceTy<'tcx>, +) -> InterpResult<'tcx, ()> { + assert_eq!(left.layout, right.layout); + assert_eq!(left.layout.size, dest.layout.size); + + // Transmute to `[u64; 2]` + + let array_layout = this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u64, 2))?; + let left = left.transmute(array_layout, this)?; + let right = right.transmute(array_layout, this)?; + let dest = dest.transmute(array_layout, this)?; + + let imm8 = this.read_scalar(imm8)?.to_u8()?; + + // select the 64-bit integer from left that the user specified (low or high) + let index = if (imm8 & 0x01) == 0 { 0 } else { 1 }; + let left = this.read_scalar(&this.project_index(&left, index)?)?.to_u64()?; + + // select the 64-bit integer from right that the user specified (low or high) + let index = if (imm8 & 0x10) == 0 { 0 } else { 1 }; + let right = this.read_scalar(&this.project_index(&right, index)?)?.to_u64()?; + + // Perform carry-less multiplication + // + // This operation is like long multiplication, but ignores all carries. + // That idea corresponds to the xor operator, which is used in the implementation. + // + // Wikipedia has an example https://en.wikipedia.org/wiki/Carry-less_product#Example + let mut result: u128 = 0; + + for i in 0..64 { + // if the i-th bit in right is set + if (right & (1 << i)) != 0 { + // xor result with `left` shifted to the left by i positions + result ^= (left as u128) << i; + } + } + + let result_low = (result & 0xFFFF_FFFF_FFFF_FFFF) as u64; + let result_high = (result >> 64) as u64; + + let dest_low = this.project_index(&dest, 0)?; + this.write_scalar(Scalar::from_u64(result_low), &dest_low)?; + + let dest_high = this.project_index(&dest, 1)?; + this.write_scalar(Scalar::from_u64(result_high), &dest_high)?; + + Ok(()) +} + /// Packs two N-bit integer vectors to a single N/2-bit integers. 
/// /// The conversion from N-bit to N/2-bit should be provided by `f`. diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-pclmulqdq.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-pclmulqdq.rs new file mode 100644 index 00000000000..2f242dd5379 --- /dev/null +++ b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-pclmulqdq.rs @@ -0,0 +1,48 @@ +// Ignore everything except x86 and x86_64 +// Any new targets that are added to CI should be ignored here. +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+pclmulqdq + +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("pclmulqdq")); + + let a = (0x7fffffffffffffff, 0x4317e40ab4ddcf05); + let b = (0xdd358416f52ecd34, 0x633d11cc638ca16b); + + unsafe { + assert_eq!(clmulepi64_si128::<0x00>(a, b), (13036940098130298092, 2704901987789626761)); + assert_eq!(clmulepi64_si128::<0x01>(a, b), (6707488474444649956, 3901733953304450635)); + assert_eq!(clmulepi64_si128::<0x10>(a, b), (11607166829323378905, 1191897396234301548)); + assert_eq!(clmulepi64_si128::<0x11>(a, b), (7731954893213347271, 1760130762532070957)); + } +} + +#[target_feature(enable = "pclmulqdq")] +unsafe fn clmulepi64_si128<const IMM8: i32>( + (a1, a2): (u64, u64), + (b1, b2): (u64, u64), +) -> (u64, u64) { + // SAFETY: There are no safety requirements for calling `_mm_clmulepi64_si128`. + // It's just unsafe for API consistency with other intrinsics. 
+ unsafe { + let a = core::mem::transmute::<_, __m128i>([a1, a2]); + let b = core::mem::transmute::<_, __m128i>([b1, b2]); + + let out = _mm_clmulepi64_si128::<IMM8>(a, b); + + let [c1, c2] = core::mem::transmute::<_, [u64; 2]>(out); + + (c1, c2) + } +} From ea73f0067fa6274a7c845f8ff0a9418cbde243bb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 8 Jun 2024 18:31:46 +0200 Subject: [PATCH 2/2] comment nits --- src/tools/miri/src/shims/x86/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 0374cfedc5a..f2d120df21c 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -1140,15 +1140,15 @@ fn pmulhrsw<'tcx>( Ok(()) } -/// Perform a carry-less multiplication of two 64-bit integers, selected from left and right according to imm8, -/// and store the results in dst. +/// Perform a carry-less multiplication of two 64-bit integers, selected from `left` and `right` according to `imm8`, +/// and store the results in `dst`. /// -/// Left and right are both vectors of type 2 x i64. Only bits 0 and 4 of imm8 matter; -/// they select the element of left and right, respectively. +/// `left` and `right` are both vectors of type 2 x i64. Only bits 0 and 4 of `imm8` matter; +/// they select the element of `left` and `right`, respectively. /// /// fn pclmulqdq<'tcx>( - this: &mut crate::MiriInterpCx<'tcx>, + this: &mut MiriInterpCx<'tcx>, left: &OpTy<'tcx>, right: &OpTy<'tcx>, imm8: &OpTy<'tcx>,