Rollup merge of #94703 - kjetilkjeka:nvptx-kernel-args-abi2, r=nagisa

Fix codegen bug in "ptx-kernel" abi related to arg passing I found a codegen bug in the nvptx abi related to that args are passed as ptrs ([see comment](https://github.com/rust-lang/rust/issues/38788#issuecomment-1048999928)), this is not as specified in the [ptx-interoperability doc](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/) or how C/C++ does it. It will also almost always fail in practice since device/host uses different memory spaces for most hardware. This PR fixes the bug and add tests for passing structs to ptx kernels. I observed that all nvptx assembly tests had been marked as [ignore a long time ago](https://github.com/rust-lang/rust/pull/59752#issuecomment-501713428). I'm not sure if the new one should be marked as ignore, it passed on my computer but it might fail if ptx-linker is missing on the server? I guess this is outside scope for this PR and should be looked at in a different issue/PR. I only fixed the nvptx64-nvidia-cuda target and not the potential code paths for the non-existing 32bit target. Even though 32bit nvptx is not a supported target there are still some code under the hood supporting codegen for 32 bit ptx. I was advised to create an MCP to find out if this code should be removed or updated. Perhaps ``@RDambrosio016`` would have interest in taking a quick look at this.
2022-04-26 13:22:27 +02:00 · 2022-04-26 13:22:27 +02:00 · fe49981ea0
commit fe49981ea0
parent eaf8beb3f3 5bf5acc324
6 changed files with 352 additions and 9 deletions
--- a/compiler/rustc_target/src/abi/call/mod.rs
+++ b/compiler/rustc_target/src/abi/call/mod.rs
@ -696,7 +696,13 @@ impl<'a, Ty> FnAbi<'a, Ty> {
            "sparc" => sparc::compute_abi_info(cx, self),
            "sparc64" => sparc64::compute_abi_info(cx, self),
            "nvptx" => nvptx::compute_abi_info(self),
-            "nvptx64" => nvptx64::compute_abi_info(self),
+            "nvptx64" => {
+                if cx.target_spec().adjust_abi(abi) == spec::abi::Abi::PtxKernel {
+                    nvptx64::compute_ptx_kernel_abi_info(cx, self)
+                } else {
+                    nvptx64::compute_abi_info(self)
+                }
+            }
            "hexagon" => hexagon::compute_abi_info(self),
            "riscv32" | "riscv64" => riscv::compute_abi_info(cx, self),
            "wasm32" | "wasm64" => {
--- a/compiler/rustc_target/src/abi/call/nvptx64.rs
+++ b/compiler/rustc_target/src/abi/call/nvptx64.rs
@ -1,21 +1,35 @@
-// Reference: PTX Writer's Guide to Interoperability
-// https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability
-
-use crate::abi::call::{ArgAbi, FnAbi};
+use crate::abi::call::{ArgAbi, FnAbi, PassMode, Reg, Size, Uniform};
+use crate::abi::{HasDataLayout, TyAbiInterface};

 fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
    if ret.layout.is_aggregate() && ret.layout.size.bits() > 64 {
        ret.make_indirect();
-    } else {
-        ret.extend_integer_width_to(64);
    }
 }

 fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
    if arg.layout.is_aggregate() && arg.layout.size.bits() > 64 {
        arg.make_indirect();
-    } else {
-        arg.extend_integer_width_to(64);
+    }
+}
+
+fn classify_arg_kernel<'a, Ty, C>(_cx: &C, arg: &mut ArgAbi<'a, Ty>)
+where
+    Ty: TyAbiInterface<'a, C> + Copy,
+    C: HasDataLayout,
+{
+    if matches!(arg.mode, PassMode::Pair(..)) && (arg.layout.is_adt() || arg.layout.is_tuple()) {
+        let align_bytes = arg.layout.align.abi.bytes();
+
+        let unit = match align_bytes {
+            1 => Reg::i8(),
+            2 => Reg::i16(),
+            4 => Reg::i32(),
+            8 => Reg::i64(),
+            16 => Reg::i128(),
+            _ => unreachable!("Align is given as power of 2 no larger than 16 bytes"),
+        };
+        arg.cast_to(Uniform { unit, total: Size::from_bytes(2 * align_bytes) });
    }
 }

@ -31,3 +45,20 @@ pub fn compute_abi_info<Ty>(fn_abi: &mut FnAbi<'_, Ty>) {
        classify_arg(arg);
    }
 }
+
+pub fn compute_ptx_kernel_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>)
+where
+    Ty: TyAbiInterface<'a, C> + Copy,
+    C: HasDataLayout,
+{
+    if !fn_abi.ret.layout.is_unit() && !fn_abi.ret.layout.is_never() {
+        panic!("Kernels should not return anything other than () or !");
+    }
+
+    for arg in &mut fn_abi.args {
+        if arg.is_ignore() {
+            continue;
+        }
+        classify_arg_kernel(cx, arg);
+    }
+}
--- a/compiler/rustc_target/src/abi/mod.rs
+++ b/compiler/rustc_target/src/abi/mod.rs
@ -1355,6 +1355,10 @@ pub trait TyAbiInterface<'a, C>: Sized {
        cx: &C,
        offset: Size,
    ) -> Option<PointeeInfo>;
+    fn is_adt(this: TyAndLayout<'a, Self>) -> bool;
+    fn is_never(this: TyAndLayout<'a, Self>) -> bool;
+    fn is_tuple(this: TyAndLayout<'a, Self>) -> bool;
+    fn is_unit(this: TyAndLayout<'a, Self>) -> bool;
 }

 impl<'a, Ty> TyAndLayout<'a, Ty> {
@ -1396,6 +1400,34 @@ impl<'a, Ty> TyAndLayout<'a, Ty> {
            _ => false,
        }
    }
+
+    pub fn is_adt<C>(self) -> bool
+    where
+        Ty: TyAbiInterface<'a, C>,
+    {
+        Ty::is_adt(self)
+    }
+
+    pub fn is_never<C>(self) -> bool
+    where
+        Ty: TyAbiInterface<'a, C>,
+    {
+        Ty::is_never(self)
+    }
+
+    pub fn is_tuple<C>(self) -> bool
+    where
+        Ty: TyAbiInterface<'a, C>,
+    {
+        Ty::is_tuple(self)
+    }
+
+    pub fn is_unit<C>(self) -> bool
+    where
+        Ty: TyAbiInterface<'a, C>,
+    {
+        Ty::is_unit(self)
+    }
 }

 impl<'a, Ty> TyAndLayout<'a, Ty> {