From 99ec64c34c65630d9a66befa2cbf2faa9478baa0 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Mon, 10 Feb 2025 22:37:43 +0100 Subject: [PATCH] Export kernel descriptor for amdgpu kernels The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol with the name of the kernel plus a `.kd` suffix. Add it to the exported symbols in the linker script, so that it can be found. --- compiler/rustc_codegen_ssa/src/back/linker.rs | 1 + .../src/back/symbol_export.rs | 92 ++++++++++++------- tests/run-make/amdgpu-kd/foo.rs | 11 +++ tests/run-make/amdgpu-kd/rmake.rs | 20 ++++ 4 files changed, 92 insertions(+), 32 deletions(-) create mode 100644 tests/run-make/amdgpu-kd/foo.rs create mode 100644 tests/run-make/amdgpu-kd/rmake.rs diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 8fb831471a9..a65684f6724 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) - symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate( tcx, symbol, cnum, )); + symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum); } }); diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index 60ab2919352..e498f07641f 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{ ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name, }; use rustc_middle::query::LocalCrate; -use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt}; +use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt}; use rustc_middle::util::Providers; use rustc_session::config::{CrateType, OomStrategy}; +use rustc_target::callconv::Conv; use rustc_target::spec::{SanitizerSet, TlsModel}; use tracing::debug; @@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>( } } +fn calling_convention_for_symbol<'tcx>( + tcx: TyCtxt<'tcx>, + symbol: ExportedSymbol<'tcx>, +) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) { + let instance = match symbol { + ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _) + if tcx.is_static(def_id) => + { + None + } + ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)), + ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)), + // DropGlue always use the Rust calling convention and thus follow the target's default + // symbol decoration scheme. + ExportedSymbol::DropGlue(..) => None, + // AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the + // target's default symbol decoration scheme. + ExportedSymbol::AsyncDropGlueCtorShim(..) => None, + // NoDefId always follow the target's default symbol decoration scheme. + ExportedSymbol::NoDefId(..) => None, + // ThreadLocalShim always follow the target's default symbol decoration scheme. + ExportedSymbol::ThreadLocalShim(..) => None, + }; + + instance + .map(|i| { + tcx.fn_abi_of_instance( + ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())), + ) + .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed")) + }) + .map(|fnabi| (fnabi.conv, &fnabi.args[..])) + // FIXME(workingjubilee): why don't we know the convention here? + .unwrap_or((Conv::Rust, &[])) +} + /// This is the symbol name of the given instance as seen by the linker. /// /// On 32-bit Windows symbols are decorated according to their calling conventions. @@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( symbol: ExportedSymbol<'tcx>, instantiating_crate: CrateNum, ) -> String { - use rustc_target::callconv::Conv; - let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); // thread local will not be a function call, @@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( _ => return undecorated, }; - let instance = match symbol { - ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _) - if tcx.is_static(def_id) => - { - None - } - ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)), - ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)), - // DropGlue always use the Rust calling convention and thus follow the target's default - // symbol decoration scheme. - ExportedSymbol::DropGlue(..) => None, - // AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the - // target's default symbol decoration scheme. - ExportedSymbol::AsyncDropGlueCtorShim(..) => None, - // NoDefId always follow the target's default symbol decoration scheme. - ExportedSymbol::NoDefId(..) => None, - // ThreadLocalShim always follow the target's default symbol decoration scheme. - ExportedSymbol::ThreadLocalShim(..) => None, - }; - - let (conv, args) = instance - .map(|i| { - tcx.fn_abi_of_instance( - ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())), - ) - .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed")) - }) - .map(|fnabi| (fnabi.conv, &fnabi.args[..])) - .unwrap_or((Conv::Rust, &[])); + let (conv, args) = calling_convention_for_symbol(tcx, symbol); // Decorate symbols with prefixes, suffixes and total number of bytes of arguments. // Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170 @@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>( maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated) } +/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix. +/// Add it to the symbols list for all kernel functions, so that it is exported in the linked +/// object. +pub(crate) fn extend_exported_symbols<'tcx>( + symbols: &mut Vec, + tcx: TyCtxt<'tcx>, + symbol: ExportedSymbol<'tcx>, + instantiating_crate: CrateNum, +) { + let (conv, _) = calling_convention_for_symbol(tcx, symbol); + + if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" { + return; + } + + let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); + + // Add the symbol for the kernel descriptor (with .kd suffix) + symbols.push(format!("{undecorated}.kd")); +} + fn maybe_emutls_symbol_name<'tcx>( tcx: TyCtxt<'tcx>, symbol: ExportedSymbol<'tcx>, diff --git a/tests/run-make/amdgpu-kd/foo.rs b/tests/run-make/amdgpu-kd/foo.rs new file mode 100644 index 00000000000..a0d803ab813 --- /dev/null +++ b/tests/run-make/amdgpu-kd/foo.rs @@ -0,0 +1,11 @@ +#![allow(internal_features)] +#![feature(no_core, lang_items, abi_gpu_kernel)] +#![no_core] +#![no_std] + +// This is needed because of #![no_core]: +#[lang = "sized"] +trait Sized {} + +#[no_mangle] +extern "gpu-kernel" fn kernel() {} diff --git a/tests/run-make/amdgpu-kd/rmake.rs b/tests/run-make/amdgpu-kd/rmake.rs new file mode 100644 index 00000000000..a787fa1da93 --- /dev/null +++ b/tests/run-make/amdgpu-kd/rmake.rs @@ -0,0 +1,20 @@ +// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each +// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol +// with the name of the kernel plus a .kd suffix. +// Check that the produced object has the .kd symbol exported. + +//@ needs-llvm-components: amdgpu +//@ needs-rust-lld + +use run_make_support::{llvm_readobj, rustc}; + +fn main() { + rustc() + .crate_name("foo") + .target("amdgcn-amd-amdhsa") + .arg("-Ctarget-cpu=gfx900") + .crate_type("cdylib") + .input("foo.rs") + .run(); + llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd"); +}