Export kernel descriptor for amdgpu kernels

The host runtime (HIP or HSA) expects a kernel descriptor object for
each kernel in the ELF file. The amdgpu LLVM backend generates the
object. It is created as a symbol with the name of the kernel plus a
`.kd` suffix.

Add it to the exported symbols in the linker script, so that it can be
found.
This commit is contained in:
Flakebi 2025-02-10 22:37:43 +01:00
parent cd805f09ff
commit 99ec64c34c
No known key found for this signature in database
GPG key ID: 38E7ED984D7DCD02
4 changed files with 92 additions and 32 deletions

View file

@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
tcx, symbol, cnum,
));
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
}
});

View file

@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
};
use rustc_middle::query::LocalCrate;
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
use rustc_middle::util::Providers;
use rustc_session::config::{CrateType, OomStrategy};
use rustc_target::callconv::Conv;
use rustc_target::spec::{SanitizerSet, TlsModel};
use tracing::debug;
@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
}
}
/// Determines the calling convention and the argument ABIs of an exported symbol.
///
/// For function-like symbols (`NonGeneric` and `Generic` symbols that are not statics) the
/// function ABI of the corresponding instance is queried and its `conv` and `args` are
/// returned. For every other kind of symbol no instance is looked up and the result is
/// `Conv::Rust` together with an empty argument list.
fn calling_convention_for_symbol<'tcx>(
    tcx: TyCtxt<'tcx>,
    symbol: ExportedSymbol<'tcx>,
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
    let maybe_instance = match symbol {
        // Statics have to be filtered out first, before the function-like arms below get a
        // chance to match them.
        ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
            if tcx.is_static(def_id) =>
        {
            None
        }
        ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
        ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
        // None of the remaining kinds needs an instance:
        // - `DropGlue` and `AsyncDropGlueCtorShim` always use the Rust calling convention and
        //   thus follow the target's default symbol decoration scheme.
        // - `NoDefId` and `ThreadLocalShim` always follow the target's default symbol
        //   decoration scheme.
        ExportedSymbol::DropGlue(..)
        | ExportedSymbol::AsyncDropGlueCtorShim(..)
        | ExportedSymbol::NoDefId(..)
        | ExportedSymbol::ThreadLocalShim(..) => None,
    };

    // FIXME(workingjubilee): why don't we know the convention here?
    let Some(i) = maybe_instance else {
        return (Conv::Rust, &[]);
    };

    let typing_env = ty::TypingEnv::fully_monomorphized();
    let fn_abi = tcx
        .fn_abi_of_instance(typing_env.as_query_input((i, ty::List::empty())))
        .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"));
    (fn_abi.conv, &fn_abi.args[..])
}
/// This is the symbol name of the given instance as seen by the linker.
///
/// On 32-bit Windows symbols are decorated according to their calling conventions.
@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
symbol: ExportedSymbol<'tcx>,
instantiating_crate: CrateNum,
) -> String {
use rustc_target::callconv::Conv;
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
// thread local will not be a function call,
@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
_ => return undecorated,
};
let instance = match symbol {
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
if tcx.is_static(def_id) =>
{
None
}
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
// DropGlue always use the Rust calling convention and thus follow the target's default
// symbol decoration scheme.
ExportedSymbol::DropGlue(..) => None,
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
// target's default symbol decoration scheme.
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
// NoDefId always follow the target's default symbol decoration scheme.
ExportedSymbol::NoDefId(..) => None,
// ThreadLocalShim always follow the target's default symbol decoration scheme.
ExportedSymbol::ThreadLocalShim(..) => None,
};
let (conv, args) = instance
.map(|i| {
tcx.fn_abi_of_instance(
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
)
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
})
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
.unwrap_or((Conv::Rust, &[]));
let (conv, args) = calling_convention_for_symbol(tcx, symbol);
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
}
/// Appends additional linker symbols that have to be exported alongside `symbol`.
///
/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
/// object.
///
/// `symbols` is left untouched when the target OS is not `amdhsa` or when `symbol` does not
/// use the `gpu-kernel` calling convention.
pub(crate) fn extend_exported_symbols<'tcx>(
    symbols: &mut Vec<String>,
    tcx: TyCtxt<'tcx>,
    symbol: ExportedSymbol<'tcx>,
    instantiating_crate: CrateNum,
) {
    // Test the target first: this is a plain string comparison, whereas determining the calling
    // convention may have to query the function ABI of the instance. This function is called
    // for every exported symbol, so on all other targets we bail out before doing that work.
    if tcx.sess.target.os != "amdhsa" {
        return;
    }

    let (conv, _) = calling_convention_for_symbol(tcx, symbol);
    if conv != Conv::GpuKernel {
        return;
    }

    let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
    // Add the symbol for the kernel descriptor (with .kd suffix)
    symbols.push(format!("{undecorated}.kd"));
}
fn maybe_emutls_symbol_name<'tcx>(
tcx: TyCtxt<'tcx>,
symbol: ExportedSymbol<'tcx>,

View file

@ -0,0 +1,11 @@
// Minimal `no_core` crate that defines a single function with the `gpu-kernel` ABI.
// `internal_features` is allowed because `no_core` and `lang_items` are internal features.
#![allow(internal_features)]
#![feature(no_core, lang_items, abi_gpu_kernel)]
#![no_core]
#![no_std]
// This is needed because of #![no_core]:
#[lang = "sized"]
trait Sized {}
// `#[no_mangle]` keeps the symbol name of the kernel as plain `kernel`.
// The body is empty; only the existence of the function matters here.
#[no_mangle]
extern "gpu-kernel" fn kernel() {}

View file

@ -0,0 +1,20 @@
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
// with the name of the kernel plus a .kd suffix.
// Check that the produced object has the .kd symbol exported.
//@ needs-llvm-components: amdgpu
//@ needs-rust-lld
use run_make_support::{llvm_readobj, rustc};
/// Builds `foo.rs` as a `cdylib` for `amdgcn-amd-amdhsa` and checks that the symbol listing
/// of the produced `foo.elf` mentions `kernel.kd`.
fn main() {
    // Compile the kernel crate for the amdgpu target.
    let mut compiler = rustc();
    compiler
        .crate_name("foo")
        .target("amdgcn-amd-amdhsa")
        .arg("-Ctarget-cpu=gfx900")
        .crate_type("cdylib")
        .input("foo.rs")
        .run();

    // Dump the symbols of the linked object and look for the kernel descriptor.
    let symbol_dump = llvm_readobj().input("foo.elf").symbols().run();
    symbol_dump.assert_stdout_contains("kernel.kd");
}