Export kernel descriptor for amdgpu kernels
The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol with the name of the kernel plus a `.kd` suffix. Add it to the exported symbols in the linker script, so that it can be found.
This commit is contained in:
parent
cd805f09ff
commit
99ec64c34c
4 changed files with 92 additions and 32 deletions
|
@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
|
|||
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
|
||||
tcx, symbol, cnum,
|
||||
));
|
||||
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
|
|||
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
|
||||
};
|
||||
use rustc_middle::query::LocalCrate;
|
||||
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
|
||||
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
|
||||
use rustc_middle::util::Providers;
|
||||
use rustc_session::config::{CrateType, OomStrategy};
|
||||
use rustc_target::callconv::Conv;
|
||||
use rustc_target::spec::{SanitizerSet, TlsModel};
|
||||
use tracing::debug;
|
||||
|
||||
|
@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
|
|||
}
|
||||
}
|
||||
|
||||
fn calling_convention_for_symbol<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
symbol: ExportedSymbol<'tcx>,
|
||||
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
|
||||
let instance = match symbol {
|
||||
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
|
||||
if tcx.is_static(def_id) =>
|
||||
{
|
||||
None
|
||||
}
|
||||
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
|
||||
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
|
||||
// DropGlue always use the Rust calling convention and thus follow the target's default
|
||||
// symbol decoration scheme.
|
||||
ExportedSymbol::DropGlue(..) => None,
|
||||
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
|
||||
// target's default symbol decoration scheme.
|
||||
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
|
||||
// NoDefId always follow the target's default symbol decoration scheme.
|
||||
ExportedSymbol::NoDefId(..) => None,
|
||||
// ThreadLocalShim always follow the target's default symbol decoration scheme.
|
||||
ExportedSymbol::ThreadLocalShim(..) => None,
|
||||
};
|
||||
|
||||
instance
|
||||
.map(|i| {
|
||||
tcx.fn_abi_of_instance(
|
||||
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
|
||||
)
|
||||
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
|
||||
})
|
||||
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
|
||||
// FIXME(workingjubilee): why don't we know the convention here?
|
||||
.unwrap_or((Conv::Rust, &[]))
|
||||
}
|
||||
|
||||
/// This is the symbol name of the given instance as seen by the linker.
|
||||
///
|
||||
/// On 32-bit Windows symbols are decorated according to their calling conventions.
|
||||
|
@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
|
|||
symbol: ExportedSymbol<'tcx>,
|
||||
instantiating_crate: CrateNum,
|
||||
) -> String {
|
||||
use rustc_target::callconv::Conv;
|
||||
|
||||
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
|
||||
|
||||
// thread local will not be a function call,
|
||||
|
@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
|
|||
_ => return undecorated,
|
||||
};
|
||||
|
||||
let instance = match symbol {
|
||||
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
|
||||
if tcx.is_static(def_id) =>
|
||||
{
|
||||
None
|
||||
}
|
||||
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
|
||||
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
|
||||
// DropGlue always use the Rust calling convention and thus follow the target's default
|
||||
// symbol decoration scheme.
|
||||
ExportedSymbol::DropGlue(..) => None,
|
||||
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
|
||||
// target's default symbol decoration scheme.
|
||||
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
|
||||
// NoDefId always follow the target's default symbol decoration scheme.
|
||||
ExportedSymbol::NoDefId(..) => None,
|
||||
// ThreadLocalShim always follow the target's default symbol decoration scheme.
|
||||
ExportedSymbol::ThreadLocalShim(..) => None,
|
||||
};
|
||||
|
||||
let (conv, args) = instance
|
||||
.map(|i| {
|
||||
tcx.fn_abi_of_instance(
|
||||
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
|
||||
)
|
||||
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
|
||||
})
|
||||
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
|
||||
.unwrap_or((Conv::Rust, &[]));
|
||||
let (conv, args) = calling_convention_for_symbol(tcx, symbol);
|
||||
|
||||
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
|
||||
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
|
||||
|
@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
|
|||
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
|
||||
}
|
||||
|
||||
/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
|
||||
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
|
||||
/// object.
|
||||
pub(crate) fn extend_exported_symbols<'tcx>(
|
||||
symbols: &mut Vec<String>,
|
||||
tcx: TyCtxt<'tcx>,
|
||||
symbol: ExportedSymbol<'tcx>,
|
||||
instantiating_crate: CrateNum,
|
||||
) {
|
||||
let (conv, _) = calling_convention_for_symbol(tcx, symbol);
|
||||
|
||||
if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" {
|
||||
return;
|
||||
}
|
||||
|
||||
let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
|
||||
|
||||
// Add the symbol for the kernel descriptor (with .kd suffix)
|
||||
symbols.push(format!("{undecorated}.kd"));
|
||||
}
|
||||
|
||||
fn maybe_emutls_symbol_name<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
symbol: ExportedSymbol<'tcx>,
|
||||
|
|
11
tests/run-make/amdgpu-kd/foo.rs
Normal file
11
tests/run-make/amdgpu-kd/foo.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
// Minimal `no_core` crate that defines a single, empty `gpu-kernel` function named `kernel`.
// The run-make test next to this file compiles it and looks for the `kernel.kd` symbol.
#![allow(internal_features)]
|
||||
// NOTE(review): `lang_items` and `abi_gpu_kernel` are presumably what permit the
// `#[lang = "sized"]` attribute and the `extern "gpu-kernel"` ABI used below — confirm.
#![feature(no_core, lang_items, abi_gpu_kernel)]
|
||||
#![no_core]
|
||||
#![no_std]
|
||||
|
||||
// This is needed because of #![no_core]:
|
||||
#[lang = "sized"]
|
||||
trait Sized {}
|
||||
|
||||
// `#[no_mangle]` keeps the symbol name as plain `kernel`, which the test's `kernel.kd` check
// relies on.
#[no_mangle]
|
||||
extern "gpu-kernel" fn kernel() {}
|
20
tests/run-make/amdgpu-kd/rmake.rs
Normal file
20
tests/run-make/amdgpu-kd/rmake.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
|
||||
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
|
||||
// with the name of the kernel plus a .kd suffix.
|
||||
// Check that the produced object has the .kd symbol exported.
|
||||
|
||||
//@ needs-llvm-components: amdgpu
|
||||
//@ needs-rust-lld
|
||||
|
||||
use run_make_support::{llvm_readobj, rustc};
|
||||
|
||||
// Builds `foo.rs` for the amdgcn target and checks that the kernel descriptor symbol shows up
// in the symbol listing of the produced object.
fn main() {
|
||||
// Compile `foo.rs` as a `cdylib` named `foo` for `amdgcn-amd-amdhsa`, targeting the gfx900 CPU.
rustc()
|
||||
.crate_name("foo")
|
||||
.target("amdgcn-amd-amdhsa")
|
||||
.arg("-Ctarget-cpu=gfx900")
|
||||
.crate_type("cdylib")
|
||||
.input("foo.rs")
|
||||
.run();
|
||||
// Dump the symbols of `foo.elf` (presumably the output of the rustc invocation above — confirm)
// and require that the output mentions `kernel.kd`.
llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd");
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue