Export kernel descriptor for amdgpu kernels
The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates this object and emits it as a symbol named after the kernel plus a `.kd` suffix. Add that symbol to the exported symbols in the linker script so that the host runtime can find it.
This commit is contained in:
parent
cd805f09ff
commit
99ec64c34c
4 changed files with 92 additions and 32 deletions
|
@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
|
||||||
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
|
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
|
||||||
tcx, symbol, cnum,
|
tcx, symbol, cnum,
|
||||||
));
|
));
|
||||||
|
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
|
||||||
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
|
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
|
||||||
};
|
};
|
||||||
use rustc_middle::query::LocalCrate;
|
use rustc_middle::query::LocalCrate;
|
||||||
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
|
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
|
||||||
use rustc_middle::util::Providers;
|
use rustc_middle::util::Providers;
|
||||||
use rustc_session::config::{CrateType, OomStrategy};
|
use rustc_session::config::{CrateType, OomStrategy};
|
||||||
|
use rustc_target::callconv::Conv;
|
||||||
use rustc_target::spec::{SanitizerSet, TlsModel};
|
use rustc_target::spec::{SanitizerSet, TlsModel};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
|
@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the calling convention and the argument ABIs of an exported symbol.
///
/// `NonGeneric` and `Generic` symbols that are not statics are turned into an `Instance` and
/// looked up through `fn_abi_of_instance`; a failing query is reported via `bug!`.
/// Every other symbol kind yields `(Conv::Rust, &[])`.
fn calling_convention_for_symbol<'tcx>(
|
||||||
|
tcx: TyCtxt<'tcx>,
|
||||||
|
symbol: ExportedSymbol<'tcx>,
|
||||||
|
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
|
||||||
|
let instance = match symbol {
|
||||||
|
// Statics get no instance, so they take the `(Conv::Rust, &[])` fallback below.
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
|
||||||
|
if tcx.is_static(def_id) =>
|
||||||
|
{
|
||||||
|
None
|
||||||
|
}
|
||||||
|
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
|
||||||
|
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
|
||||||
|
// DropGlue always uses the Rust calling convention and thus follows the target's default
|
||||||
|
// symbol decoration scheme.
|
||||||
|
ExportedSymbol::DropGlue(..) => None,
|
||||||
|
// AsyncDropGlueCtorShim always uses the Rust calling convention and thus follows the
|
||||||
|
// target's default symbol decoration scheme.
|
||||||
|
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
|
||||||
|
// NoDefId always follows the target's default symbol decoration scheme.
|
||||||
|
ExportedSymbol::NoDefId(..) => None,
|
||||||
|
// ThreadLocalShim always follows the target's default symbol decoration scheme.
|
||||||
|
ExportedSymbol::ThreadLocalShim(..) => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Query the fn ABI of the instance (fully monomorphized, no extra arguments) and extract the
// convention and argument list from it.
instance
|
||||||
|
.map(|i| {
|
||||||
|
tcx.fn_abi_of_instance(
|
||||||
|
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
|
||||||
|
)
|
||||||
|
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
|
||||||
|
})
|
||||||
|
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
|
||||||
|
// FIXME(workingjubilee): why don't we know the convention here?
|
||||||
|
// Symbols without an instance fall back to the Rust convention with no arguments.
.unwrap_or((Conv::Rust, &[]))
|
||||||
|
}
|
||||||
|
|
||||||
/// This is the symbol name of the given instance as seen by the linker.
|
/// This is the symbol name of the given instance as seen by the linker.
|
||||||
///
|
///
|
||||||
/// On 32-bit Windows symbols are decorated according to their calling conventions.
|
/// On 32-bit Windows symbols are decorated according to their calling conventions.
|
||||||
|
@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
|
||||||
symbol: ExportedSymbol<'tcx>,
|
symbol: ExportedSymbol<'tcx>,
|
||||||
instantiating_crate: CrateNum,
|
instantiating_crate: CrateNum,
|
||||||
) -> String {
|
) -> String {
|
||||||
use rustc_target::callconv::Conv;
|
|
||||||
|
|
||||||
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
|
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
|
||||||
|
|
||||||
// thread local will not be a function call,
|
// thread local will not be a function call,
|
||||||
|
@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
|
||||||
_ => return undecorated,
|
_ => return undecorated,
|
||||||
};
|
};
|
||||||
|
|
||||||
let instance = match symbol {
|
let (conv, args) = calling_convention_for_symbol(tcx, symbol);
|
||||||
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
|
|
||||||
if tcx.is_static(def_id) =>
|
|
||||||
{
|
|
||||||
None
|
|
||||||
}
|
|
||||||
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
|
|
||||||
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
|
|
||||||
// DropGlue always use the Rust calling convention and thus follow the target's default
|
|
||||||
// symbol decoration scheme.
|
|
||||||
ExportedSymbol::DropGlue(..) => None,
|
|
||||||
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
|
|
||||||
// target's default symbol decoration scheme.
|
|
||||||
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
|
|
||||||
// NoDefId always follow the target's default symbol decoration scheme.
|
|
||||||
ExportedSymbol::NoDefId(..) => None,
|
|
||||||
// ThreadLocalShim always follow the target's default symbol decoration scheme.
|
|
||||||
ExportedSymbol::ThreadLocalShim(..) => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let (conv, args) = instance
|
|
||||||
.map(|i| {
|
|
||||||
tcx.fn_abi_of_instance(
|
|
||||||
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
|
|
||||||
)
|
|
||||||
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
|
|
||||||
})
|
|
||||||
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
|
|
||||||
.unwrap_or((Conv::Rust, &[]));
|
|
||||||
|
|
||||||
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
|
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
|
||||||
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
|
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
|
||||||
|
@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
|
||||||
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
|
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
|
||||||
|
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
|
||||||
|
/// object.
///
/// Does nothing unless the symbol uses the `gpu-kernel` calling convention
/// (`Conv::GpuKernel`) and the target OS is `amdhsa`. The pushed name is the undecorated
/// symbol name followed by `.kd`.
|
||||||
|
pub(crate) fn extend_exported_symbols<'tcx>(
|
||||||
|
symbols: &mut Vec<String>,
|
||||||
|
tcx: TyCtxt<'tcx>,
|
||||||
|
symbol: ExportedSymbol<'tcx>,
|
||||||
|
instantiating_crate: CrateNum,
|
||||||
|
) {
|
||||||
|
// NOTE(review): this ABI lookup runs for every exported symbol on every target; testing the
// target OS first would skip it on non-amdhsa targets. Confirm whether that matters.
let (conv, _) = calling_convention_for_symbol(tcx, symbol);
|
||||||
|
|
||||||
|
// Only gpu-kernel functions on amdhsa get a kernel descriptor symbol.
if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
|
||||||
|
|
||||||
|
// Add the symbol for the kernel descriptor (with .kd suffix)
|
||||||
|
symbols.push(format!("{undecorated}.kd"));
|
||||||
|
}
|
||||||
|
|
||||||
fn maybe_emutls_symbol_name<'tcx>(
|
fn maybe_emutls_symbol_name<'tcx>(
|
||||||
tcx: TyCtxt<'tcx>,
|
tcx: TyCtxt<'tcx>,
|
||||||
symbol: ExportedSymbol<'tcx>,
|
symbol: ExportedSymbol<'tcx>,
|
||||||
|
|
11
tests/run-make/amdgpu-kd/foo.rs
Normal file
11
tests/run-make/amdgpu-kd/foo.rs
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
// Minimal `no_core` crate that defines a single `gpu-kernel` function named `kernel`.
// The accompanying rmake.rs builds this file and looks for the `kernel.kd` symbol.
#![allow(internal_features)]
|
||||||
|
#![feature(no_core, lang_items, abi_gpu_kernel)]
|
||||||
|
#![no_core]
|
||||||
|
#![no_std]
|
||||||
|
|
||||||
|
// This is needed because of #![no_core]:
|
||||||
|
#[lang = "sized"]
|
||||||
|
trait Sized {}
|
||||||
|
|
||||||
|
// `#[no_mangle]` keeps the symbol name as plain `kernel`, so the kernel descriptor checked by
// the test is named `kernel.kd`.
#[no_mangle]
|
||||||
|
extern "gpu-kernel" fn kernel() {}
|
20
tests/run-make/amdgpu-kd/rmake.rs
Normal file
20
tests/run-make/amdgpu-kd/rmake.rs
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
|
||||||
|
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
|
||||||
|
// with the name of the kernel plus a .kd suffix.
|
||||||
|
// Check that the produced object has the .kd symbol exported.
|
||||||
|
|
||||||
|
//@ needs-llvm-components: amdgpu
|
||||||
|
//@ needs-rust-lld
|
||||||
|
|
||||||
|
use run_make_support::{llvm_readobj, rustc};
|
||||||
|
|
||||||
|
// Compiles foo.rs as a cdylib for amdgcn-amd-amdhsa (gfx900), then checks that the symbol
// listing of the produced object contains `kernel.kd`.
fn main() {
|
||||||
|
rustc()
|
||||||
|
.crate_name("foo")
|
||||||
|
.target("amdgcn-amd-amdhsa")
|
||||||
|
.arg("-Ctarget-cpu=gfx900")
|
||||||
|
.crate_type("cdylib")
|
||||||
|
.input("foo.rs")
|
||||||
|
.run();
|
||||||
|
// `foo.elf` is presumably the file name rustc gives a cdylib on this target — TODO confirm.
llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd");
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue