Auto merge of #138766 - Zalathar:unused-fn, r=SparrowLii

coverage: Deal with unused functions and their names in one place

When coverage codegen creates dummy instances and covfun records for unused functions, we already know that they are unused, so we might as well set up the special array of unused function names at the same time.

---

The first commit only moves code around; all significant changes are in the second commit.

There should be no change in compiler output.
This commit is contained in:
bors 2025-04-07 03:19:35 +00:00
commit b9856b6e40
3 changed files with 180 additions and 161 deletions

View file

@ -5,15 +5,11 @@ use rustc_abi::Align;
use rustc_codegen_ssa::traits::{
BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
};
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_data_structures::fx::FxIndexMap;
use rustc_index::IndexVec;
use rustc_middle::mir;
use rustc_middle::mir::mono::MonoItemPartitions;
use rustc_middle::ty::{self, TyCtxt};
use rustc_middle::ty::TyCtxt;
use rustc_session::RemapFileNameExt;
use rustc_session::config::RemapPathScopeComponents;
use rustc_span::def_id::DefIdSet;
use rustc_span::{SourceFile, StableSourceFileId};
use tracing::debug;
@ -24,6 +20,7 @@ use crate::llvm;
mod covfun;
mod spans;
mod unused;
/// Generates and exports the coverage map, which is embedded in special
/// linker sections in the final binary.
@ -76,12 +73,11 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
// In a single designated CGU, also prepare covfun records for functions
// in this crate that were instrumented for coverage, but are unused.
if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
let mut unused_instances = gather_unused_function_instances(cx);
// Sort the unused instances by symbol name, for the same reason as the used ones.
unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
prepare_covfun_record(tcx, &mut global_file_table, instance, false)
}));
unused::prepare_covfun_records_for_unused_functions(
cx,
&mut global_file_table,
&mut covfun_records,
);
}
// If there are no covfun records for this CGU, don't generate a covmap record.
@ -100,33 +96,10 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
// contain multiple covmap records from different compilation units.
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
let mut unused_function_names = vec![];
for covfun in &covfun_records {
unused_function_names.extend(covfun.mangled_function_name_if_unused());
covfun::generate_covfun_record(cx, filenames_hash, covfun)
}
// For unused functions, we need to take their mangled names and store them
// in a specially-named global array. LLVM's `InstrProfiling` pass will
// detect this global and include those names in its `__llvm_prf_names`
// section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
if !unused_function_names.is_empty() {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
let name_globals = unused_function_names
.into_iter()
.map(|mangled_function_name| cx.const_str(mangled_function_name).0)
.collect::<Vec<_>>();
let initializer = cx.const_array(cx.type_ptr(), &name_globals);
let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
llvm::set_global_constant(array, true);
llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
llvm::set_initializer(array, initializer);
}
// Generate the coverage map header, which contains the filenames used by
// this CGU's coverage mappings, and store it in a well-known global.
// (This is skipped if we returned early due to having no covfun records.)
@ -249,121 +222,3 @@ fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_
cx.add_used_global(covmap_global);
}
/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
/// But since we don't want unused functions to disappear from coverage reports, we also scan for
/// functions that were instrumented but are not participating in codegen.
///
/// These unused functions don't need to be codegenned, but we do need to add them to the function
/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
/// We also end up adding their symbol names to a special global array that LLVM will include in
/// its embedded coverage data.
fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
let tcx = cx.tcx;
let usage = prepare_usage_sets(tcx);
let is_unused_fn = |def_id: LocalDefId| -> bool {
// Usage sets expect `DefId`, so convert from `LocalDefId`.
let d: DefId = LocalDefId::to_def_id(def_id);
// To be potentially eligible for "unused function" mappings, a definition must:
// - Be eligible for coverage instrumentation
// - Not participate directly in codegen (or have lost all its coverage statements)
// - Not have any coverage statements inlined into codegenned functions
tcx.is_eligible_for_coverage(def_id)
&& (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
&& !usage.used_via_inlining.contains(&d)
};
// FIXME(#79651): Consider trying to filter out dummy instantiations of
// unused generic functions from library crates, because they can produce
// "unused instantiation" in coverage reports even when they are actually
// used by some downstream crate in the same binary.
tcx.mir_keys(())
.iter()
.copied()
.filter(|&def_id| is_unused_fn(def_id))
.map(|def_id| make_dummy_instance(tcx, def_id))
.collect::<Vec<_>>()
}
struct UsageSets<'tcx> {
all_mono_items: &'tcx DefIdSet,
used_via_inlining: FxHashSet<DefId>,
missing_own_coverage: FxHashSet<DefId>,
}
/// Prepare sets of definitions that are relevant to deciding whether something
/// is an "unused function" for coverage purposes.
fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
let MonoItemPartitions { all_mono_items, codegen_units, .. } =
tcx.collect_and_partition_mono_items(());
// Obtain a MIR body for each function participating in codegen, via an
// arbitrary instance.
let mut def_ids_seen = FxHashSet::default();
let def_and_mir_for_all_mono_fns = codegen_units
.iter()
.flat_map(|cgu| cgu.items().keys())
.filter_map(|item| match item {
mir::mono::MonoItem::Fn(instance) => Some(instance),
mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
})
// We only need one arbitrary instance per definition.
.filter(move |instance| def_ids_seen.insert(instance.def_id()))
.map(|instance| {
// We don't care about the instance, just its underlying MIR.
let body = tcx.instance_mir(instance.def);
(instance.def_id(), body)
});
// Functions whose coverage statements were found inlined into other functions.
let mut used_via_inlining = FxHashSet::default();
// Functions that were instrumented, but had all of their coverage statements
// removed by later MIR transforms (e.g. UnreachablePropagation).
let mut missing_own_coverage = FxHashSet::default();
for (def_id, body) in def_and_mir_for_all_mono_fns {
let mut saw_own_coverage = false;
// Inspect every coverage statement in the function's MIR.
for stmt in body
.basic_blocks
.iter()
.flat_map(|block| &block.statements)
.filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
{
if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
// This coverage statement was inlined from another function.
used_via_inlining.insert(inlined.def_id());
} else {
// Non-inlined coverage statements belong to the enclosing function.
saw_own_coverage = true;
}
}
if !saw_own_coverage && body.function_coverage_info.is_some() {
missing_own_coverage.insert(def_id);
}
}
UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
}
fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
let def_id = local_def_id.to_def_id();
// Make a dummy instance that fills in all generics with placeholders.
ty::Instance::new(
def_id,
ty::GenericArgs::for_item(tcx, def_id, |param, _| {
if let ty::GenericParamDefKind::Lifetime = param.kind {
tcx.lifetimes.re_erased.into()
} else {
tcx.mk_param_from_def(param)
}
}),
)
}

View file

@ -37,14 +37,6 @@ pub(crate) struct CovfunRecord<'tcx> {
regions: ffi::Regions,
}
impl<'tcx> CovfunRecord<'tcx> {
/// FIXME(Zalathar): Make this the responsibility of the code that determines
/// which functions are unused.
pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
(!self.is_used).then_some(self.mangled_function_name)
}
}
pub(crate) fn prepare_covfun_record<'tcx>(
tcx: TyCtxt<'tcx>,
global_file_table: &mut GlobalFileTable,

View file

@ -0,0 +1,172 @@
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods};
use rustc_data_structures::fx::FxHashSet;
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_middle::mir;
use rustc_middle::mir::mono::MonoItemPartitions;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::def_id::DefIdSet;
use crate::common::CodegenCx;
use crate::coverageinfo::mapgen::GlobalFileTable;
use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
use crate::llvm;
/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
/// But since we don't want unused functions to disappear from coverage reports, we also scan for
/// functions that were instrumented but are not participating in codegen.
///
/// These unused functions don't need to be codegenned, but we do need to add them to the function
/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
/// We also end up adding their symbol names to a special global array that LLVM will include in
/// its embedded coverage data.
pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
cx: &CodegenCx<'_, 'tcx>,
global_file_table: &mut GlobalFileTable,
covfun_records: &mut Vec<CovfunRecord<'tcx>>,
) {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
let mut unused_instances = gather_unused_function_instances(cx);
// Sort the unused instances by symbol name, so that their order isn't hash-sensitive.
unused_instances.sort_by_key(|instance| instance.symbol_name);
// Try to create a covfun record for each unused function.
let mut name_globals = Vec::with_capacity(unused_instances.len());
covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?;
// If successful, also store its symbol name in a global constant.
name_globals.push(cx.const_str(unused.symbol_name.name).0);
record
}));
// Store the names of unused functions in a specially-named global array.
// LLVM's `InstrProfilling` pass will detect this array, and include the
// referenced names in its `__llvm_prf_names` section.
// (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
if !name_globals.is_empty() {
let initializer = cx.const_array(cx.type_ptr(), &name_globals);
let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
llvm::set_global_constant(array, true);
llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
llvm::set_initializer(array, initializer);
}
}
/// Holds a dummy function instance along with its symbol name, to avoid having
/// to repeatedly query for the name.
struct UnusedInstance<'tcx> {
instance: ty::Instance<'tcx>,
symbol_name: ty::SymbolName<'tcx>,
}
fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<UnusedInstance<'tcx>> {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
let tcx = cx.tcx;
let usage = prepare_usage_sets(tcx);
let is_unused_fn = |def_id: LocalDefId| -> bool {
// Usage sets expect `DefId`, so convert from `LocalDefId`.
let d: DefId = LocalDefId::to_def_id(def_id);
// To be potentially eligible for "unused function" mappings, a definition must:
// - Be eligible for coverage instrumentation
// - Not participate directly in codegen (or have lost all its coverage statements)
// - Not have any coverage statements inlined into codegenned functions
tcx.is_eligible_for_coverage(def_id)
&& (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
&& !usage.used_via_inlining.contains(&d)
};
// FIXME(#79651): Consider trying to filter out dummy instantiations of
// unused generic functions from library crates, because they can produce
// "unused instantiation" in coverage reports even when they are actually
// used by some downstream crate in the same binary.
tcx.mir_keys(())
.iter()
.copied()
.filter(|&def_id| is_unused_fn(def_id))
.map(|def_id| make_dummy_instance(tcx, def_id))
.map(|instance| UnusedInstance { instance, symbol_name: tcx.symbol_name(instance) })
.collect::<Vec<_>>()
}
struct UsageSets<'tcx> {
all_mono_items: &'tcx DefIdSet,
used_via_inlining: FxHashSet<DefId>,
missing_own_coverage: FxHashSet<DefId>,
}
/// Prepare sets of definitions that are relevant to deciding whether something
/// is an "unused function" for coverage purposes.
fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
let MonoItemPartitions { all_mono_items, codegen_units, .. } =
tcx.collect_and_partition_mono_items(());
// Obtain a MIR body for each function participating in codegen, via an
// arbitrary instance.
let mut def_ids_seen = FxHashSet::default();
let def_and_mir_for_all_mono_fns = codegen_units
.iter()
.flat_map(|cgu| cgu.items().keys())
.filter_map(|item| match item {
mir::mono::MonoItem::Fn(instance) => Some(instance),
mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
})
// We only need one arbitrary instance per definition.
.filter(move |instance| def_ids_seen.insert(instance.def_id()))
.map(|instance| {
// We don't care about the instance, just its underlying MIR.
let body = tcx.instance_mir(instance.def);
(instance.def_id(), body)
});
// Functions whose coverage statements were found inlined into other functions.
let mut used_via_inlining = FxHashSet::default();
// Functions that were instrumented, but had all of their coverage statements
// removed by later MIR transforms (e.g. UnreachablePropagation).
let mut missing_own_coverage = FxHashSet::default();
for (def_id, body) in def_and_mir_for_all_mono_fns {
let mut saw_own_coverage = false;
// Inspect every coverage statement in the function's MIR.
for stmt in body
.basic_blocks
.iter()
.flat_map(|block| &block.statements)
.filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
{
if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
// This coverage statement was inlined from another function.
used_via_inlining.insert(inlined.def_id());
} else {
// Non-inlined coverage statements belong to the enclosing function.
saw_own_coverage = true;
}
}
if !saw_own_coverage && body.function_coverage_info.is_some() {
missing_own_coverage.insert(def_id);
}
}
UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
}
fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
let def_id = local_def_id.to_def_id();
// Make a dummy instance that fills in all generics with placeholders.
ty::Instance::new(
def_id,
ty::GenericArgs::for_item(tcx, def_id, |param, _| {
if let ty::GenericParamDefKind::Lifetime = param.kind {
tcx.lifetimes.re_erased.into()
} else {
tcx.mk_param_from_def(param)
}
}),
)
}