coverage: Build the CGU's global file table as late as possible

This commit is contained in:
Zalathar 2025-03-31 12:51:16 +11:00
parent b9856b6e40
commit 4322b6e97d
3 changed files with 107 additions and 85 deletions

View file

@ -53,13 +53,6 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
None => return, None => return,
}; };
// The order of entries in this global file table needs to be deterministic,
// and ideally should also be independent of the details of stable-hashing,
// because coverage test snapshots (`.cov-map`) can observe the order and
// would need to be re-blessed if it changes. As long as those requirements
// are satisfied, the order can be arbitrary.
let mut global_file_table = GlobalFileTable::new();
let mut covfun_records = instances_used let mut covfun_records = instances_used
.iter() .iter()
.copied() .copied()
@ -67,17 +60,13 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
// order that doesn't depend on the stable-hash-based order in which // order that doesn't depend on the stable-hash-based order in which
// instances were visited during codegen. // instances were visited during codegen.
.sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
.filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true)) .filter_map(|instance| prepare_covfun_record(tcx, instance, true))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
// In a single designated CGU, also prepare covfun records for functions // In a single designated CGU, also prepare covfun records for functions
// in this crate that were instrumented for coverage, but are unused. // in this crate that were instrumented for coverage, but are unused.
if cx.codegen_unit.is_code_coverage_dead_code_cgu() { if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
unused::prepare_covfun_records_for_unused_functions( unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records);
cx,
&mut global_file_table,
&mut covfun_records,
);
} }
// If there are no covfun records for this CGU, don't generate a covmap record. // If there are no covfun records for this CGU, don't generate a covmap record.
@ -89,68 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
return; return;
} }
// Encode all filenames referenced by coverage mappings in this CGU. // Prepare the global file table for this CGU, containing all paths needed
let filenames_buffer = global_file_table.make_filenames_buffer(tcx); // by one or more covfun records.
// The `llvm-cov` tool uses this hash to associate each covfun record with let global_file_table =
// its corresponding filenames table, since the final binary will typically GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files()));
// contain multiple covmap records from different compilation units.
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
for covfun in &covfun_records { for covfun in &covfun_records {
covfun::generate_covfun_record(cx, filenames_hash, covfun) covfun::generate_covfun_record(cx, &global_file_table, covfun)
} }
// Generate the coverage map header, which contains the filenames used by // Generate the coverage map header, which contains the filenames used by
// this CGU's coverage mappings, and store it in a well-known global. // this CGU's coverage mappings, and store it in a well-known global.
// (This is skipped if we returned early due to having no covfun records.) // (This is skipped if we returned early due to having no covfun records.)
generate_covmap_record(cx, covmap_version, &filenames_buffer); generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer);
} }
/// Maps "global" (per-CGU) file ID numbers to their underlying source files. /// Maps "global" (per-CGU) file ID numbers to their underlying source file paths.
#[derive(Debug)]
struct GlobalFileTable { struct GlobalFileTable {
/// This "raw" table doesn't include the working dir, so a file's /// This "raw" table doesn't include the working dir, so a file's
/// global ID is its index in this set **plus one**. /// global ID is its index in this set **plus one**.
raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>, raw_file_table: FxIndexMap<StableSourceFileId, String>,
/// The file table in encoded form (possibly compressed), which can be
/// included directly in this CGU's `__llvm_covmap` record.
filenames_buffer: Vec<u8>,
/// Truncated hash of the bytes in `filenames_buffer`.
///
/// The `llvm-cov` tool uses this hash to associate each covfun record with
/// its corresponding filenames table, since the final binary will typically
/// contain multiple covmap records from different compilation units.
filenames_hash: u64,
} }
impl GlobalFileTable { impl GlobalFileTable {
fn new() -> Self { /// Builds a "global file table" for this CGU, mapping numeric IDs to
Self { raw_file_table: FxIndexMap::default() } /// path strings.
fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self {
let mut raw_file_table = FxIndexMap::default();
for file in all_files {
raw_file_table.entry(file.stable_id).or_insert_with(|| {
file.name
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
.to_string_lossy()
.into_owned()
});
} }
fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId { // FIXME(Zalathar): Consider sorting the file table here, but maybe
// Ensure the given file has a table entry, and get its index. // only after adding filename support to coverage-dump, so that the
let entry = self.raw_file_table.entry(file.stable_id); // table order isn't directly visible in `.cov-map` snapshots.
let raw_id = entry.index();
entry.or_insert_with(|| Arc::clone(file));
// The raw file table doesn't include an entry for the working dir let mut table = Vec::with_capacity(raw_file_table.len() + 1);
// (which has ID 0), so add 1 to get the correct ID.
GlobalFileId::from_usize(raw_id + 1)
}
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> { // Since version 6 of the LLVM coverage mapping format, the first entry
let mut table = Vec::with_capacity(self.raw_file_table.len() + 1); // in the global file table is treated as a base directory, used to
// resolve any other entries that are stored as relative paths.
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) let base_dir = tcx
// requires setting the first filename to the compilation directory. .sess
// Since rustc generates coverage maps with relative paths, the
// compilation directory can be combined with the relative paths
// to get absolute paths, if needed.
table.push(
tcx.sess
.opts .opts
.working_dir .working_dir
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO) .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
.to_string_lossy(), .to_string_lossy();
); table.push(base_dir.as_ref());
// Add the regular entries after the base directory. // Add the regular entries after the base directory.
table.extend(self.raw_file_table.values().map(|file| { table.extend(raw_file_table.values().map(|name| name.as_str()));
file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
}));
llvm_cov::write_filenames_to_buffer(&table) // Encode the file table into a buffer, and get the hash of its encoded
// bytes, so that we can embed that hash in `__llvm_covfun` records.
let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table);
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
Self { raw_file_table, filenames_buffer, filenames_hash }
}
fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> {
let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?;
// The raw file table doesn't include an entry for the base dir
// (which has ID 0), so add 1 to get the correct ID.
Some(GlobalFileId::from_usize(raw_id + 1))
} }
} }
@ -166,26 +175,31 @@ rustc_index::newtype_index! {
struct LocalFileId {} struct LocalFileId {}
} }
/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) /// Holds a mapping from "local" (per-function) file IDs to their corresponding
/// file IDs. /// source files.
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct VirtualFileMapping { struct VirtualFileMapping {
local_to_global: IndexVec<LocalFileId, GlobalFileId>, local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>,
global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
} }
impl VirtualFileMapping { impl VirtualFileMapping {
fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId { fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId {
*self self.local_file_table.push(Arc::clone(source_file))
.global_to_local
.entry(global_file_id)
.or_insert_with(|| self.local_to_global.push(global_file_id))
} }
fn to_vec(&self) -> Vec<u32> { /// Resolves all of the filenames in this local file mapping to a list of
// This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`, /// global file IDs in its CGU, for inclusion in this function's
// but it isn't hot or expensive enough to justify the extra unsafety. /// `__llvm_covfun` record.
self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect() ///
/// The global file IDs are returned as `u32` to make FFI easier.
fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> {
self.local_file_table
.iter()
.map(|file| try {
let id = global_file_table.get_existing_id(file)?;
GlobalFileId::as_u32(id)
})
.collect::<Option<Vec<_>>>()
} }
} }

View file

@ -5,6 +5,7 @@
//! [^win]: On Windows the section name is `.lcovfun`. //! [^win]: On Windows the section name is `.lcovfun`.
use std::ffi::CString; use std::ffi::CString;
use std::sync::Arc;
use rustc_abi::Align; use rustc_abi::Align;
use rustc_codegen_ssa::traits::{ use rustc_codegen_ssa::traits::{
@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
MappingKind, Op, MappingKind, Op,
}; };
use rustc_middle::ty::{Instance, TyCtxt}; use rustc_middle::ty::{Instance, TyCtxt};
use rustc_span::Span; use rustc_span::{SourceFile, Span};
use rustc_target::spec::HasTargetSpec; use rustc_target::spec::HasTargetSpec;
use tracing::debug; use tracing::debug;
@ -37,9 +38,16 @@ pub(crate) struct CovfunRecord<'tcx> {
regions: ffi::Regions, regions: ffi::Regions,
} }
impl<'tcx> CovfunRecord<'tcx> {
/// Iterator that yields all source files referred to by this function's
/// coverage mappings. Used to build the global file table for the CGU.
pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
}
}
pub(crate) fn prepare_covfun_record<'tcx>( pub(crate) fn prepare_covfun_record<'tcx>(
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
global_file_table: &mut GlobalFileTable,
instance: Instance<'tcx>, instance: Instance<'tcx>,
is_used: bool, is_used: bool,
) -> Option<CovfunRecord<'tcx>> { ) -> Option<CovfunRecord<'tcx>> {
@ -57,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
regions: ffi::Regions::default(), regions: ffi::Regions::default(),
}; };
fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun); fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
if covfun.regions.has_no_regions() { if covfun.regions.has_no_regions() {
debug!(?covfun, "function has no mappings to embed; skipping"); debug!(?covfun, "function has no mappings to embed; skipping");
@ -92,7 +100,6 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
/// Populates the mapping region tables in the current function's covfun record. /// Populates the mapping region tables in the current function's covfun record.
fn fill_region_tables<'tcx>( fn fill_region_tables<'tcx>(
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
global_file_table: &mut GlobalFileTable,
fn_cov_info: &'tcx FunctionCoverageInfo, fn_cov_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo, ids_info: &'tcx CoverageIdsInfo,
covfun: &mut CovfunRecord<'tcx>, covfun: &mut CovfunRecord<'tcx>,
@ -106,11 +113,7 @@ fn fill_region_tables<'tcx>(
}; };
let source_file = source_map.lookup_source_file(first_span.lo()); let source_file = source_map.lookup_source_file(first_span.lo());
// Look up the global file ID for that file. let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
let global_file_id = global_file_table.global_file_id_for_file(&source_file);
// Associate that global file ID with a local file ID for this function.
let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
// In rare cases, _all_ of a function's spans are discarded, and coverage // In rare cases, _all_ of a function's spans are discarded, and coverage
// codegen needs to handle that gracefully to avoid #133606. // codegen needs to handle that gracefully to avoid #133606.
@ -179,7 +182,7 @@ fn fill_region_tables<'tcx>(
/// as a global variable in the `__llvm_covfun` section. /// as a global variable in the `__llvm_covfun` section.
pub(crate) fn generate_covfun_record<'tcx>( pub(crate) fn generate_covfun_record<'tcx>(
cx: &CodegenCx<'_, 'tcx>, cx: &CodegenCx<'_, 'tcx>,
filenames_hash: u64, global_file_table: &GlobalFileTable,
covfun: &CovfunRecord<'tcx>, covfun: &CovfunRecord<'tcx>,
) { ) {
let &CovfunRecord { let &CovfunRecord {
@ -191,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
ref regions, ref regions,
} = covfun; } = covfun;
// Encode the function's coverage mappings into a buffer. let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer( debug_assert!(
&virtual_file_mapping.to_vec(), false,
expressions, "all local files should be present in the global file table: \
regions, global_file_table = {global_file_table:?}, \
virtual_file_mapping = {virtual_file_mapping:?}"
); );
return;
};
// Encode the function's coverage mappings into a buffer.
let coverage_mapping_buffer =
llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
// A covfun record consists of four target-endian integers, followed by the // A covfun record consists of four target-endian integers, followed by the
// encoded mapping data in bytes. Note that the length field is 32 bits. // encoded mapping data in bytes. Note that the length field is 32 bits.
@ -209,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
cx.const_u64(func_name_hash), cx.const_u64(func_name_hash),
cx.const_u32(coverage_mapping_buffer.len() as u32), cx.const_u32(coverage_mapping_buffer.len() as u32),
cx.const_u64(source_hash), cx.const_u64(source_hash),
cx.const_u64(filenames_hash), cx.const_u64(global_file_table.filenames_hash),
cx.const_bytes(&coverage_mapping_buffer), cx.const_bytes(&coverage_mapping_buffer),
], ],
// This struct needs to be packed, so that the 32-bit length field // This struct needs to be packed, so that the 32-bit length field

View file

@ -7,7 +7,6 @@ use rustc_middle::ty::{self, TyCtxt};
use rustc_span::def_id::DefIdSet; use rustc_span::def_id::DefIdSet;
use crate::common::CodegenCx; use crate::common::CodegenCx;
use crate::coverageinfo::mapgen::GlobalFileTable;
use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record}; use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
use crate::llvm; use crate::llvm;
@ -21,7 +20,6 @@ use crate::llvm;
/// its embedded coverage data. /// its embedded coverage data.
pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>( pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
cx: &CodegenCx<'_, 'tcx>, cx: &CodegenCx<'_, 'tcx>,
global_file_table: &mut GlobalFileTable,
covfun_records: &mut Vec<CovfunRecord<'tcx>>, covfun_records: &mut Vec<CovfunRecord<'tcx>>,
) { ) {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
@ -33,7 +31,7 @@ pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
// Try to create a covfun record for each unused function. // Try to create a covfun record for each unused function.
let mut name_globals = Vec::with_capacity(unused_instances.len()); let mut name_globals = Vec::with_capacity(unused_instances.len());
covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try { covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?; let record = prepare_covfun_record(cx.tcx, unused.instance, false)?;
// If successful, also store its symbol name in a global constant. // If successful, also store its symbol name in a global constant.
name_globals.push(cx.const_str(unused.symbol_name.name).0); name_globals.push(cx.const_str(unused.symbol_name.name).0);
record record