Rollup merge of #134497 - Zalathar:spans, r=jieyouxu

coverage: Store coverage source regions as `Span` until codegen (take 2)

This is an attempt to re-land #133418:

> Historically, coverage spans were converted into line/column coordinates during the MIR instrumentation pass.

> This PR moves that conversion step into codegen, so that coverage spans spend most of their time stored as Span instead.

> In addition to being conceptually nicer, this also reduces the size of coverage mappings in MIR, because Span is smaller than 4x u32.

That PR was reverted by #133608, because in some circumstances not covered by our test suite we were emitting coverage metadata that was causing `llvm-cov` to exit with an error (#133606).

---

The implementation here is *mostly* the same, but adapted for subsequent changes in the relevant code (e.g. #134163).

I believe that the changes in #134163 should be sufficient to prevent the problem that required the original PR to be reverted. But I haven't been able to reproduce the original breakage in a regression test, and the `llvm-cov` error message is extremely unhelpful, so I can't completely rule out the possibility of this breaking again.

r? jieyouxu (reviewer of the original PR)
This commit is contained in:
Matthias Krüger 2024-12-19 15:26:16 +01:00 committed by GitHub
commit 57cbd078f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 298 additions and 270 deletions

View file

@ -1,6 +1,4 @@
use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId, SourceRegion};
use crate::coverageinfo::mapgen::LocalFileId;
use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};
/// Must match the layout of `LLVMRustCounterKind`.
#[derive(Copy, Clone, Debug)]
@ -126,30 +124,16 @@ pub(crate) struct CoverageSpan {
/// Local index into the function's local-to-global file ID table.
/// The value at that index is itself an index into the coverage filename
/// table in the CGU's `__llvm_covmap` section.
file_id: u32,
pub(crate) file_id: u32,
/// 1-based starting line of the source code span.
start_line: u32,
pub(crate) start_line: u32,
/// 1-based starting column of the source code span.
start_col: u32,
pub(crate) start_col: u32,
/// 1-based ending line of the source code span.
end_line: u32,
pub(crate) end_line: u32,
/// 1-based ending column of the source code span. High bit must be unset.
end_col: u32,
}
impl CoverageSpan {
pub(crate) fn from_source_region(
local_file_id: LocalFileId,
code_region: &SourceRegion,
) -> Self {
let file_id = local_file_id.as_u32();
let &SourceRegion { start_line, start_col, end_line, end_col } = code_region;
// Internally, LLVM uses the high bit of `end_col` to distinguish between
// code regions and gap regions, so it can't be used by the column number.
assert!(end_col & (1u32 << 31) == 0, "high bit of `end_col` must be unset: {end_col:#X}");
Self { file_id, start_line, start_col, end_line, end_col }
}
pub(crate) end_col: u32,
}
/// Holds tables of the various region types in one struct.
@ -184,7 +168,7 @@ impl Regions {
#[derive(Clone, Debug)]
#[repr(C)]
pub(crate) struct CodeRegion {
pub(crate) span: CoverageSpan,
pub(crate) cov_span: CoverageSpan,
pub(crate) counter: Counter,
}
@ -192,7 +176,7 @@ pub(crate) struct CodeRegion {
#[derive(Clone, Debug)]
#[repr(C)]
pub(crate) struct BranchRegion {
pub(crate) span: CoverageSpan,
pub(crate) cov_span: CoverageSpan,
pub(crate) true_counter: Counter,
pub(crate) false_counter: Counter,
}
@ -201,7 +185,7 @@ pub(crate) struct BranchRegion {
#[derive(Clone, Debug)]
#[repr(C)]
pub(crate) struct MCDCBranchRegion {
pub(crate) span: CoverageSpan,
pub(crate) cov_span: CoverageSpan,
pub(crate) true_counter: Counter,
pub(crate) false_counter: Counter,
pub(crate) mcdc_branch_params: mcdc::BranchParameters,
@ -211,6 +195,6 @@ pub(crate) struct MCDCBranchRegion {
#[derive(Clone, Debug)]
#[repr(C)]
pub(crate) struct MCDCDecisionRegion {
pub(crate) span: CoverageSpan,
pub(crate) cov_span: CoverageSpan,
pub(crate) mcdc_decision_params: mcdc::DecisionParameters,
}

View file

@ -40,11 +40,10 @@ pub(crate) fn create_pgo_func_name_var<'ll>(
}
}
pub(crate) fn write_filenames_to_buffer<'a>(
filenames: impl IntoIterator<Item = &'a str>,
) -> Vec<u8> {
pub(crate) fn write_filenames_to_buffer(filenames: &[impl AsRef<str>]) -> Vec<u8> {
let (pointers, lengths) = filenames
.into_iter()
.map(AsRef::as_ref)
.map(|s: &str| (s.as_c_char_ptr(), s.len()))
.unzip::<_, _, Vec<_>, Vec<_>>();

View file

@ -1,11 +1,11 @@
use std::iter;
use std::sync::Arc;
use itertools::Itertools;
use rustc_abi::Align;
use rustc_codegen_ssa::traits::{
BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
};
use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet};
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_index::IndexVec;
use rustc_middle::mir;
@ -13,7 +13,7 @@ use rustc_middle::ty::{self, TyCtxt};
use rustc_session::RemapFileNameExt;
use rustc_session::config::RemapPathScopeComponents;
use rustc_span::def_id::DefIdSet;
use rustc_span::{Span, Symbol};
use rustc_span::{SourceFile, StableSourceFileId};
use tracing::debug;
use crate::common::CodegenCx;
@ -22,6 +22,7 @@ use crate::coverageinfo::mapgen::covfun::prepare_covfun_record;
use crate::llvm;
mod covfun;
mod spans;
/// Generates and exports the coverage map, which is embedded in special
/// linker sections in the final binary.
@ -131,45 +132,51 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
generate_covmap_record(cx, covmap_version, &filenames_buffer);
}
/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
struct GlobalFileTable {
/// This "raw" table doesn't include the working dir, so a filename's
/// This "raw" table doesn't include the working dir, so a file's
/// global ID is its index in this set **plus one**.
raw_file_table: FxIndexSet<Symbol>,
raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
}
impl GlobalFileTable {
fn new() -> Self {
Self { raw_file_table: FxIndexSet::default() }
Self { raw_file_table: FxIndexMap::default() }
}
fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> GlobalFileId {
fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
// Ensure the given file has a table entry, and get its index.
let (raw_id, _) = self.raw_file_table.insert_full(file_name);
let entry = self.raw_file_table.entry(file.stable_id);
let raw_id = entry.index();
entry.or_insert_with(|| Arc::clone(file));
// The raw file table doesn't include an entry for the working dir
// (which has ID 0), so add 1 to get the correct ID.
GlobalFileId::from_usize(raw_id + 1)
}
fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
// requires setting the first filename to the compilation directory.
// Since rustc generates coverage maps with relative paths, the
// compilation directory can be combined with the relative paths
// to get absolute paths, if needed.
use rustc_session::RemapFileNameExt;
use rustc_session::config::RemapPathScopeComponents;
let working_dir: &str = &tcx
.sess
.opts
.working_dir
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
.to_string_lossy();
table.push(
tcx.sess
.opts
.working_dir
.for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
.to_string_lossy(),
);
// Insert the working dir at index 0, before the other filenames.
let filenames =
iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str));
llvm_cov::write_filenames_to_buffer(filenames)
// Add the regular entries after the base directory.
table.extend(self.raw_file_table.values().map(|file| {
file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
}));
llvm_cov::write_filenames_to_buffer(&table)
}
}
@ -182,7 +189,7 @@ rustc_index::newtype_index! {
/// An index into a function's list of global file IDs. That underlying list
/// of local-to-global mappings will be embedded in the function's record in
/// the `__llvm_covfun` linker section.
pub(crate) struct LocalFileId {}
struct LocalFileId {}
}
/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
@ -208,13 +215,6 @@ impl VirtualFileMapping {
}
}
fn span_file_name(tcx: TyCtxt<'_>, span: Span) -> Symbol {
let source_file = tcx.sess.source_map().lookup_source_file(span.lo());
let name =
source_file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy();
Symbol::intern(&name)
}
/// Generates the contents of the covmap record for this CGU, which mostly
/// consists of a header and a list of filenames. The record is then stored
/// as a global variable in the `__llvm_covmap` section.

View file

@ -10,16 +10,16 @@ use rustc_abi::Align;
use rustc_codegen_ssa::traits::{
BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
};
use rustc_middle::bug;
use rustc_middle::mir::coverage::{
CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
};
use rustc_middle::ty::{Instance, TyCtxt};
use rustc_span::Span;
use rustc_target::spec::HasTargetSpec;
use tracing::debug;
use crate::common::CodegenCx;
use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, span_file_name};
use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, spans};
use crate::coverageinfo::{ffi, llvm_cov};
use crate::llvm;
@ -67,12 +67,8 @@ pub(crate) fn prepare_covfun_record<'tcx>(
fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
if covfun.regions.has_no_regions() {
if covfun.is_used {
bug!("a used function should have had coverage mapping data but did not: {covfun:?}");
} else {
debug!(?covfun, "unused function had no coverage mapping data");
return None;
}
debug!(?covfun, "function has no mappings to embed; skipping");
return None;
}
Some(covfun)
@ -121,41 +117,58 @@ fn fill_region_tables<'tcx>(
covfun: &mut CovfunRecord<'tcx>,
) {
// Currently a function's mappings must all be in the same file as its body span.
let file_name = span_file_name(tcx, fn_cov_info.body_span);
let source_map = tcx.sess.source_map();
let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
// Look up the global file ID for that filename.
let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
// Look up the global file ID for that file.
let global_file_id = global_file_table.global_file_id_for_file(&source_file);
// Associate that global file ID with a local file ID for this function.
let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
debug!(" file id: {local_file_id:?} => {global_file_id:?} = '{file_name:?}'");
let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
&mut covfun.regions;
let make_cov_span = |span: Span| {
spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
};
let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
// For each counter/region pair in this function+file, convert it to a
// form suitable for FFI.
let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
for Mapping { kind, ref source_region } in &fn_cov_info.mappings {
for &Mapping { ref kind, span } in &fn_cov_info.mappings {
// If the mapping refers to counters/expressions that were removed by
// MIR opts, replace those occurrences with zero.
let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
let span = ffi::CoverageSpan::from_source_region(local_file_id, source_region);
// Convert the `Span` into coordinates that we can pass to LLVM, or
// discard the span if conversion fails. In rare, cases _all_ of a
// function's spans are discarded, and the rest of coverage codegen
// needs to handle that gracefully to avoid a repeat of #133606.
// We don't have a good test case for triggering that organically, so
// instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
// to force it to occur.
let Some(cov_span) = make_cov_span(span) else { continue };
if discard_all {
continue;
}
match kind {
MappingKind::Code(term) => {
code_regions.push(ffi::CodeRegion { span, counter: ffi::Counter::from_term(term) });
code_regions
.push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
}
MappingKind::Branch { true_term, false_term } => {
branch_regions.push(ffi::BranchRegion {
span,
cov_span,
true_counter: ffi::Counter::from_term(true_term),
false_counter: ffi::Counter::from_term(false_term),
});
}
MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
mcdc_branch_regions.push(ffi::MCDCBranchRegion {
span,
cov_span,
true_counter: ffi::Counter::from_term(true_term),
false_counter: ffi::Counter::from_term(false_term),
mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
@ -163,7 +176,7 @@ fn fill_region_tables<'tcx>(
}
MappingKind::MCDCDecision(mcdc_decision_params) => {
mcdc_decision_regions.push(ffi::MCDCDecisionRegion {
span,
cov_span,
mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params),
});
}

View file

@ -0,0 +1,126 @@
use rustc_middle::mir::coverage::FunctionCoverageInfo;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, Pos, SourceFile, Span};
use tracing::debug;
use crate::coverageinfo::ffi;
use crate::coverageinfo::mapgen::LocalFileId;
/// Converts the span into its start line and column, and end line and column.
///
/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
/// the compiler, these column numbers are denoted in **bytes**, because that's what
/// LLVM's `llvm-cov` tool expects to see in coverage maps.
///
/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
/// but it's hard to rule out entirely (especially in the presence of complex macros
/// or other expansions), and if it does happen then skipping a span or function is
/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
pub(crate) fn make_coverage_span(
file_id: LocalFileId,
source_map: &SourceMap,
fn_cov_info: &FunctionCoverageInfo,
file: &SourceFile,
span: Span,
) -> Option<ffi::CoverageSpan> {
let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
let lo = span.lo();
let hi = span.hi();
// Column numbers need to be in bytes, so we can't use the more convenient
// `SourceMap` methods for looking up file coordinates.
let line_and_byte_column = |pos: BytePos| -> Option<(usize, usize)> {
let rpos = file.relative_position(pos);
let line_index = file.lookup_line(rpos)?;
let line_start = file.lines()[line_index];
// Line numbers and column numbers are 1-based, so add 1 to each.
Some((line_index + 1, (rpos - line_start).to_usize() + 1))
};
let (mut start_line, start_col) = line_and_byte_column(lo)?;
let (mut end_line, end_col) = line_and_byte_column(hi)?;
// Apply an offset so that code in doctests has correct line numbers.
// FIXME(#79417): Currently we have no way to offset doctest _columns_.
start_line = source_map.doctest_offset_line(&file.name, start_line);
end_line = source_map.doctest_offset_line(&file.name, end_line);
check_coverage_span(ffi::CoverageSpan {
file_id: file_id.as_u32(),
start_line: start_line as u32,
start_col: start_col as u32,
end_line: end_line as u32,
end_col: end_col as u32,
})
}
fn ensure_non_empty_span(
source_map: &SourceMap,
fn_cov_info: &FunctionCoverageInfo,
span: Span,
) -> Option<Span> {
if !span.is_empty() {
return Some(span);
}
let lo = span.lo();
let hi = span.hi();
// The span is empty, so try to expand it to cover an adjacent '{' or '}',
// but only within the bounds of the body span.
let try_next = hi < fn_cov_info.body_span.hi();
let try_prev = fn_cov_info.body_span.lo() < lo;
if !(try_next || try_prev) {
return None;
}
source_map
.span_to_source(span, |src, start, end| try {
// Adjusting span endpoints by `BytePos(1)` is normally a bug,
// but in this case we have specifically checked that the character
// we're skipping over is one of two specific ASCII characters, so
// adjusting by exactly 1 byte is correct.
if try_next && src.as_bytes()[end] == b'{' {
Some(span.with_hi(hi + BytePos(1)))
} else if try_prev && src.as_bytes()[start - 1] == b'}' {
Some(span.with_lo(lo - BytePos(1)))
} else {
None
}
})
.ok()?
}
/// If `llvm-cov` sees a source region that is improperly ordered (end < start),
/// it will immediately exit with a fatal error. To prevent that from happening,
/// discard regions that are improperly ordered, or might be interpreted in a
/// way that makes them improperly ordered.
fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
// Line/column coordinates are supposed to be 1-based. If we ever emit
// coordinates of 0, `llvm-cov` might misinterpret them.
let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0);
// Coverage mappings use the high bit of `end_col` to indicate that a
// region is actually a "gap" region, so make sure it's unset.
let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0;
// If a region is improperly ordered (end < start), `llvm-cov` will exit
// with a fatal error, which is inconvenient for users and hard to debug.
let is_ordered = (start_line, start_col) <= (end_line, end_col);
if all_nonzero && end_col_has_high_bit_unset && is_ordered {
Some(cov_span)
} else {
debug!(
?cov_span,
?all_nonzero,
?end_col_has_high_bit_unset,
?is_ordered,
"Skipping source region that would be misinterpreted or rejected by LLVM"
);
// If this happens in a debug build, ICE to make it easier to notice.
debug_assert!(false, "Improper source region: {cov_span:?}");
None
}
}

View file

@ -17,6 +17,7 @@
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(rustdoc_internals)]
#![feature(try_blocks)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end