Rollup merge of #119033 - Zalathar:unicode, r=davidtwco
coverage: `llvm-cov` expects column numbers to be bytes, not code points

Normally the compiler emits column numbers as a 1-based number of Unicode code points. But when we embed coverage mappings for `-Cinstrument-coverage`, those mappings will ultimately be read by the `llvm-cov` tool. That tool assumes that column numbers are 1-based numbers of *bytes*, and relies on that assumption when slicing up source code to apply highlighting (in HTML reports, and in text-based reports with colour).

For the very common case of all-ASCII source code, bytes and code points are the same, so the difference isn't noticeable. But for code that contains non-ASCII characters, emitting column numbers as code points will result in `llvm-cov` slicing strings in the wrong places, producing mangled output or fatal errors.

(See https://github.com/taiki-e/cargo-llvm-cov/issues/275 as an example of what can go wrong.)
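As a standalone illustration of the problem described above (not part of the change itself), the sketch below computes both the code-point column and the byte column of the same token on a line containing non-ASCII characters; the sample line and names are made up for the example.

```rust
fn main() {
    let line = "let déjà = 1; foo();"; // 'é' and 'à' are 2 bytes each in UTF-8
    let byte_offset = line.find("foo").unwrap(); // 0-based byte offset of `foo`
    let char_offset = line[..byte_offset].chars().count(); // 0-based code-point offset

    // 1-based columns: what most compiler output reports vs. what llvm-cov expects.
    assert_eq!(char_offset + 1, 15); // code-point column
    assert_eq!(byte_offset + 1, 17); // byte column

    // Treating the code-point column as a byte column lands two bytes short of
    // `foo`, which is how llvm-cov ends up slicing source text in the wrong
    // place for non-ASCII code.
    assert_ne!(&line[char_offset..char_offset + 3], "foo");
}
```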
Commit 70e3f8d240 · 5 changed files with 201 additions and 21 deletions
@@ -23,7 +23,7 @@ use rustc_middle::mir::{
 use rustc_middle::ty::TyCtxt;
 use rustc_span::def_id::LocalDefId;
 use rustc_span::source_map::SourceMap;
-use rustc_span::{Span, Symbol};
+use rustc_span::{BytePos, Pos, RelativeBytePos, Span, Symbol};
 
 /// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected
 /// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen
@@ -107,6 +107,12 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> {
         );
 
         let mappings = self.create_mappings(&coverage_spans, &coverage_counters);
+        if mappings.is_empty() {
+            // No spans could be converted into valid mappings, so skip this function.
+            debug!("no spans could be converted into valid mappings; skipping");
+            return;
+        }
+
         self.inject_coverage_statements(bcb_has_coverage_spans, &coverage_counters);
 
         self.mir_body.function_coverage_info = Some(Box::new(FunctionCoverageInfo {
@@ -148,9 +154,9 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> {
             // Flatten the spans into individual term/span pairs.
             .flat_map(|(term, spans)| spans.iter().map(move |&span| (term, span)))
             // Convert each span to a code region, and create the final mapping.
-            .map(|(term, span)| {
-                let code_region = make_code_region(source_map, file_name, span, body_span);
-                Mapping { term, code_region }
+            .filter_map(|(term, span)| {
+                let code_region = make_code_region(source_map, file_name, span, body_span)?;
+                Some(Mapping { term, code_region })
             })
             .collect::<Vec<_>>()
     }
@@ -252,13 +258,22 @@ fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb
     data.statements.insert(0, statement);
 }
 
-/// Convert the Span into its file name, start line and column, and end line and column
+/// Convert the Span into its file name, start line and column, and end line and column.
+///
+/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
+/// the compiler, these column numbers are denoted in **bytes**, because that's what
+/// LLVM's `llvm-cov` tool expects to see in coverage maps.
+///
+/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// but it's hard to rule out entirely (especially in the presence of complex macros
+/// or other expansions), and if it does happen then skipping a span or function is
+/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
 fn make_code_region(
     source_map: &SourceMap,
     file_name: Symbol,
     span: Span,
     body_span: Span,
-) -> CodeRegion {
+) -> Option<CodeRegion> {
     debug!(
         "Called make_code_region(file_name={}, span={}, body_span={})",
         file_name,
@@ -266,27 +281,62 @@ fn make_code_region(
         source_map.span_to_diagnostic_string(body_span)
     );
 
-    let (file, mut start_line, mut start_col, mut end_line, mut end_col) =
-        source_map.span_to_location_info(span);
-    if span.hi() == span.lo() {
-        // Extend an empty span by one character so the region will be counted.
-        if span.hi() == body_span.hi() {
-            start_col = start_col.saturating_sub(1);
-        } else {
-            end_col = start_col + 1;
-        }
-    };
-    if let Some(file) = file {
-        start_line = source_map.doctest_offset_line(&file.name, start_line);
-        end_line = source_map.doctest_offset_line(&file.name, end_line);
+    let lo = span.lo();
+    let hi = span.hi();
+
+    let file = source_map.lookup_source_file(lo);
+    if !file.contains(hi) {
+        debug!(?span, ?file, ?lo, ?hi, "span crosses multiple files; skipping");
+        return None;
     }
-    CodeRegion {
+
+    // Column numbers need to be in bytes, so we can't use the more convenient
+    // `SourceMap` methods for looking up file coordinates.
+    let rpos_and_line_and_byte_column = |pos: BytePos| -> Option<(RelativeBytePos, usize, usize)> {
+        let rpos = file.relative_position(pos);
+        let line_index = file.lookup_line(rpos)?;
+        let line_start = file.lines()[line_index];
+        // Line numbers and column numbers are 1-based, so add 1 to each.
+        Some((rpos, line_index + 1, (rpos - line_start).to_usize() + 1))
+    };
+
+    let (lo_rpos, mut start_line, mut start_col) = rpos_and_line_and_byte_column(lo)?;
+    let (hi_rpos, mut end_line, mut end_col) = rpos_and_line_and_byte_column(hi)?;
+
+    // If the span is empty, try to expand it horizontally by one character's
+    // worth of bytes, so that it is more visible in `llvm-cov` reports.
+    // We do this after resolving line/column numbers, so that empty spans at the
+    // end of a line get an extra column instead of wrapping to the next line.
+    if span.is_empty()
+        && body_span.contains(span)
+        && let Some(src) = &file.src
+    {
+        // Prefer to expand the end position, if it won't go outside the body span.
+        if hi < body_span.hi() {
+            let hi_rpos = hi_rpos.to_usize();
+            let nudge_bytes = src.ceil_char_boundary(hi_rpos + 1) - hi_rpos;
+            end_col += nudge_bytes;
+        } else if lo > body_span.lo() {
+            let lo_rpos = lo_rpos.to_usize();
+            let nudge_bytes = lo_rpos - src.floor_char_boundary(lo_rpos - 1);
+            // Subtract the nudge, but don't go below column 1.
+            start_col = start_col.saturating_sub(nudge_bytes).max(1);
+        }
+        // If neither nudge could be applied, stick with the empty span coordinates.
+    }
+
+    // Apply an offset so that code in doctests has correct line numbers.
+    // FIXME(#79417): Currently we have no way to offset doctest _columns_.
+    start_line = source_map.doctest_offset_line(&file.name, start_line);
+    end_line = source_map.doctest_offset_line(&file.name, end_line);
+
+    Some(CodeRegion {
         file_name,
         start_line: start_line as u32,
         start_col: start_col as u32,
         end_line: end_line as u32,
         end_col: end_col as u32,
-    }
+    })
 }
 
 fn is_eligible_for_coverage(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
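For readers without the compiler's `SourceFile`/`BytePos` machinery at hand, here is a rough standalone sketch of the coordinate arithmetic that the new `rpos_and_line_and_byte_column` closure performs, transplanted onto a plain `&str`. The helper name and the use of a bare string are assumptions made for the illustration, not compiler API.

```rust
// Illustrative sketch only: the real code works on SourceFile/BytePos, but the
// arithmetic is the same (1-based line number, 1-based *byte* column).
fn line_and_byte_column(src: &str, byte_offset: usize) -> Option<(usize, usize)> {
    if byte_offset > src.len() {
        return None;
    }
    // Byte index of the start of the line containing `byte_offset`.
    let line_start = src[..byte_offset].rfind('\n').map_or(0, |i| i + 1);
    // Line number = number of newlines before the offset, plus 1.
    let line = src[..byte_offset].matches('\n').count() + 1;
    // Column = byte distance from the line start, plus 1 (what llvm-cov expects).
    let col = byte_offset - line_start + 1;
    Some((line, col))
}

fn main() {
    let src = "fn main() {\n    let déjà = 1;\n}\n";
    let offset = src.find("déjà").unwrap();
    // `déjà` starts on line 2; its byte column counts the 4 leading spaces and "let ".
    assert_eq!(line_and_byte_column(src, offset), Some((2, 9)));
}
```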
@@ -9,6 +9,7 @@
 #![feature(min_specialization)]
 #![feature(never_type)]
 #![feature(option_get_or_insert_default)]
+#![feature(round_char_boundary)]
 #![feature(trusted_step)]
 #![feature(try_blocks)]
 #![feature(yeet_expr)]
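The empty-span nudge logic in `make_code_region` relies on `str::floor_char_boundary` and `str::ceil_char_boundary`, which are still gated behind the unstable `round_char_boundary` feature enabled in the hunk above. The snippet below is nightly-only and purely illustrative: it shows how those methods round a mid-character byte index to a valid UTF-8 boundary, which is what keeps the one-column nudge from splitting a multi-byte character.

```rust
#![feature(round_char_boundary)] // nightly-only, matching the feature gate added above

fn main() {
    let src = "é = 1;"; // 'é' occupies bytes 0 and 1
    // Byte index 1 falls in the middle of 'é'; round down/up to valid boundaries.
    assert_eq!(src.floor_char_boundary(1), 0);
    assert_eq!(src.ceil_char_boundary(1), 2);

    // Nudging an empty span that starts at byte 0 forward by "one character"
    // therefore advances the end column by 2 bytes, as in the coverage code:
    let hi_rpos = 0;
    let nudge_bytes = src.ceil_char_boundary(hi_rpos + 1) - hi_rpos;
    assert_eq!(nudge_bytes, 2);
}
```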