1
Fork 0

Auto merge of #98393 - michaelwoerister:new-cpp-like-enum-debuginfo, r=wesleywiser

debuginfo: Generalize C++-like encoding for enums.

The updated encoding should be able to handle niche layouts where more than one variant has fields (as introduced in https://github.com/rust-lang/rust/pull/94075).

The new encoding is more uniform as there is no structural difference between direct-tag, niche-tag, and no-tag layouts anymore. The only difference between those cases is that the "dataful" variant in a niche-tag enum will have a `(start, end)` pair denoting the tag range instead of a single value.

The new encoding now also supports 128-bit tags, which occur in at least some standard library types. These tags are represented as `u64` pairs so that debuggers (which don't always have support for 128-bit integers) can reliably deal with them. The downside is that this adds quite a bit of complexity to the encoding and especially to the corresponding NatVis.

The new encoding seems to increase the size of debuginfo (PDB files) on `x86_64-pc-windows-msvc` by 10-15%, while the size of the binaries themselves is not affected. Release builds were built with `-Cdebuginfo=2`; all numbers are in kilobytes:

EXE | before | after | relative
-- | -- | -- | --
cargo (debug) | 40453 | 40450 | +0%
ripgrep (debug) | 10275 | 10273 | +0%
cargo (release) | 16186 | 16185 | +0%
ripgrep (release) | 4727 | 4726 | +0%

PDB | before | after | relative
-- | -- | -- | --
cargo (debug) | 236524 | 261412 | +11%
ripgrep (debug) | 53140 | 59060 | +11%
cargo (release) | 148516 | 169620 | +14%
ripgrep (release) | 10676 | 11804 | +11%

Given that the new encoding is more general, this is to be expected. Only platforms using C++-like debuginfo are affected, which currently means only `*-pc-windows-msvc`.

*TODO*
- [x] Properly update documentation
- [x] Add regression tests for new optimized enum layouts as introduced by #94075.

r? `@wesleywiser`
This commit is contained in:
bors 2022-08-15 12:59:53 +00:00
commit 4916e2b9e6
23 changed files with 1137 additions and 443 deletions

View file

@ -18,11 +18,10 @@ use rustc_hir::definitions::{DefPathData, DefPathDataName, DisambiguatedDefPathD
use rustc_hir::{AsyncGeneratorKind, GeneratorKind, Mutability};
use rustc_middle::ty::layout::{IntegerExt, TyAndLayout};
use rustc_middle::ty::subst::{GenericArgKind, SubstsRef};
use rustc_middle::ty::{self, ExistentialProjection, GeneratorSubsts, ParamEnv, Ty, TyCtxt};
use rustc_target::abi::{Integer, TagEncoding, Variants};
use rustc_middle::ty::{self, ExistentialProjection, ParamEnv, Ty, TyCtxt};
use rustc_target::abi::Integer;
use smallvec::SmallVec;
use std::borrow::Cow;
use std::fmt::Write;
use crate::debuginfo::wants_c_like_enum_debuginfo;
@ -98,7 +97,6 @@ fn push_debuginfo_type_name<'tcx>(
if let Some(ty_and_layout) = layout_for_cpp_like_fallback {
msvc_enum_fallback(
tcx,
ty_and_layout,
&|output, visited| {
push_item_name(tcx, def.did(), true, output);
@ -391,11 +389,10 @@ fn push_debuginfo_type_name<'tcx>(
// Name will be "{closure_env#0}<T1, T2, ...>", "{generator_env#0}<T1, T2, ...>", or
// "{async_fn_env#0}<T1, T2, ...>", etc.
// In the case of cpp-like debuginfo, the name additionally gets wrapped inside of
// an artificial `enum$<>` type, as defined in msvc_enum_fallback().
// an artificial `enum2$<>` type, as defined in msvc_enum_fallback().
if cpp_like_debuginfo && t.is_generator() {
let ty_and_layout = tcx.layout_of(ParamEnv::reveal_all().and(t)).unwrap();
msvc_enum_fallback(
tcx,
ty_and_layout,
&|output, visited| {
push_closure_or_generator_name(tcx, def_id, substs, true, output, visited);
@ -428,58 +425,17 @@ fn push_debuginfo_type_name<'tcx>(
/// MSVC names enums differently than other platforms so that the debugging visualization
// format (natvis) is able to understand enums and render the active variant correctly in the
// debugger. For more information, look in `src/etc/natvis/intrinsic.natvis` and
// `EnumMemberDescriptionFactor::create_member_descriptions`.
// debugger. For more information, look in
// rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs.
fn msvc_enum_fallback<'tcx>(
tcx: TyCtxt<'tcx>,
ty_and_layout: TyAndLayout<'tcx>,
push_inner: &dyn Fn(/*output*/ &mut String, /*visited*/ &mut FxHashSet<Ty<'tcx>>),
output: &mut String,
visited: &mut FxHashSet<Ty<'tcx>>,
) {
debug_assert!(!wants_c_like_enum_debuginfo(ty_and_layout));
let ty = ty_and_layout.ty;
output.push_str("enum$<");
output.push_str("enum2$<");
push_inner(output, visited);
let variant_name = |variant_index| match ty.kind() {
ty::Adt(adt_def, _) => {
debug_assert!(adt_def.is_enum());
Cow::from(adt_def.variant(variant_index).name.as_str())
}
ty::Generator(..) => GeneratorSubsts::variant_name(variant_index),
_ => unreachable!(),
};
if let Variants::Multiple {
tag_encoding: TagEncoding::Niche { dataful_variant, .. },
tag,
variants,
..
} = &ty_and_layout.variants
{
let dataful_variant_layout = &variants[*dataful_variant];
// calculate the range of values for the dataful variant
let dataful_discriminant_range =
dataful_variant_layout.largest_niche().unwrap().valid_range;
let min = dataful_discriminant_range.start;
let min = tag.size(&tcx).truncate(min);
let max = dataful_discriminant_range.end;
let max = tag.size(&tcx).truncate(max);
let dataful_variant_name = variant_name(*dataful_variant);
write!(output, ", {}, {}, {}", min, max, dataful_variant_name).unwrap();
} else if let Variants::Single { index: variant_idx } = &ty_and_layout.variants {
// Uninhabited enums can't be constructed and should never need to be visualized so
// skip this step for them.
if !ty_and_layout.abi.is_uninhabited() {
write!(output, ", {}", variant_name(*variant_idx)).unwrap();
}
}
push_close_angle_bracket(true, output);
}