1
Fork 0

Improve debug symbol names to avoid ambiguity and work better with MSVC's debugger

There are several cases where names of types and functions in the debug info are either ambiguous, or not helpful, such as including ambiguous placeholders (e.g., `{{impl}}`, `{{closure}}` or `dyn '_`) or dropping qualifications (e.g., for dynamic types).

Instead, each debug symbol name should be unique and useful:
* Include disambiguators for anonymous `DefPathDataName` (closures and generators), and unify their formatting when used as a path-qualifier vs item being qualified.
* Qualify the principal trait for dynamic types.
* If there is no principal trait for a dynamic type, emit all other traits instead.
* Respect the `qualified` argument when emitting ref and pointer types.
* For implementations, emit the disambiguator.
* Print const generics when emitting generic parameters or arguments.

Additionally, when targeting MSVC, its debugger treats many command arguments as C++ expressions, even when the argument is defined to be a symbol name. As such, names in the debug info need to be more C++-like to be parsed correctly:
* Avoid characters with special meaning (`#`, `[`, `"`, `+`).
* Never start a name with `<` or `{` as this is treated as an operator.
* `>>` is always treated as a right-shift, even when parsing generic arguments (so add a space to avoid this).
* Emit function declarations using C/C++ style syntax (e.g., leading return type).
* Emit arrays as a synthetic `array$<type, size>` type.
* Include a `$` in all synthetic types as this is a legal character for C++, but not Rust (thus we avoid collisions with user types).
This commit is contained in:
Daniel Paoliello 2021-06-24 10:36:28 -07:00
parent 868c702d0c
commit aac8a88552
29 changed files with 855 additions and 357 deletions

View file

@ -1,9 +1,22 @@
// Type Names for Debug Info.
// Notes on targeting MSVC:
// In general, MSVC's debugger attempts to parse all arguments as C++ expressions,
// even if the argument is explicitly a symbol name.
// As such, there are many things that cause parsing issues:
// * `#` is treated as a special character for macros.
// * `{` or `<` at the beginning of a name is treated as an operator.
// * `>>` is always treated as a right-shift.
// * `[` in a name is treated like a regex bracket expression (match any char
// within the brackets).
// * `"` is treated as the start of a string.
use rustc_data_structures::fx::FxHashSet;
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_middle::ty::{self, subst::SubstsRef, AdtDef, Ty, TyCtxt};
use rustc_hir::definitions::{DefPathData, DefPathDataName, DisambiguatedDefPathData};
use rustc_middle::ty::subst::{GenericArgKind, SubstsRef};
use rustc_middle::ty::{self, AdtDef, Ty, TyCtxt};
use rustc_target::abi::{TagEncoding, Variants};
use std::fmt::Write;
@ -40,7 +53,13 @@ pub fn push_debuginfo_type_name<'tcx>(
ty::Bool => output.push_str("bool"),
ty::Char => output.push_str("char"),
ty::Str => output.push_str("str"),
ty::Never => output.push('!'),
ty::Never => {
if cpp_like_names {
output.push_str("never$");
} else {
output.push('!');
}
}
ty::Int(int_ty) => output.push_str(int_ty.name_str()),
ty::Uint(uint_ty) => output.push_str(uint_ty.name_str()),
ty::Float(float_ty) => output.push_str(float_ty.name_str()),
@ -50,12 +69,12 @@ pub fn push_debuginfo_type_name<'tcx>(
msvc_enum_fallback(tcx, t, def, substs, output, visited);
} else {
push_item_name(tcx, def.did, qualified, output);
push_type_params(tcx, substs, output, visited);
push_generic_params_internal(tcx, substs, output, visited);
}
}
ty::Tuple(component_types) => {
if cpp_like_names {
output.push_str("tuple<");
output.push_str("tuple$<");
} else {
output.push('(');
}
@ -70,54 +89,79 @@ pub fn push_debuginfo_type_name<'tcx>(
}
if cpp_like_names {
output.push('>');
push_close_angle_bracket(tcx, output);
} else {
output.push(')');
}
}
ty::RawPtr(ty::TypeAndMut { ty: inner_type, mutbl }) => {
if !cpp_like_names {
if cpp_like_names {
match mutbl {
hir::Mutability::Not => output.push_str("ptr_const$<"),
hir::Mutability::Mut => output.push_str("ptr_mut$<"),
}
} else {
output.push('*');
}
match mutbl {
hir::Mutability::Not => output.push_str("const "),
hir::Mutability::Mut => output.push_str("mut "),
match mutbl {
hir::Mutability::Not => output.push_str("const "),
hir::Mutability::Mut => output.push_str("mut "),
}
}
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
push_debuginfo_type_name(tcx, inner_type, qualified, output, visited);
if cpp_like_names {
output.push('*');
push_close_angle_bracket(tcx, output);
}
}
ty::Ref(_, inner_type, mutbl) => {
// Slices and `&str` are treated like C++ pointers when computing debug
// info for MSVC debugger. However, wrapping these types' names in a synthetic type
// causes the .natvis engine for WinDbg to fail to display their data, so we opt these
// types out to aid debugging in MSVC.
let is_slice_or_str = match *inner_type.kind() {
ty::Slice(_) | ty::Str => true,
_ => false,
};
if !cpp_like_names {
output.push('&');
}
output.push_str(mutbl.prefix_str());
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
if cpp_like_names {
// Slices and `&str` are treated like C++ pointers when computing debug
// info for MSVC debugger. However, adding '*' at the end of these types' names
// causes the .natvis engine for WinDbg to fail to display their data, so we opt these
// types out to aid debugging in MSVC.
match *inner_type.kind() {
ty::Slice(_) | ty::Str => {}
_ => output.push('*'),
output.push_str(mutbl.prefix_str());
} else if !is_slice_or_str {
match mutbl {
hir::Mutability::Not => output.push_str("ref$<"),
hir::Mutability::Mut => output.push_str("ref_mut$<"),
}
}
push_debuginfo_type_name(tcx, inner_type, qualified, output, visited);
if cpp_like_names && !is_slice_or_str {
push_close_angle_bracket(tcx, output);
}
}
ty::Array(inner_type, len) => {
output.push('[');
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
output.push_str(&format!("; {}", len.eval_usize(tcx, ty::ParamEnv::reveal_all())));
output.push(']');
if cpp_like_names {
output.push_str("array$<");
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
match len.val {
ty::ConstKind::Param(param) => write!(output, ",{}>", param.name).unwrap(),
_ => write!(output, ",{}>", len.eval_usize(tcx, ty::ParamEnv::reveal_all()))
.unwrap(),
}
} else {
output.push('[');
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
match len.val {
ty::ConstKind::Param(param) => write!(output, "; {}]", param.name).unwrap(),
_ => write!(output, "; {}]", len.eval_usize(tcx, ty::ParamEnv::reveal_all()))
.unwrap(),
}
}
}
ty::Slice(inner_type) => {
if cpp_like_names {
output.push_str("slice<");
output.push_str("slice$<");
} else {
output.push('[');
}
@ -125,19 +169,69 @@ pub fn push_debuginfo_type_name<'tcx>(
push_debuginfo_type_name(tcx, inner_type, true, output, visited);
if cpp_like_names {
output.push('>');
push_close_angle_bracket(tcx, output);
} else {
output.push(']');
}
}
ty::Dynamic(ref trait_data, ..) => {
if cpp_like_names {
output.push_str("dyn$<");
} else {
output.push_str("dyn ");
}
if let Some(principal) = trait_data.principal() {
let principal =
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), principal);
push_item_name(tcx, principal.def_id, false, output);
push_type_params(tcx, principal.substs, output, visited);
push_item_name(tcx, principal.def_id, qualified, output);
push_generic_params_internal(tcx, principal.substs, output, visited);
} else {
output.push_str("dyn '_");
// The auto traits come ordered by `DefPathHash`, which guarantees stability if the
// environment is stable (e.g., incremental builds) but not otherwise (e.g.,
// updated compiler version, different target).
//
// To avoid that causing instabilities in test output, sort the auto-traits
// alphabetically.
let mut auto_traits: Vec<_> = trait_data
.iter()
.filter_map(|predicate| {
match tcx.normalize_erasing_late_bound_regions(
ty::ParamEnv::reveal_all(),
predicate,
) {
ty::ExistentialPredicate::AutoTrait(def_id) => {
let mut name = String::new();
push_item_name(tcx, def_id, true, &mut name);
Some(name)
}
_ => None,
}
})
.collect();
auto_traits.sort();
for name in auto_traits {
output.push_str(&name);
if cpp_like_names {
output.push_str(", ");
} else {
output.push_str(" + ");
}
}
// Remove the trailing joining characters. For cpp_like_names
// this is `, ` otherwise ` + `.
output.pop();
output.pop();
if !cpp_like_names {
output.pop();
}
}
if cpp_like_names {
push_close_angle_bracket(tcx, output);
}
}
ty::FnDef(..) | ty::FnPtr(_) => {
@ -155,23 +249,37 @@ pub fn push_debuginfo_type_name<'tcx>(
// use a dummy string that should make it clear
// that something unusual is going on
if !visited.insert(t) {
output.push_str("<recursive_type>");
output.push_str(if cpp_like_names {
"recursive_type$"
} else {
"<recursive_type>"
});
return;
}
let sig = t.fn_sig(tcx);
output.push_str(sig.unsafety().prefix_str());
let sig =
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), t.fn_sig(tcx));
let abi = sig.abi();
if abi != rustc_target::spec::abi::Abi::Rust {
output.push_str("extern \"");
output.push_str(abi.name());
output.push_str("\" ");
if cpp_like_names {
// Format as a C++ function pointer: return_type (*)(params...)
if sig.output().is_unit() {
output.push_str("void");
} else {
push_debuginfo_type_name(tcx, sig.output(), true, output, visited);
}
output.push_str(" (*)(");
} else {
output.push_str(sig.unsafety.prefix_str());
if sig.abi != rustc_target::spec::abi::Abi::Rust {
output.push_str("extern \"");
output.push_str(sig.abi.name());
output.push_str("\" ");
}
output.push_str("fn(");
}
output.push_str("fn(");
let sig = tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), sig);
if !sig.inputs().is_empty() {
for &parameter_type in sig.inputs() {
push_debuginfo_type_name(tcx, parameter_type, true, output, visited);
@ -191,7 +299,7 @@ pub fn push_debuginfo_type_name<'tcx>(
output.push(')');
if !sig.output().is_unit() {
if !cpp_like_names && !sig.output().is_unit() {
output.push_str(" -> ");
push_debuginfo_type_name(tcx, sig.output(), true, output, visited);
}
@ -207,17 +315,14 @@ pub fn push_debuginfo_type_name<'tcx>(
// processing
visited.remove(t);
}
ty::Closure(def_id, ..) => {
output.push_str(&format!(
"closure-{}",
tcx.def_key(def_id).disambiguated_data.disambiguator
));
}
ty::Generator(def_id, ..) => {
output.push_str(&format!(
"generator-{}",
tcx.def_key(def_id).disambiguated_data.disambiguator
));
ty::Closure(def_id, ..) | ty::Generator(def_id, ..) => {
let key = tcx.def_key(def_id);
if qualified {
let parent_def_id = DefId { index: key.parent.unwrap(), ..def_id };
push_item_name(tcx, parent_def_id, true, output);
output.push_str("::");
}
push_unqualified_item_name(tcx, def_id, key.disambiguated_data, output);
}
// Type parameters from polymorphized functions.
ty::Param(_) => {
@ -273,7 +378,7 @@ pub fn push_debuginfo_type_name<'tcx>(
output.push_str("enum$<");
push_item_name(tcx, def.did, true, output);
push_type_params(tcx, substs, output, visited);
push_generic_params_internal(tcx, substs, output, visited);
let dataful_variant_name = def.variants[*dataful_variant].ident.as_str();
@ -281,47 +386,116 @@ pub fn push_debuginfo_type_name<'tcx>(
} else {
output.push_str("enum$<");
push_item_name(tcx, def.did, true, output);
push_type_params(tcx, substs, output, visited);
output.push('>');
push_generic_params_internal(tcx, substs, output, visited);
push_close_angle_bracket(tcx, output);
}
}
// Appends the name of the item identified by `def_id` to `output`.
//
// With `qualified` set, the crate name is written first, followed by every
// element of the item's definition path, each prefixed with `::`.
// Otherwise only the item's own name (as reported by `tcx.item_name`) is written.
fn push_item_name(tcx: TyCtxt<'tcx>, def_id: DefId, qualified: bool, output: &mut String) {
if qualified {
// Fully qualified form: `<crate>::<path element>::...`.
output.push_str(&tcx.crate_name(def_id.krate).as_str());
for path_element in tcx.def_path(def_id).data {
write!(output, "::{}", path_element.data).unwrap();
}
} else {
// Unqualified form: just the item's own name.
output.push_str(&tcx.item_name(def_id).as_str());
}
}
// Pushes the type parameters in the given `InternalSubsts` to the output string.
// This ignores region parameters, since they can't reliably be
// reconstructed for items from non-local crates. For local crates, this
// would be possible but with inlining and LTO we have to use the least
// common denominator - otherwise we would run into conflicts.
//
// Only the entries yielded by `substs.types()` are written. They are wrapped
// in `<...>` and separated by `, `; nothing at all is written when there are
// no type entries.
fn push_type_params<'tcx>(
tcx: TyCtxt<'tcx>,
substs: SubstsRef<'tcx>,
output: &mut String,
visited: &mut FxHashSet<Ty<'tcx>>,
) {
// No type parameters: emit nothing (not even an empty `<>`).
if substs.types().next().is_none() {
return;
}
output.push('<');
for type_parameter in substs.types() {
// Type arguments are always written with their qualified name.
push_debuginfo_type_name(tcx, type_parameter, true, output, visited);
output.push_str(", ");
}
// Drop the trailing `, ` (two characters) left behind by the last iteration.
output.pop();
output.pop();
output.push('>');
}
}
/// Appends the name of the item identified by `def_id` to `output`.
///
/// When `qualified` is set, the names of all ancestors in the item's definition
/// path are written first (outermost first), each followed by `::`. Otherwise
/// only the item's own unqualified name is written.
pub fn push_item_name(tcx: TyCtxt<'tcx>, def_id: DefId, qualified: bool, output: &mut String) {
    let key = tcx.def_key(def_id);

    if let (true, Some(parent_index)) = (qualified, key.parent) {
        // The parent index is resolved in the same crate as `def_id`.
        let parent_def_id = DefId { index: parent_index, ..def_id };
        push_item_name(tcx, parent_def_id, true, output);
        output.push_str("::");
    }

    push_unqualified_item_name(tcx, def_id, key.disambiguated_data, output);
}
/// Appends the unqualified name of a single definition-path component to `output`.
///
/// * The crate root is written as the crate's name.
/// * Named components are written verbatim.
/// * Anonymous components, and generators, are written together with their
///   disambiguator: `<label>$<n>` when the target is MSVC-like, and
///   `{<label>#<n>}` otherwise.
fn push_unqualified_item_name(
    tcx: TyCtxt<'tcx>,
    def_id: DefId,
    disambiguated_data: DisambiguatedDefPathData,
    output: &mut String,
) {
    let msvc_like = tcx.sess.target.is_like_msvc;
    let disambiguator = disambiguated_data.disambiguator;

    // Shared formatting for every name that carries a disambiguator.
    let push_anonymous = |output: &mut String, label: &dyn std::fmt::Display| {
        let written = if msvc_like {
            write!(output, "{}${}", label, disambiguator)
        } else {
            write!(output, "{{{}#{}}}", label, disambiguator)
        };
        written.unwrap();
    };

    match disambiguated_data.data {
        DefPathData::CrateRoot => output.push_str(&tcx.crate_name(def_id.krate).as_str()),
        // Generators share `ClosureExpr` with closures, but get their own label
        // so the two can be told apart in the debug info.
        DefPathData::ClosureExpr if tcx.generator_kind(def_id).is_some() => {
            push_anonymous(output, &"generator")
        }
        _ => match disambiguated_data.data.name() {
            DefPathDataName::Named(name) => output.push_str(&name.as_str()),
            DefPathDataName::Anon { namespace } => push_anonymous(output, &namespace),
        },
    }
}
// Writes the generic arguments found in `substs` to `output` as a
// comma-separated list wrapped in angle brackets; writes nothing if there are
// no non-erasable arguments.
//
// Region arguments are deliberately left out: they cannot be reconstructed
// reliably for items from non-local crates, and although that would be
// possible for local crates, inlining and LTO force us to use the least
// common denominator - otherwise we would run into conflicts.
fn push_generic_params_internal<'tcx>(
    tcx: TyCtxt<'tcx>,
    substs: SubstsRef<'tcx>,
    output: &mut String,
    visited: &mut FxHashSet<Ty<'tcx>>,
) {
    let has_generics = substs.non_erasable_generics().next().is_some();
    if !has_generics {
        return;
    }

    debug_assert_eq!(substs, tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), substs));

    output.push('<');

    for (position, generic_arg) in substs.non_erasable_generics().enumerate() {
        // Separator goes between entries, never after the last one.
        if position != 0 {
            output.push_str(", ");
        }

        match generic_arg {
            GenericArgKind::Type(arg_type) => {
                push_debuginfo_type_name(tcx, arg_type, true, output, visited);
            }
            GenericArgKind::Const(arg_const) => {
                let written = match arg_const.val {
                    // Still-generic const: print the parameter's name.
                    ty::ConstKind::Param(param) => write!(output, "{}", param.name),
                    // Anything else is evaluated and printed as a hex literal.
                    _ => write!(
                        output,
                        "0x{:x}",
                        arg_const.eval_bits(tcx, ty::ParamEnv::reveal_all(), arg_const.ty)
                    ),
                };
                written.unwrap();
            }
            other => bug!("Unexpected non-erasable generic: {:?}", other),
        }
    }

    push_close_angle_bracket(tcx, output);
}
/// Public entry point for writing the generic arguments in `substs` to `output`.
///
/// Starts from an empty `visited` set and hands off to
/// `push_generic_params_internal`.
pub fn push_generic_params<'tcx>(tcx: TyCtxt<'tcx>, substs: SubstsRef<'tcx>, output: &mut String) {
    push_generic_params_internal(tcx, substs, output, &mut FxHashSet::default());
}
/// Appends a closing `>` to `output`.
///
/// The MSVC debugger always reads `>>` as a right-shift, even while parsing
/// template arguments. On MSVC-like targets a space is therefore inserted first
/// whenever `output` already ends with `>`.
fn push_close_angle_bracket<'tcx>(tcx: TyCtxt<'tcx>, output: &mut String) {
    let needs_separator = tcx.sess.target.is_like_msvc && output.ends_with('>');
    if needs_separator {
        output.push(' ');
    }
    output.push('>');
}