MCP #705: Provide the option -Csymbol-mangling-version=hashed -Z unstable-options
to shorten symbol names by replacing them with a digest.
Enrich test cases
This commit is contained in:
parent
7ffc697ce1
commit
6e53e66bd3
15 changed files with 211 additions and 49 deletions
43
compiler/rustc_symbol_mangling/src/hashed.rs
Normal file
43
compiler/rustc_symbol_mangling/src/hashed.rs
Normal file
|
@ -0,0 +1,43 @@
|
|||
use crate::v0;
|
||||
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
|
||||
use rustc_hir::def_id::CrateNum;
|
||||
use rustc_middle::ty::{Instance, TyCtxt};
|
||||
|
||||
use std::fmt::Write;
|
||||
|
||||
pub(super) fn mangle<'tcx>(
|
||||
tcx: TyCtxt<'tcx>,
|
||||
instance: Instance<'tcx>,
|
||||
instantiating_crate: Option<CrateNum>,
|
||||
full_mangling_name: impl FnOnce() -> String,
|
||||
) -> String {
|
||||
// The symbol of a generic function may be scattered in multiple downstream dylibs.
|
||||
// If the symbol of a generic function still contains `crate name`, hash conflicts between the
|
||||
// generic funcion and other symbols of the same `crate` cannot be detected in time during
|
||||
// construction. This symbol conflict is left over until it occurs during run time.
|
||||
// In this case, `instantiating-crate name` is used to replace `crate name` can completely
|
||||
// eliminate the risk of the preceding potential hash conflict.
|
||||
let crate_num =
|
||||
if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate };
|
||||
|
||||
let mut symbol = "_RNxC".to_string();
|
||||
v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);
|
||||
|
||||
let hash = tcx.with_stable_hashing_context(|mut hcx| {
|
||||
let mut hasher = StableHasher::new();
|
||||
full_mangling_name().hash_stable(&mut hcx, &mut hasher);
|
||||
hasher.finish::<Hash64>().as_u64()
|
||||
});
|
||||
|
||||
push_hash64(hash, &mut symbol);
|
||||
|
||||
symbol
|
||||
}
|
||||
|
||||
// The hash is encoded based on `base-62` and the final terminator `_` is removed because it does
|
||||
// not help prevent hash collisions
|
||||
fn push_hash64(hash: u64, output: &mut String) {
|
||||
let hash = v0::encode_integer_62(hash);
|
||||
let hash_len = hash.len();
|
||||
let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]);
|
||||
}
|
|
@ -111,6 +111,7 @@ use rustc_middle::query::Providers;
|
|||
use rustc_middle::ty::{self, Instance, TyCtxt};
|
||||
use rustc_session::config::SymbolManglingVersion;
|
||||
|
||||
mod hashed;
|
||||
mod legacy;
|
||||
mod v0;
|
||||
|
||||
|
@ -265,6 +266,9 @@ fn compute_symbol_name<'tcx>(
|
|||
let symbol = match mangling_version {
|
||||
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
|
||||
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
|
||||
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
|
||||
v0::mangle(tcx, instance, instantiating_crate)
|
||||
}),
|
||||
};
|
||||
|
||||
debug_assert!(
|
||||
|
|
|
@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
|
|||
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
|
||||
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
|
||||
fn push_integer_62(&mut self, x: u64) {
|
||||
if let Some(x) = x.checked_sub(1) {
|
||||
base_n::push_str(x as u128, 62, &mut self.out);
|
||||
}
|
||||
self.push("_");
|
||||
push_integer_62(x, &mut self.out)
|
||||
}
|
||||
|
||||
/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
|
||||
|
@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
|
|||
}
|
||||
|
||||
fn push_ident(&mut self, ident: &str) {
|
||||
let mut use_punycode = false;
|
||||
for b in ident.bytes() {
|
||||
match b {
|
||||
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
|
||||
0x80..=0xff => use_punycode = true,
|
||||
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
|
||||
}
|
||||
}
|
||||
|
||||
let punycode_string;
|
||||
let ident = if use_punycode {
|
||||
self.push("u");
|
||||
|
||||
// FIXME(eddyb) we should probably roll our own punycode implementation.
|
||||
let mut punycode_bytes = match punycode::encode(ident) {
|
||||
Ok(s) => s.into_bytes(),
|
||||
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
|
||||
};
|
||||
|
||||
// Replace `-` with `_`.
|
||||
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
|
||||
*c = b'_';
|
||||
}
|
||||
|
||||
// FIXME(eddyb) avoid rechecking UTF-8 validity.
|
||||
punycode_string = String::from_utf8(punycode_bytes).unwrap();
|
||||
&punycode_string
|
||||
} else {
|
||||
ident
|
||||
};
|
||||
|
||||
let _ = write!(self.out, "{}", ident.len());
|
||||
|
||||
// Write a separating `_` if necessary (leading digit or `_`).
|
||||
if let Some('_' | '0'..='9') = ident.chars().next() {
|
||||
self.push("_");
|
||||
}
|
||||
|
||||
self.push(ident);
|
||||
push_ident(ident, &mut self.out)
|
||||
}
|
||||
|
||||
fn path_append_ns(
|
||||
|
@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
/// Push a `_`-terminated base 62 integer, using the format
|
||||
/// specified in the RFC as `<base-62-number>`, that is:
|
||||
/// * `x = 0` is encoded as just the `"_"` terminator
|
||||
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
|
||||
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
|
||||
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
|
||||
if let Some(x) = x.checked_sub(1) {
|
||||
base_n::push_str(x as u128, 62, output);
|
||||
}
|
||||
output.push('_');
|
||||
}
|
||||
|
||||
pub(crate) fn encode_integer_62(x: u64) -> String {
|
||||
let mut output = String::new();
|
||||
push_integer_62(x, &mut output);
|
||||
output
|
||||
}
|
||||
|
||||
pub(crate) fn push_ident(ident: &str, output: &mut String) {
|
||||
let mut use_punycode = false;
|
||||
for b in ident.bytes() {
|
||||
match b {
|
||||
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
|
||||
0x80..=0xff => use_punycode = true,
|
||||
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
|
||||
}
|
||||
}
|
||||
|
||||
let punycode_string;
|
||||
let ident = if use_punycode {
|
||||
output.push('u');
|
||||
|
||||
// FIXME(eddyb) we should probably roll our own punycode implementation.
|
||||
let mut punycode_bytes = match punycode::encode(ident) {
|
||||
Ok(s) => s.into_bytes(),
|
||||
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
|
||||
};
|
||||
|
||||
// Replace `-` with `_`.
|
||||
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
|
||||
*c = b'_';
|
||||
}
|
||||
|
||||
// FIXME(eddyb) avoid rechecking UTF-8 validity.
|
||||
punycode_string = String::from_utf8(punycode_bytes).unwrap();
|
||||
&punycode_string
|
||||
} else {
|
||||
ident
|
||||
};
|
||||
|
||||
let _ = write!(output, "{}", ident.len());
|
||||
|
||||
// Write a separating `_` if necessary (leading digit or `_`).
|
||||
if let Some('_' | '0'..='9') = ident.chars().next() {
|
||||
output.push('_');
|
||||
}
|
||||
|
||||
output.push_str(ident);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue