Stabilize the size of incr comp object file names

This commit is contained in:
Ben Kimock 2024-04-03 14:09:21 -04:00
parent 290d792411
commit 6ee3713b08
9 changed files with 146 additions and 71 deletions

View file

@ -6,7 +6,8 @@ use gccjit::{
use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods}; use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods};
use rustc_data_structures::base_n; use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::mir::mono::CodegenUnit; use rustc_middle::mir::mono::CodegenUnit;
use rustc_middle::span_bug; use rustc_middle::span_bug;
@ -621,7 +622,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
let mut name = String::with_capacity(prefix.len() + 6); let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix); name.push_str(prefix);
name.push_str("."); name.push_str(".");
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name name
} }
} }

View file

@ -11,7 +11,8 @@ use crate::value::Value;
use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh}; use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::traits::*;
use rustc_data_structures::base_n; use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::small_c_str::SmallCStr; use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId; use rustc_hir::def_id::DefId;
@ -1015,7 +1016,7 @@ impl CodegenCx<'_, '_> {
let mut name = String::with_capacity(prefix.len() + 6); let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix); name.push_str(prefix);
name.push('.'); name.push('.');
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name name
} }
} }

View file

@ -1,6 +1,7 @@
/// Converts unsigned integers into a string representation with some base. /// Converts unsigned integers into a string representation with some base.
/// Bases up to and including 36 can be used for case-insensitive things. /// Bases up to and including 36 can be used for case-insensitive things.
use std::str; use std::ascii;
use std::fmt;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: usize = 62; pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36; pub const CASE_INSENSITIVE: usize = 36;
const BASE_64: &[u8; MAX_BASE] = const BASE_64: [ascii::Char; MAX_BASE] = {
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
let Some(ascii) = bytes.as_ascii() else { panic!() };
*ascii
};
#[inline] pub struct BaseNString {
pub fn push_str(mut n: u128, base: usize, output: &mut String) { start: usize,
debug_assert!(base >= 2 && base <= MAX_BASE); buf: [ascii::Char; 128],
let mut s = [0u8; 128]; }
let mut index = s.len();
let base = base as u128; impl std::ops::Deref for BaseNString {
type Target = str;
loop { fn deref(&self) -> &str {
index -= 1; self.buf[self.start..].as_str()
s[index] = BASE_64[(n % base) as usize]; }
n /= base; }
if n == 0 { impl AsRef<str> for BaseNString {
break; fn as_ref(&self) -> &str {
} self.buf[self.start..].as_str()
}
}
/// Displays a `BaseNString` by writing out its string view.
impl fmt::Display for BaseNString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Deref coercion turns `&BaseNString` into the `&str` view of the digits.
        // `write_str` emits that text verbatim; width/padding flags on the
        // formatter are not consulted.
        let digits: &str = self;
        f.write_str(digits)
    }
}
// This trait just lets us reserve the exact right amount of space when doing fixed-length
// case-insensitive encoding. Add any impls you need.
pub trait ToBaseN: Into<u128> {
fn encoded_len(base: usize) -> usize;
fn to_base_fixed_len(self, base: usize) -> BaseNString {
let mut encoded = self.to_base(base);
encoded.start = encoded.buf.len() - Self::encoded_len(base);
encoded
} }
output.push_str(unsafe { fn to_base(self, base: usize) -> BaseNString {
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64` let mut output = [ascii::Char::Digit0; 128];
str::from_utf8_unchecked(&s[index..])
}); let mut n: u128 = self.into();
let mut index = output.len();
loop {
index -= 1;
output[index] = BASE_64[(n % base as u128) as usize];
n /= base as u128;
if n == 0 {
break;
}
}
assert_eq!(n, 0);
BaseNString { start: index, buf: output }
}
} }
#[inline] impl ToBaseN for u128 {
pub fn encode(n: u128, base: usize) -> String { fn encoded_len(base: usize) -> usize {
let mut s = String::new(); let mut max = u128::MAX;
push_str(n, base, &mut s); let mut len = 0;
s while max > 0 {
len += 1;
max /= base as u128;
}
len
}
}
impl ToBaseN for u64 {
    /// Returns how many digits are needed to write `u64::MAX` in `base`,
    /// i.e. the width of a fixed-length encoding of any `u64`.
    ///
    /// `base` must be at least 2: a base of 0 divides by zero, and a base of 1
    /// never shrinks the remaining value, so the loop would not terminate.
    fn encoded_len(base: usize) -> usize {
        let radix = base as u64;
        let mut remaining = u64::MAX;
        let mut digits = 0;
        // Peel off one digit per iteration until nothing is left of the maximum value.
        loop {
            if remaining == 0 {
                return digits;
            }
            digits += 1;
            remaining /= radix;
        }
    }
}
impl ToBaseN for u32 {
fn encoded_len(base: usize) -> usize {
let mut max = u32::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u32;
}
len
}
} }

View file

@ -1,9 +1,17 @@
use super::*; use super::*;
#[test] #[test]
fn test_encode() { fn limits() {
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
}
#[test]
fn test_to_base() {
fn test(n: u128, base: usize) { fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32)); assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
} }
for base in 2..37 { for base in 2..37 {

View file

@ -16,6 +16,8 @@
#![doc(rust_logo)] #![doc(rust_logo)]
#![feature(allocator_api)] #![feature(allocator_api)]
#![feature(array_windows)] #![feature(array_windows)]
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(auto_traits)] #![feature(auto_traits)]
#![feature(cfg_match)] #![feature(cfg_match)]
#![feature(core_intrinsics)] #![feature(core_intrinsics)]

View file

@ -104,10 +104,14 @@
//! implemented. //! implemented.
use crate::errors; use crate::errors;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet}; use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_data_structures::svh::Svh; use rustc_data_structures::svh::Svh;
use rustc_data_structures::unord::{UnordMap, UnordSet}; use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_data_structures::{base_n, flock};
use rustc_errors::ErrorGuaranteed; use rustc_errors::ErrorGuaranteed;
use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy}; use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy};
use rustc_session::config::CrateType; use rustc_session::config::CrateType;
@ -332,31 +336,24 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display()); debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
let old_sub_dir_name = incr_comp_session_dir let mut sub_dir_name = incr_comp_session_dir
.file_name() .file_name()
.unwrap() .unwrap()
.to_str() .to_str()
.expect("malformed session dir name: contains non-Unicode characters"); .expect("malformed session dir name: contains non-Unicode characters")
.to_string();
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the // Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate
// '-working' part with the SVH of the crate sub_dir_name.truncate(sub_dir_name.len() - "working".len());
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect(); // Double-check that we kept this: "s-{timestamp}-{random-number}-"
if dash_indices.len() != 3 { assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name);
bug!( assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);
"Encountered incremental compilation session directory with \
malformed name: {}",
incr_comp_session_dir.display()
)
}
// State: "s-{timestamp}-{random-number}-" // Append the SVH
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]); sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE));
// Append the svh
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
// Create the full path // Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
debug!("finalize_session_directory() - new path: {}", new_path.display()); debug!("finalize_session_directory() - new path: {}", new_path.display());
match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) { match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
@ -452,11 +449,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
let random_number = thread_rng().next_u32(); let random_number = thread_rng().next_u32();
debug!("generate_session_dir_path: random_number = {}", random_number); debug!("generate_session_dir_path: random_number = {}", random_number);
let directory_name = format!( // Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
"s-{}-{}-working", let (zeroes, timestamp) = timestamp.split_at(3);
timestamp, assert_eq!(zeroes, "000");
base_n::encode(random_number as u128, INT_ENCODE_BASE) let directory_name =
); format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE));
debug!("generate_session_dir_path: directory_name = {}", directory_name); debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name); let directory_path = crate_dir.join(directory_name);
debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@ -587,10 +584,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]]) string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
} }
fn timestamp_to_string(timestamp: SystemTime) -> String { fn timestamp_to_string(timestamp: SystemTime) -> BaseNString {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000; let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros as u128, INT_ENCODE_BASE) micros.to_base_fixed_len(CASE_INSENSITIVE)
} }
fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> { fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
@ -621,9 +618,8 @@ fn crate_path(sess: &Session) -> PathBuf {
sess.cfg_version, sess.cfg_version,
); );
let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE); let crate_name =
format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE));
let crate_name = format!("{crate_name}-{stable_crate_id}");
incr_dir.join(crate_name) incr_dir.join(crate_name)
} }

View file

@ -1,7 +1,9 @@
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId}; use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt}; use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt};
use rustc_attr::InlineAttr; use rustc_attr::InlineAttr;
use rustc_data_structures::base_n; use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::fx::FxIndexMap;
@ -334,14 +336,11 @@ impl<'tcx> CodegenUnit<'tcx> {
self.is_code_coverage_dead_code_cgu = true; self.is_code_coverage_dead_code_cgu = true;
} }
pub fn mangle_name(human_readable_name: &str) -> String { pub fn mangle_name(human_readable_name: &str) -> BaseNString {
// We generate a 80 bit hash from the name. This should be enough to
// avoid collisions and is still reasonably short for filenames.
let mut hasher = StableHasher::new(); let mut hasher = StableHasher::new();
human_readable_name.hash(&mut hasher); human_readable_name.hash(&mut hasher);
let hash: Hash128 = hasher.finish(); let hash: Hash128 = hasher.finish();
let hash = hash.as_u128() & ((1u128 << 80) - 1); hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE)
base_n::encode(hash, base_n::CASE_INSENSITIVE)
} }
pub fn compute_size_estimate(&mut self) { pub fn compute_size_estimate(&mut self) {

View file

@ -4,7 +4,9 @@
//! //!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, //! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653. //! see design document in the tracking issue #89653.
use rustc_data_structures::base_n; use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir; use rustc_hir as hir;
use rustc_middle::bug; use rustc_middle::bug;
@ -736,7 +738,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>). /// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
fn to_disambiguator(num: u64) -> String { fn to_disambiguator(num: u64) -> String {
if let Some(num) = num.checked_sub(1) { if let Some(num) = num.checked_sub(1) {
format!("s{}_", base_n::encode(num as u128, base_n::ALPHANUMERIC_ONLY)) format!("s{}_", num.to_base(ALPHANUMERIC_ONLY))
} else { } else {
"s_".to_string() "s_".to_string()
} }
@ -746,7 +748,7 @@ fn to_disambiguator(num: u64) -> String {
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>). /// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
fn to_seq_id(num: usize) -> String { fn to_seq_id(num: usize) -> String {
if let Some(num) = num.checked_sub(1) { if let Some(num) = num.checked_sub(1) {
base_n::encode(num as u128, base_n::CASE_INSENSITIVE).to_uppercase() (num as u64).to_base(CASE_INSENSITIVE).to_uppercase()
} else { } else {
"".to_string() "".to_string()
} }

View file

@ -1,4 +1,4 @@
use rustc_data_structures::base_n; use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::intern::Interned; use rustc_data_structures::intern::Interned;
use rustc_hir as hir; use rustc_hir as hir;
@ -831,7 +831,7 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc. /// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
pub(crate) fn push_integer_62(x: u64, output: &mut String) { pub(crate) fn push_integer_62(x: u64, output: &mut String) {
if let Some(x) = x.checked_sub(1) { if let Some(x) = x.checked_sub(1) {
base_n::push_str(x as u128, base_n::ALPHANUMERIC_ONLY, output); output.push_str(&x.to_base(62));
} }
output.push('_'); output.push('_');
} }