1
Fork 0

Auto merge of #123441 - saethlin:fixed-len-file-names, r=oli-obk

Stabilize the size of incr comp object file names

The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me.

I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this:
```
target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o
```
And after, they look like this:
```
target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o
```

On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number of incremental compilation ICEs that were reported: https://github.com/rust-lang/rust/pull/110367

---

Upon further reading, this PR actually fixes a bug. This comment says the CGU names are supposed to be a fixed-length hash, and before this PR they aren't: ca7d34efa9/compiler/rustc_monomorphize/src/partitioning.rs (L445-L448)
This commit is contained in:
bors 2024-05-03 17:41:48 +00:00
commit 0d7b2fb797
9 changed files with 146 additions and 71 deletions

View file

@ -1,6 +1,7 @@
/// Converts unsigned integers into a string representation with some base.
/// Bases up to and including 36 can be used for case-insensitive things.
use std::str;
use std::ascii;
use std::fmt;
#[cfg(test)]
mod tests;
@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36;
const BASE_64: &[u8; MAX_BASE] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
/// Digit alphabet for the encoder: the digit with value `i` is written as `BASE_64[i]`.
///
/// The order is `0-9`, then `a-z`, then `A-Z`, then `@` and `$`.
const BASE_64: [ascii::Char; MAX_BASE] = {
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
// Every byte of the literal is ASCII, so the `else` branch is not expected to run;
// it exists only because `as_ascii` returns an `Option`.
let Some(ascii) = bytes.as_ascii() else { panic!() };
*ascii
};
#[inline]
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
debug_assert!(base >= 2 && base <= MAX_BASE);
let mut s = [0u8; 128];
let mut index = s.len();
/// An encoded number stored inline in a fixed-size buffer of ASCII characters.
///
/// The string contents are the tail of `buf`, from index `start` to the end.
pub struct BaseNString {
// Index in `buf` of the first character that belongs to the string.
start: usize,
// Backing storage; digits are written from the last slot toward the front.
// NOTE(review): 128 slots presumably cover the worst case of a `u128` in base 2 — confirm.
buf: [ascii::Char; 128],
}
let base = base as u128;
impl std::ops::Deref for BaseNString {
type Target = str;
loop {
index -= 1;
s[index] = BASE_64[(n % base) as usize];
n /= base;
fn deref(&self) -> &str {
self.buf[self.start..].as_str()
}
}
if n == 0 {
break;
}
impl AsRef<str> for BaseNString {
    /// Borrows the encoded digits as a string slice.
    fn as_ref(&self) -> &str {
        // Only the tail of the buffer, from `start` onward, is part of the string.
        let digits = &self.buf[self.start..];
        digits.as_str()
    }
}
impl fmt::Display for BaseNString {
    /// Writes the encoded digits to the formatter as-is.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Deref coercion turns `&BaseNString` into the `&str` view of the digits.
        let text: &str = self;
        f.write_str(text)
    }
}
// This trait just lets us reserve the exact right amount of space when doing fixed-length
// case-insensitive encoding. Add any impls you need.
pub trait ToBaseN: Into<u128> {
fn encoded_len(base: usize) -> usize;
fn to_base_fixed_len(self, base: usize) -> BaseNString {
let mut encoded = self.to_base(base);
encoded.start = encoded.buf.len() - Self::encoded_len(base);
encoded
}
output.push_str(unsafe {
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
str::from_utf8_unchecked(&s[index..])
});
fn to_base(self, base: usize) -> BaseNString {
let mut output = [ascii::Char::Digit0; 128];
let mut n: u128 = self.into();
let mut index = output.len();
loop {
index -= 1;
output[index] = BASE_64[(n % base as u128) as usize];
n /= base as u128;
if n == 0 {
break;
}
}
assert_eq!(n, 0);
BaseNString { start: index, buf: output }
}
}
#[inline]
pub fn encode(n: u128, base: usize) -> String {
let mut s = String::new();
push_str(n, base, &mut s);
s
impl ToBaseN for u128 {
    /// Returns how many base-`base` digits are needed to write `u128::MAX`.
    ///
    /// `base` must be at least 2: a base of 0 panics on the division and a
    /// base of 1 never terminates.
    fn encoded_len(base: usize) -> usize {
        let radix = base as u128;
        // Strip one digit per step and count the steps until nothing is left.
        std::iter::successors(Some(u128::MAX), |&rest| {
            let quotient = rest / radix;
            (quotient > 0).then_some(quotient)
        })
        .count()
    }
}
impl ToBaseN for u64 {
    /// Returns how many base-`base` digits are needed to write `u64::MAX`.
    ///
    /// `base` must be at least 2: a base of 0 panics on the division and a
    /// base of 1 never terminates.
    fn encoded_len(base: usize) -> usize {
        let radix = base as u64;
        // Strip one digit per step and count the steps until nothing is left.
        std::iter::successors(Some(u64::MAX), |&rest| {
            let quotient = rest / radix;
            (quotient > 0).then_some(quotient)
        })
        .count()
    }
}
impl ToBaseN for u32 {
    /// Returns how many base-`base` digits are needed to write `u32::MAX`.
    ///
    /// `base` must be at least 2: a base of 0 panics on the division and a
    /// base of 1 never terminates.
    fn encoded_len(base: usize) -> usize {
        let radix = base as u32;
        // Strip one digit per step and count the steps until nothing is left.
        std::iter::successors(Some(u32::MAX), |&rest| {
            let quotient = rest / radix;
            (quotient > 0).then_some(quotient)
        })
        .count()
    }
}

View file

@ -1,9 +1,17 @@
use super::*;
#[test]
fn test_encode() {
fn limits() {
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
}
#[test]
fn test_to_base() {
fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
}
for base in 2..37 {

View file

@ -16,6 +16,8 @@
#![doc(rust_logo)]
#![feature(allocator_api)]
#![feature(array_windows)]
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(auto_traits)]
#![feature(cfg_match)]
#![feature(core_intrinsics)]