Auto merge of #123441 - saethlin:fixed-len-file-names, r=oli-obk
Stabilize the size of incr comp object file names
The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me.
I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this:
```
target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o
```
And after, they look like this:
```
target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o
```
On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number of incremental compilation ICEs that were reported: https://github.com/rust-lang/rust/pull/110367
---
Upon further reading, this PR actually fixes a bug. This comment says the CGU names are supposed to be a fixed-length hash, and before this PR they aren't: ca7d34efa9/compiler/rustc_monomorphize/src/partitioning.rs (L445-L448)
This commit is contained in:
commit
0d7b2fb797
9 changed files with 146 additions and 71 deletions
|
@ -1,6 +1,7 @@
|
|||
/// Converts unsigned integers into a string representation with some base.
|
||||
/// Bases up to and including 36 can be used for case-insensitive things.
|
||||
use std::str;
|
||||
use std::ascii;
|
||||
use std::fmt;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
|
|||
pub const ALPHANUMERIC_ONLY: usize = 62;
|
||||
pub const CASE_INSENSITIVE: usize = 36;
|
||||
|
||||
const BASE_64: &[u8; MAX_BASE] =
|
||||
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
|
||||
const BASE_64: [ascii::Char; MAX_BASE] = {
|
||||
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
|
||||
let Some(ascii) = bytes.as_ascii() else { panic!() };
|
||||
*ascii
|
||||
};
|
||||
|
||||
#[inline]
|
||||
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
|
||||
debug_assert!(base >= 2 && base <= MAX_BASE);
|
||||
let mut s = [0u8; 128];
|
||||
let mut index = s.len();
|
||||
pub struct BaseNString {
|
||||
start: usize,
|
||||
buf: [ascii::Char; 128],
|
||||
}
|
||||
|
||||
let base = base as u128;
|
||||
impl std::ops::Deref for BaseNString {
|
||||
type Target = str;
|
||||
|
||||
loop {
|
||||
index -= 1;
|
||||
s[index] = BASE_64[(n % base) as usize];
|
||||
n /= base;
|
||||
fn deref(&self) -> &str {
|
||||
self.buf[self.start..].as_str()
|
||||
}
|
||||
}
|
||||
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
impl AsRef<str> for BaseNString {
|
||||
fn as_ref(&self) -> &str {
|
||||
self.buf[self.start..].as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for BaseNString {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(self)
|
||||
}
|
||||
}
|
||||
|
||||
// This trait just lets us reserve the exact right amount of space when doing fixed-length
|
||||
// case-insensitive encoding. Add any impls you need.
|
||||
pub trait ToBaseN: Into<u128> {
|
||||
fn encoded_len(base: usize) -> usize;
|
||||
|
||||
fn to_base_fixed_len(self, base: usize) -> BaseNString {
|
||||
let mut encoded = self.to_base(base);
|
||||
encoded.start = encoded.buf.len() - Self::encoded_len(base);
|
||||
encoded
|
||||
}
|
||||
|
||||
output.push_str(unsafe {
|
||||
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
|
||||
str::from_utf8_unchecked(&s[index..])
|
||||
});
|
||||
fn to_base(self, base: usize) -> BaseNString {
|
||||
let mut output = [ascii::Char::Digit0; 128];
|
||||
|
||||
let mut n: u128 = self.into();
|
||||
|
||||
let mut index = output.len();
|
||||
loop {
|
||||
index -= 1;
|
||||
output[index] = BASE_64[(n % base as u128) as usize];
|
||||
n /= base as u128;
|
||||
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert_eq!(n, 0);
|
||||
|
||||
BaseNString { start: index, buf: output }
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn encode(n: u128, base: usize) -> String {
|
||||
let mut s = String::new();
|
||||
push_str(n, base, &mut s);
|
||||
s
|
||||
impl ToBaseN for u128 {
|
||||
fn encoded_len(base: usize) -> usize {
|
||||
let mut max = u128::MAX;
|
||||
let mut len = 0;
|
||||
while max > 0 {
|
||||
len += 1;
|
||||
max /= base as u128;
|
||||
}
|
||||
len
|
||||
}
|
||||
}
|
||||
|
||||
impl ToBaseN for u64 {
|
||||
fn encoded_len(base: usize) -> usize {
|
||||
let mut max = u64::MAX;
|
||||
let mut len = 0;
|
||||
while max > 0 {
|
||||
len += 1;
|
||||
max /= base as u64;
|
||||
}
|
||||
len
|
||||
}
|
||||
}
|
||||
|
||||
impl ToBaseN for u32 {
|
||||
fn encoded_len(base: usize) -> usize {
|
||||
let mut max = u32::MAX;
|
||||
let mut len = 0;
|
||||
while max > 0 {
|
||||
len += 1;
|
||||
max /= base as u32;
|
||||
}
|
||||
len
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,17 @@
|
|||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_encode() {
|
||||
fn limits() {
|
||||
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
|
||||
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
|
||||
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_base() {
|
||||
fn test(n: u128, base: usize) {
|
||||
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
|
||||
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
|
||||
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
|
||||
}
|
||||
|
||||
for base in 2..37 {
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#![doc(rust_logo)]
|
||||
#![feature(allocator_api)]
|
||||
#![feature(array_windows)]
|
||||
#![feature(ascii_char)]
|
||||
#![feature(ascii_char_variants)]
|
||||
#![feature(auto_traits)]
|
||||
#![feature(cfg_match)]
|
||||
#![feature(core_intrinsics)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue