
add unstable support for outputting file checksums for use in cargo

Jacob Kiesel 2024-06-22 01:27:59 -06:00
parent bfe5e8cef6
commit bb5a8276be
16 changed files with 321 additions and 28 deletions
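In short: `SourceFile` gains an optional `checksum_hash`, `SourceFileHashAlgorithm` gains a `Blake3` variant, and `SourceFileHash` can now be computed either from an in-memory buffer or by streaming any `io::Read`. A minimal sketch of the resulting API, assuming both types remain exported from `rustc_span`'s crate root as shown in the diff below; the file path and the `main` wrapper are illustrative, not part of this change:

use std::fs::File;
use std::io;

use rustc_span::{SourceFileHash, SourceFileHashAlgorithm};

fn main() -> io::Result<()> {
    // Hash source text that is already in memory (what `SourceFileHash::new` used to do).
    let in_memory = SourceFileHash::new_in_memory(SourceFileHashAlgorithm::Blake3, "fn main() {}");

    // Stream a file straight from disk through a 16 KiB buffer instead of
    // reading it into memory first; this is the path meant for cargo's
    // checksum-based freshness checks.
    let on_disk = SourceFileHash::new(SourceFileHashAlgorithm::Blake3, File::open("src/main.rs")?)?;

    // `Display` renders `<algorithm>=<hex digest>`, so the two can be compared textually.
    println!("{in_memory}");
    println!("{on_disk}");
    Ok(())
}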


@@ -75,7 +75,9 @@ pub mod profiling;
use std::borrow::Cow;
use std::cmp::{self, Ordering};
use std::fmt::Display;
use std::hash::Hash;
use std::io::{self, Read};
use std::ops::{Add, Range, Sub};
use std::path::{Path, PathBuf};
use std::str::FromStr;
@@ -1395,6 +1397,27 @@ pub enum SourceFileHashAlgorithm {
Md5,
Sha1,
Sha256,
Blake3,
}
impl SourceFileHashAlgorithm {
pub fn supported_in_cargo(&self) -> bool {
match self {
Self::Md5 | Self::Sha1 => false,
Self::Sha256 | Self::Blake3 => true,
}
}
}
impl Display for SourceFileHashAlgorithm {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(match self {
Self::Md5 => "md5",
Self::Sha1 => "sha1",
Self::Sha256 => "sha256",
Self::Blake3 => "blake3",
})
}
}
impl FromStr for SourceFileHashAlgorithm {
@@ -1405,12 +1428,13 @@ impl FromStr for SourceFileHashAlgorithm {
"md5" => Ok(SourceFileHashAlgorithm::Md5),
"sha1" => Ok(SourceFileHashAlgorithm::Sha1),
"sha256" => Ok(SourceFileHashAlgorithm::Sha256),
"blake3" => Ok(SourceFileHashAlgorithm::Blake3),
_ => Err(()),
}
}
}
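// Illustrative sketch, not part of the diff: the names accepted by `FromStr`
// are the same strings `Display` prints, so the new `blake3` spelling
// round-trips, and `supported_in_cargo` gates which algorithms cargo is
// allowed to request.
#[cfg(test)]
mod checksum_algorithm_sketch {
    use super::SourceFileHashAlgorithm;

    #[test]
    fn blake3_round_trips() {
        let algo: SourceFileHashAlgorithm = "blake3".parse().unwrap();
        assert_eq!(algo.to_string(), "blake3");
        assert!(algo.supported_in_cargo());
        assert!(!SourceFileHashAlgorithm::Md5.supported_in_cargo());
    }
}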
/// The hash of the on-disk source file used for debug info.
/// The hash of the on-disk source file used for debug info and cargo freshness checks.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct SourceFileHash {
@@ -1418,12 +1442,22 @@ pub struct SourceFileHash {
value: [u8; 32],
}
impl Display for SourceFileHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}=", self.kind)?;
for byte in self.value[0..self.hash_len()].into_iter() {
write!(f, "{byte:02x}")?;
}
Ok(())
}
}
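// Illustrative note, not part of the diff: the Display impl above renders a
// hash as `<algorithm>=<hex>`, emitting only the `hash_len()` bytes that are
// meaningful for the algorithm, e.g. `sha1=` followed by 40 hex characters, or
// `sha256=`/`blake3=` followed by 64 hex characters.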
impl SourceFileHash {
pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash {
let mut hash = SourceFileHash { kind, value: Default::default() };
let len = hash.hash_len();
let value = &mut hash.value[..len];
let data = src.as_bytes();
let data = src.as_ref();
match kind {
SourceFileHashAlgorithm::Md5 => {
value.copy_from_slice(&Md5::digest(data));
@@ -1434,13 +1468,94 @@ impl SourceFileHash {
SourceFileHashAlgorithm::Sha256 => {
value.copy_from_slice(&Sha256::digest(data));
}
}
SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()),
};
hash
}
pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> {
let mut hash = SourceFileHash { kind, value: Default::default() };
let len = hash.hash_len();
let value = &mut hash.value[..len];
// Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
// blake3 documentation.
let mut buf = vec![0; 16 * 1024];
fn digest<T>(
mut hasher: T,
mut update: impl FnMut(&mut T, &[u8]),
finish: impl FnOnce(T, &mut [u8]),
mut src: impl Read,
buf: &mut [u8],
value: &mut [u8],
) -> Result<(), io::Error> {
loop {
let bytes_read = src.read(buf)?;
if bytes_read == 0 {
break;
}
update(&mut hasher, &buf[0..bytes_read]);
}
finish(hasher, value);
Ok(())
}
match kind {
SourceFileHashAlgorithm::Sha256 => {
digest(
Sha256::new(),
|h, b| {
h.update(b);
},
|h, out| out.copy_from_slice(&h.finalize()),
src,
&mut buf,
value,
)?;
}
SourceFileHashAlgorithm::Sha1 => {
digest(
Sha1::new(),
|h, b| {
h.update(b);
},
|h, out| out.copy_from_slice(&h.finalize()),
src,
&mut buf,
value,
)?;
}
SourceFileHashAlgorithm::Md5 => {
digest(
Md5::new(),
|h, b| {
h.update(b);
},
|h, out| out.copy_from_slice(&h.finalize()),
src,
&mut buf,
value,
)?;
}
SourceFileHashAlgorithm::Blake3 => {
digest(
blake3::Hasher::new(),
|h, b| {
h.update(b);
},
|h, out| out.copy_from_slice(h.finalize().as_bytes()),
src,
&mut buf,
value,
)?;
}
}
Ok(hash)
}
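// Illustrative note, not part of the diff: the generic `digest` helper above is
// monomorphized once per algorithm; every arm reads the source through the same
// 16 KiB buffer and only differs in how the finished digest is copied into the
// truncated `value` slice, so supporting another algorithm is one more arm.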
/// Check if the stored hash matches the hash of the string.
pub fn matches(&self, src: &str) -> bool {
Self::new(self.kind, src) == *self
Self::new_in_memory(self.kind, src.as_bytes()) == *self
}
/// The bytes of the hash.
@@ -1453,7 +1568,7 @@ impl SourceFileHash {
match self.kind {
SourceFileHashAlgorithm::Md5 => 16,
SourceFileHashAlgorithm::Sha1 => 20,
SourceFileHashAlgorithm::Sha256 => 32,
SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32,
}
}
}
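// Illustrative sketch, not part of the diff: `matches` re-hashes in-memory
// source text with the stored algorithm, while the streaming `new` above lets
// the same comparison be made against the bytes on disk (the file path below
// is hypothetical).
#[allow(dead_code)]
fn freshness_sketch() -> Result<bool, io::Error> {
    let src = "fn main() {}";
    let stored = SourceFileHash::new_in_memory(SourceFileHashAlgorithm::Blake3, src);
    debug_assert!(stored.matches(src));

    let on_disk =
        SourceFileHash::new(SourceFileHashAlgorithm::Blake3, std::fs::File::open("src/main.rs")?)?;
    Ok(on_disk == stored)
}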
@@ -1509,6 +1624,10 @@ pub struct SourceFile {
pub src: Option<Lrc<String>>,
/// The source code's hash.
pub src_hash: SourceFileHash,
/// Used to enable cargo to use checksums to check if a crate is fresh rather
/// than mtimes. This might be the same as `src_hash`, and if the requested algorithm
/// is identical we won't compute it twice.
pub checksum_hash: Option<SourceFileHash>,
/// The external source code (used for external crates, which will have a `None`
/// value as `self.src`).
pub external_src: FreezeLock<ExternalSource>,
@@ -1536,6 +1655,7 @@ impl Clone for SourceFile {
name: self.name.clone(),
src: self.src.clone(),
src_hash: self.src_hash,
checksum_hash: self.checksum_hash,
external_src: self.external_src.clone(),
start_pos: self.start_pos,
source_len: self.source_len,
@@ -1552,6 +1672,7 @@ impl<S: SpanEncoder> Encodable<S> for SourceFile {
fn encode(&self, s: &mut S) {
self.name.encode(s);
self.src_hash.encode(s);
self.checksum_hash.encode(s);
// Do not encode `start_pos` as it's global state for this session.
self.source_len.encode(s);
@@ -1625,6 +1746,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
fn decode(d: &mut D) -> SourceFile {
let name: FileName = Decodable::decode(d);
let src_hash: SourceFileHash = Decodable::decode(d);
let checksum_hash: Option<SourceFileHash> = Decodable::decode(d);
let source_len: RelativeBytePos = Decodable::decode(d);
let lines = {
let num_lines: u32 = Decodable::decode(d);
@@ -1650,6 +1772,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
source_len,
src: None,
src_hash,
checksum_hash,
// Unused - the metadata decoder will construct
// a new SourceFile, filling in `external_src` properly
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
@@ -1733,9 +1856,17 @@ impl SourceFile {
name: FileName,
mut src: String,
hash_kind: SourceFileHashAlgorithm,
checksum_hash_kind: Option<SourceFileHashAlgorithm>,
) -> Result<Self, OffsetOverflowError> {
// Compute the file hash before any normalization.
let src_hash = SourceFileHash::new(hash_kind, &src);
let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes());
let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| {
if checksum_hash_kind == hash_kind {
src_hash
} else {
SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes())
}
});
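// Illustrative note, not part of the diff: both hashes are taken from `src`
// before `normalize_src` runs, so the cargo-facing checksum reflects the file
// contents as read from disk, and when the requested checksum algorithm equals
// `hash_kind` the already-computed `src_hash` is reused rather than hashing the
// source a second time.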
let normalized_pos = normalize_src(&mut src);
let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
@@ -1748,6 +1879,7 @@ impl SourceFile {
name,
src: Some(Lrc::new(src)),
src_hash,
checksum_hash,
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
start_pos: BytePos::from_u32(0),
source_len: RelativeBytePos::from_u32(source_len),