1
Fork 0

add unstable support for outputting file checksums for use in cargo

This commit is contained in:
Jacob Kiesel 2024-06-22 01:27:59 -06:00
parent bfe5e8cef6
commit bb5a8276be
16 changed files with 321 additions and 28 deletions

View file

@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
blake3 = "1.5.2"
derive-where = "1.2.7"
indexmap = { version = "2.0.0" }
itoa = "1.0"

View file

@ -75,7 +75,9 @@ pub mod profiling;
use std::borrow::Cow;
use std::cmp::{self, Ordering};
use std::fmt::Display;
use std::hash::Hash;
use std::io::{self, Read};
use std::ops::{Add, Range, Sub};
use std::path::{Path, PathBuf};
use std::str::FromStr;
@ -1395,6 +1397,27 @@ pub enum SourceFileHashAlgorithm {
Md5,
Sha1,
Sha256,
Blake3,
}
impl SourceFileHashAlgorithm {
    /// Reports whether this algorithm is flagged as supported in cargo.
    ///
    /// `Sha256` and `Blake3` yield `true`; `Md5` and `Sha1` yield `false`.
    pub fn supported_in_cargo(&self) -> bool {
        // Kept as an exhaustive match so that adding a variant forces an
        // explicit decision here instead of silently falling into a default.
        match *self {
            Self::Sha256 | Self::Blake3 => true,
            Self::Md5 | Self::Sha1 => false,
        }
    }
}
impl Display for SourceFileHashAlgorithm {
    /// Writes the lowercase name of the algorithm (`md5`, `sha1`, `sha256` or `blake3`),
    /// i.e. the same spellings that the `FromStr` impl accepts.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            Self::Md5 => "md5",
            Self::Sha1 => "sha1",
            Self::Sha256 => "sha256",
            Self::Blake3 => "blake3",
        };
        f.write_str(name)
    }
}
impl FromStr for SourceFileHashAlgorithm {
@ -1405,12 +1428,13 @@ impl FromStr for SourceFileHashAlgorithm {
"md5" => Ok(SourceFileHashAlgorithm::Md5),
"sha1" => Ok(SourceFileHashAlgorithm::Sha1),
"sha256" => Ok(SourceFileHashAlgorithm::Sha256),
"blake3" => Ok(SourceFileHashAlgorithm::Blake3),
_ => Err(()),
}
}
}
/// The hash of the on-disk source file used for debug info.
/// The hash of the on-disk source file used for debug info and cargo freshness checks.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct SourceFileHash {
@ -1418,12 +1442,22 @@ pub struct SourceFileHash {
value: [u8; 32],
}
impl Display for SourceFileHash {
    /// Formats the hash as `<algorithm>=<hex digest>`.
    ///
    /// The algorithm name comes from the `Display` impl of `self.kind`; the digest is
    /// printed as two lowercase hex digits per byte with no separators.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}=", self.kind)?;
        // `value` is a fixed 32-byte buffer; only its first `hash_len()` bytes are printed,
        // so shorter digests are not followed by the unused tail of the buffer.
        for byte in &self.value[..self.hash_len()] {
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}
impl SourceFileHash {
pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash {
let mut hash = SourceFileHash { kind, value: Default::default() };
let len = hash.hash_len();
let value = &mut hash.value[..len];
let data = src.as_bytes();
let data = src.as_ref();
match kind {
SourceFileHashAlgorithm::Md5 => {
value.copy_from_slice(&Md5::digest(data));
@ -1434,13 +1468,94 @@ impl SourceFileHash {
SourceFileHashAlgorithm::Sha256 => {
value.copy_from_slice(&Sha256::digest(data));
}
}
SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()),
};
hash
}
/// Computes the `kind` hash of everything that `src` yields, reading it in fixed-size chunks
/// rather than requiring the whole input to be in memory.
///
/// # Errors
///
/// Returns the first I/O error reported by `src`, except for `ErrorKind::Interrupted`,
/// which is non-fatal per the `Read::read` contract and is retried.
pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> {
    let mut hash = SourceFileHash { kind, value: Default::default() };
    let len = hash.hash_len();
    // Only the first `len` bytes of the fixed-size `value` buffer hold the digest.
    let value = &mut hash.value[..len];
    // Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
    // blake3 documentation.
    let mut buf = vec![0; 16 * 1024];

    /// Feeds `src` chunk by chunk into `hasher` through `update`, then lets `finish`
    /// write the final digest into `value`.
    fn digest<T>(
        mut hasher: T,
        mut update: impl FnMut(&mut T, &[u8]),
        finish: impl FnOnce(T, &mut [u8]),
        mut src: impl Read,
        buf: &mut [u8],
        value: &mut [u8],
    ) -> Result<(), io::Error> {
        loop {
            match src.read(buf) {
                // A zero-length read into a non-empty buffer is treated as end of input.
                Ok(0) => break,
                Ok(bytes_read) => update(&mut hasher, &buf[..bytes_read]),
                // An interrupted read transferred no data and may simply be retried;
                // failing the whole hash because of it would be spurious.
                Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
                Err(err) => return Err(err),
            }
        }
        finish(hasher, value);
        Ok(())
    }

    match kind {
        SourceFileHashAlgorithm::Sha256 => {
            digest(
                Sha256::new(),
                |h, b| {
                    h.update(b);
                },
                |h, out| out.copy_from_slice(&h.finalize()),
                src,
                &mut buf,
                value,
            )?;
        }
        SourceFileHashAlgorithm::Sha1 => {
            digest(
                Sha1::new(),
                |h, b| {
                    h.update(b);
                },
                |h, out| out.copy_from_slice(&h.finalize()),
                src,
                &mut buf,
                value,
            )?;
        }
        SourceFileHashAlgorithm::Md5 => {
            digest(
                Md5::new(),
                |h, b| {
                    h.update(b);
                },
                |h, out| out.copy_from_slice(&h.finalize()),
                src,
                &mut buf,
                value,
            )?;
        }
        SourceFileHashAlgorithm::Blake3 => {
            digest(
                blake3::Hasher::new(),
                |h, b| {
                    h.update(b);
                },
                |h, out| out.copy_from_slice(h.finalize().as_bytes()),
                src,
                &mut buf,
                value,
            )?;
        }
    }
    Ok(hash)
}
/// Check if the stored hash matches the hash of the string.
pub fn matches(&self, src: &str) -> bool {
Self::new(self.kind, src) == *self
Self::new_in_memory(self.kind, src.as_bytes()) == *self
}
/// The bytes of the hash.
@ -1453,7 +1568,7 @@ impl SourceFileHash {
match self.kind {
SourceFileHashAlgorithm::Md5 => 16,
SourceFileHashAlgorithm::Sha1 => 20,
SourceFileHashAlgorithm::Sha256 => 32,
SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32,
}
}
}
@ -1509,6 +1624,10 @@ pub struct SourceFile {
pub src: Option<Lrc<String>>,
/// The source code's hash.
pub src_hash: SourceFileHash,
/// Used to enable cargo to use checksums to check if a crate is fresh rather
/// than mtimes. This might be the same as `src_hash`, and if the requested algorithm
/// is identical we won't compute it twice.
pub checksum_hash: Option<SourceFileHash>,
/// The external source code (used for external crates, which will have a `None`
/// value as `self.src`).
pub external_src: FreezeLock<ExternalSource>,
@ -1536,6 +1655,7 @@ impl Clone for SourceFile {
name: self.name.clone(),
src: self.src.clone(),
src_hash: self.src_hash,
checksum_hash: self.checksum_hash,
external_src: self.external_src.clone(),
start_pos: self.start_pos,
source_len: self.source_len,
@ -1552,6 +1672,7 @@ impl<S: SpanEncoder> Encodable<S> for SourceFile {
fn encode(&self, s: &mut S) {
self.name.encode(s);
self.src_hash.encode(s);
self.checksum_hash.encode(s);
// Do not encode `start_pos` as it's global state for this session.
self.source_len.encode(s);
@ -1625,6 +1746,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
fn decode(d: &mut D) -> SourceFile {
let name: FileName = Decodable::decode(d);
let src_hash: SourceFileHash = Decodable::decode(d);
let checksum_hash: Option<SourceFileHash> = Decodable::decode(d);
let source_len: RelativeBytePos = Decodable::decode(d);
let lines = {
let num_lines: u32 = Decodable::decode(d);
@ -1650,6 +1772,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
source_len,
src: None,
src_hash,
checksum_hash,
// Unused - the metadata decoder will construct
// a new SourceFile, filling in `external_src` properly
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
@ -1733,9 +1856,17 @@ impl SourceFile {
name: FileName,
mut src: String,
hash_kind: SourceFileHashAlgorithm,
checksum_hash_kind: Option<SourceFileHashAlgorithm>,
) -> Result<Self, OffsetOverflowError> {
// Compute the file hash before any normalization.
let src_hash = SourceFileHash::new(hash_kind, &src);
let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes());
let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| {
if checksum_hash_kind == hash_kind {
src_hash
} else {
SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes())
}
});
let normalized_pos = normalize_src(&mut src);
let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
@ -1748,6 +1879,7 @@ impl SourceFile {
name,
src: Some(Lrc::new(src)),
src_hash,
checksum_hash,
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
start_pos: BytePos::from_u32(0),
source_len: RelativeBytePos::from_u32(source_len),

View file

@ -175,6 +175,7 @@ pub struct SourceMapInputs {
pub file_loader: Box<dyn FileLoader + Send + Sync>,
pub path_mapping: FilePathMapping,
pub hash_kind: SourceFileHashAlgorithm,
pub checksum_hash_kind: Option<SourceFileHashAlgorithm>,
}
pub struct SourceMap {
@ -187,6 +188,12 @@ pub struct SourceMap {
/// The algorithm used for hashing the contents of each source file.
hash_kind: SourceFileHashAlgorithm,
/// Similar to `hash_kind`, however this algorithm is used for checksums to determine if a crate is fresh.
/// `cargo` is the primary user of these.
///
/// If this is equal to `hash_kind` then the checksum won't be computed twice.
checksum_hash_kind: Option<SourceFileHashAlgorithm>,
}
impl SourceMap {
@ -195,17 +202,19 @@ impl SourceMap {
file_loader: Box::new(RealFileLoader),
path_mapping,
hash_kind: SourceFileHashAlgorithm::Md5,
checksum_hash_kind: None,
})
}
pub fn with_inputs(
SourceMapInputs { file_loader, path_mapping, hash_kind }: SourceMapInputs,
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind }: SourceMapInputs,
) -> SourceMap {
SourceMap {
files: Default::default(),
file_loader: IntoDynSyncSend(file_loader),
path_mapping,
hash_kind,
checksum_hash_kind,
}
}
@ -307,7 +316,8 @@ impl SourceMap {
match self.source_file_by_stable_id(stable_id) {
Some(lrc_sf) => Ok(lrc_sf),
None => {
let source_file = SourceFile::new(filename, src, self.hash_kind)?;
let source_file =
SourceFile::new(filename, src, self.hash_kind, self.checksum_hash_kind)?;
// Let's make sure the file_id we generated above actually matches
// the ID we generate for the SourceFile we just created.
@ -326,6 +336,7 @@ impl SourceMap {
&self,
filename: FileName,
src_hash: SourceFileHash,
checksum_hash: Option<SourceFileHash>,
stable_id: StableSourceFileId,
source_len: u32,
cnum: CrateNum,
@ -340,6 +351,7 @@ impl SourceMap {
name: filename,
src: None,
src_hash,
checksum_hash,
external_src: FreezeLock::new(ExternalSource::Foreign {
kind: ExternalSourceKind::AbsentOk,
metadata_index,

View file

@ -229,6 +229,7 @@ fn t10() {
let SourceFile {
name,
src_hash,
checksum_hash,
source_len,
lines,
multibyte_chars,
@ -240,6 +241,7 @@ fn t10() {
let imported_src_file = sm.new_imported_source_file(
name,
src_hash,
checksum_hash,
stable_id,
source_len.to_u32(),
CrateNum::ZERO,

View file

@ -3,9 +3,13 @@ use super::*;
#[test]
fn test_lookup_line() {
let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
let mut sf =
SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256)
.unwrap();
let mut sf = SourceFile::new(
FileName::Anon(Hash64::ZERO),
source,
SourceFileHashAlgorithm::Sha256,
Some(SourceFileHashAlgorithm::Sha256),
)
.unwrap();
sf.start_pos = BytePos(3);
assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]);