add unstable support for outputting file checksums for use in cargo
This commit is contained in:
parent
bfe5e8cef6
commit
bb5a8276be
16 changed files with 321 additions and 28 deletions
|
@ -5,6 +5,7 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
# tidy-alphabetical-start
|
||||
blake3 = "1.5.2"
|
||||
derive-where = "1.2.7"
|
||||
indexmap = { version = "2.0.0" }
|
||||
itoa = "1.0"
|
||||
|
|
|
@ -75,7 +75,9 @@ pub mod profiling;
|
|||
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::{self, Ordering};
|
||||
use std::fmt::Display;
|
||||
use std::hash::Hash;
|
||||
use std::io::{self, Read};
|
||||
use std::ops::{Add, Range, Sub};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
@ -1395,6 +1397,27 @@ pub enum SourceFileHashAlgorithm {
|
|||
Md5,
|
||||
Sha1,
|
||||
Sha256,
|
||||
Blake3,
|
||||
}
|
||||
|
||||
impl SourceFileHashAlgorithm {
|
||||
pub fn supported_in_cargo(&self) -> bool {
|
||||
match self {
|
||||
Self::Md5 | Self::Sha1 => false,
|
||||
Self::Sha256 | Self::Blake3 => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SourceFileHashAlgorithm {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(match self {
|
||||
Self::Md5 => "md5",
|
||||
Self::Sha1 => "sha1",
|
||||
Self::Sha256 => "sha256",
|
||||
Self::Blake3 => "blake3",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SourceFileHashAlgorithm {
|
||||
|
@ -1405,12 +1428,13 @@ impl FromStr for SourceFileHashAlgorithm {
|
|||
"md5" => Ok(SourceFileHashAlgorithm::Md5),
|
||||
"sha1" => Ok(SourceFileHashAlgorithm::Sha1),
|
||||
"sha256" => Ok(SourceFileHashAlgorithm::Sha256),
|
||||
"blake3" => Ok(SourceFileHashAlgorithm::Blake3),
|
||||
_ => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The hash of the on-disk source file used for debug info.
|
||||
/// The hash of the on-disk source file used for debug info and cargo freshness checks.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
|
||||
#[derive(HashStable_Generic, Encodable, Decodable)]
|
||||
pub struct SourceFileHash {
|
||||
|
@ -1418,12 +1442,22 @@ pub struct SourceFileHash {
|
|||
value: [u8; 32],
|
||||
}
|
||||
|
||||
impl Display for SourceFileHash {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}=", self.kind)?;
|
||||
for byte in self.value[0..self.hash_len()].into_iter() {
|
||||
write!(f, "{byte:02x}")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceFileHash {
|
||||
pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
|
||||
pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash {
|
||||
let mut hash = SourceFileHash { kind, value: Default::default() };
|
||||
let len = hash.hash_len();
|
||||
let value = &mut hash.value[..len];
|
||||
let data = src.as_bytes();
|
||||
let data = src.as_ref();
|
||||
match kind {
|
||||
SourceFileHashAlgorithm::Md5 => {
|
||||
value.copy_from_slice(&Md5::digest(data));
|
||||
|
@ -1434,13 +1468,94 @@ impl SourceFileHash {
|
|||
SourceFileHashAlgorithm::Sha256 => {
|
||||
value.copy_from_slice(&Sha256::digest(data));
|
||||
}
|
||||
}
|
||||
SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()),
|
||||
};
|
||||
hash
|
||||
}
|
||||
|
||||
pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> {
|
||||
let mut hash = SourceFileHash { kind, value: Default::default() };
|
||||
let len = hash.hash_len();
|
||||
let value = &mut hash.value[..len];
|
||||
// Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
|
||||
// blake3 documentation.
|
||||
let mut buf = vec![0; 16 * 1024];
|
||||
|
||||
fn digest<T>(
|
||||
mut hasher: T,
|
||||
mut update: impl FnMut(&mut T, &[u8]),
|
||||
finish: impl FnOnce(T, &mut [u8]),
|
||||
mut src: impl Read,
|
||||
buf: &mut [u8],
|
||||
value: &mut [u8],
|
||||
) -> Result<(), io::Error> {
|
||||
loop {
|
||||
let bytes_read = src.read(buf)?;
|
||||
if bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
update(&mut hasher, &buf[0..bytes_read]);
|
||||
}
|
||||
finish(hasher, value);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
match kind {
|
||||
SourceFileHashAlgorithm::Sha256 => {
|
||||
digest(
|
||||
Sha256::new(),
|
||||
|h, b| {
|
||||
h.update(b);
|
||||
},
|
||||
|h, out| out.copy_from_slice(&h.finalize()),
|
||||
src,
|
||||
&mut buf,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
SourceFileHashAlgorithm::Sha1 => {
|
||||
digest(
|
||||
Sha1::new(),
|
||||
|h, b| {
|
||||
h.update(b);
|
||||
},
|
||||
|h, out| out.copy_from_slice(&h.finalize()),
|
||||
src,
|
||||
&mut buf,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
SourceFileHashAlgorithm::Md5 => {
|
||||
digest(
|
||||
Md5::new(),
|
||||
|h, b| {
|
||||
h.update(b);
|
||||
},
|
||||
|h, out| out.copy_from_slice(&h.finalize()),
|
||||
src,
|
||||
&mut buf,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
SourceFileHashAlgorithm::Blake3 => {
|
||||
digest(
|
||||
blake3::Hasher::new(),
|
||||
|h, b| {
|
||||
h.update(b);
|
||||
},
|
||||
|h, out| out.copy_from_slice(h.finalize().as_bytes()),
|
||||
src,
|
||||
&mut buf,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Ok(hash)
|
||||
}
|
||||
|
||||
/// Check if the stored hash matches the hash of the string.
|
||||
pub fn matches(&self, src: &str) -> bool {
|
||||
Self::new(self.kind, src) == *self
|
||||
Self::new_in_memory(self.kind, src.as_bytes()) == *self
|
||||
}
|
||||
|
||||
/// The bytes of the hash.
|
||||
|
@ -1453,7 +1568,7 @@ impl SourceFileHash {
|
|||
match self.kind {
|
||||
SourceFileHashAlgorithm::Md5 => 16,
|
||||
SourceFileHashAlgorithm::Sha1 => 20,
|
||||
SourceFileHashAlgorithm::Sha256 => 32,
|
||||
SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1509,6 +1624,10 @@ pub struct SourceFile {
|
|||
pub src: Option<Lrc<String>>,
|
||||
/// The source code's hash.
|
||||
pub src_hash: SourceFileHash,
|
||||
/// Used to enable cargo to use checksums to check if a crate is fresh rather
|
||||
/// than mtimes. This might be the same as `src_hash`, and if the requested algorithm
|
||||
/// is identical we won't compute it twice.
|
||||
pub checksum_hash: Option<SourceFileHash>,
|
||||
/// The external source code (used for external crates, which will have a `None`
|
||||
/// value as `self.src`).
|
||||
pub external_src: FreezeLock<ExternalSource>,
|
||||
|
@ -1536,6 +1655,7 @@ impl Clone for SourceFile {
|
|||
name: self.name.clone(),
|
||||
src: self.src.clone(),
|
||||
src_hash: self.src_hash,
|
||||
checksum_hash: self.checksum_hash,
|
||||
external_src: self.external_src.clone(),
|
||||
start_pos: self.start_pos,
|
||||
source_len: self.source_len,
|
||||
|
@ -1552,6 +1672,7 @@ impl<S: SpanEncoder> Encodable<S> for SourceFile {
|
|||
fn encode(&self, s: &mut S) {
|
||||
self.name.encode(s);
|
||||
self.src_hash.encode(s);
|
||||
self.checksum_hash.encode(s);
|
||||
// Do not encode `start_pos` as it's global state for this session.
|
||||
self.source_len.encode(s);
|
||||
|
||||
|
@ -1625,6 +1746,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
|
|||
fn decode(d: &mut D) -> SourceFile {
|
||||
let name: FileName = Decodable::decode(d);
|
||||
let src_hash: SourceFileHash = Decodable::decode(d);
|
||||
let checksum_hash: Option<SourceFileHash> = Decodable::decode(d);
|
||||
let source_len: RelativeBytePos = Decodable::decode(d);
|
||||
let lines = {
|
||||
let num_lines: u32 = Decodable::decode(d);
|
||||
|
@ -1650,6 +1772,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
|
|||
source_len,
|
||||
src: None,
|
||||
src_hash,
|
||||
checksum_hash,
|
||||
// Unused - the metadata decoder will construct
|
||||
// a new SourceFile, filling in `external_src` properly
|
||||
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
|
||||
|
@ -1733,9 +1856,17 @@ impl SourceFile {
|
|||
name: FileName,
|
||||
mut src: String,
|
||||
hash_kind: SourceFileHashAlgorithm,
|
||||
checksum_hash_kind: Option<SourceFileHashAlgorithm>,
|
||||
) -> Result<Self, OffsetOverflowError> {
|
||||
// Compute the file hash before any normalization.
|
||||
let src_hash = SourceFileHash::new(hash_kind, &src);
|
||||
let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes());
|
||||
let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| {
|
||||
if checksum_hash_kind == hash_kind {
|
||||
src_hash
|
||||
} else {
|
||||
SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes())
|
||||
}
|
||||
});
|
||||
let normalized_pos = normalize_src(&mut src);
|
||||
|
||||
let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
|
||||
|
@ -1748,6 +1879,7 @@ impl SourceFile {
|
|||
name,
|
||||
src: Some(Lrc::new(src)),
|
||||
src_hash,
|
||||
checksum_hash,
|
||||
external_src: FreezeLock::frozen(ExternalSource::Unneeded),
|
||||
start_pos: BytePos::from_u32(0),
|
||||
source_len: RelativeBytePos::from_u32(source_len),
|
||||
|
|
|
@ -175,6 +175,7 @@ pub struct SourceMapInputs {
|
|||
pub file_loader: Box<dyn FileLoader + Send + Sync>,
|
||||
pub path_mapping: FilePathMapping,
|
||||
pub hash_kind: SourceFileHashAlgorithm,
|
||||
pub checksum_hash_kind: Option<SourceFileHashAlgorithm>,
|
||||
}
|
||||
|
||||
pub struct SourceMap {
|
||||
|
@ -187,6 +188,12 @@ pub struct SourceMap {
|
|||
|
||||
/// The algorithm used for hashing the contents of each source file.
|
||||
hash_kind: SourceFileHashAlgorithm,
|
||||
|
||||
/// Similar to `hash_kind`, however this algorithm is used for checksums to determine if a crate is fresh.
|
||||
/// `cargo` is the primary user of these.
|
||||
///
|
||||
/// If this is equal to `hash_kind` then the checksum won't be computed twice.
|
||||
checksum_hash_kind: Option<SourceFileHashAlgorithm>,
|
||||
}
|
||||
|
||||
impl SourceMap {
|
||||
|
@ -195,17 +202,19 @@ impl SourceMap {
|
|||
file_loader: Box::new(RealFileLoader),
|
||||
path_mapping,
|
||||
hash_kind: SourceFileHashAlgorithm::Md5,
|
||||
checksum_hash_kind: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn with_inputs(
|
||||
SourceMapInputs { file_loader, path_mapping, hash_kind }: SourceMapInputs,
|
||||
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind }: SourceMapInputs,
|
||||
) -> SourceMap {
|
||||
SourceMap {
|
||||
files: Default::default(),
|
||||
file_loader: IntoDynSyncSend(file_loader),
|
||||
path_mapping,
|
||||
hash_kind,
|
||||
checksum_hash_kind,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -307,7 +316,8 @@ impl SourceMap {
|
|||
match self.source_file_by_stable_id(stable_id) {
|
||||
Some(lrc_sf) => Ok(lrc_sf),
|
||||
None => {
|
||||
let source_file = SourceFile::new(filename, src, self.hash_kind)?;
|
||||
let source_file =
|
||||
SourceFile::new(filename, src, self.hash_kind, self.checksum_hash_kind)?;
|
||||
|
||||
// Let's make sure the file_id we generated above actually matches
|
||||
// the ID we generate for the SourceFile we just created.
|
||||
|
@ -326,6 +336,7 @@ impl SourceMap {
|
|||
&self,
|
||||
filename: FileName,
|
||||
src_hash: SourceFileHash,
|
||||
checksum_hash: Option<SourceFileHash>,
|
||||
stable_id: StableSourceFileId,
|
||||
source_len: u32,
|
||||
cnum: CrateNum,
|
||||
|
@ -340,6 +351,7 @@ impl SourceMap {
|
|||
name: filename,
|
||||
src: None,
|
||||
src_hash,
|
||||
checksum_hash,
|
||||
external_src: FreezeLock::new(ExternalSource::Foreign {
|
||||
kind: ExternalSourceKind::AbsentOk,
|
||||
metadata_index,
|
||||
|
|
|
@ -229,6 +229,7 @@ fn t10() {
|
|||
let SourceFile {
|
||||
name,
|
||||
src_hash,
|
||||
checksum_hash,
|
||||
source_len,
|
||||
lines,
|
||||
multibyte_chars,
|
||||
|
@ -240,6 +241,7 @@ fn t10() {
|
|||
let imported_src_file = sm.new_imported_source_file(
|
||||
name,
|
||||
src_hash,
|
||||
checksum_hash,
|
||||
stable_id,
|
||||
source_len.to_u32(),
|
||||
CrateNum::ZERO,
|
||||
|
|
|
@ -3,9 +3,13 @@ use super::*;
|
|||
#[test]
|
||||
fn test_lookup_line() {
|
||||
let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
|
||||
let mut sf =
|
||||
SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256)
|
||||
.unwrap();
|
||||
let mut sf = SourceFile::new(
|
||||
FileName::Anon(Hash64::ZERO),
|
||||
source,
|
||||
SourceFileHashAlgorithm::Sha256,
|
||||
Some(SourceFileHashAlgorithm::Sha256),
|
||||
)
|
||||
.unwrap();
|
||||
sf.start_pos = BytePos(3);
|
||||
assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue