Auto merge of #137535 - Kobzol:split-metadata, r=petrochenkov
Introduce `-Zembed-metadata` to allow omitting full metadata from rlibs and dylibs. This is a continuation of https://github.com/rust-lang/rust/pull/120855 (I was mentored by `@bjorn3` to move it forward). Most of the original code was written by bjorn3; I tried to clean it up a bit and add some documentation and tests. This PR introduces a new unstable compiler flag called `-Zembed-metadata=[no|yes]`, with the default being `yes` (see https://github.com/rust-lang/rust/issues/57076 for context). When set to `no`, rustc will only store a small metadata stub inside rlibs/dylibs instead of the full metadata, to keep their size smaller. It should be used in combination with `--emit=metadata`, so that the users of such a compiled library can still read the metadata from the corresponding `.rmeta` file. [This comment](https://github.com/rust-lang/rust/pull/120855#issuecomment-1937018169) shows an example of binary/artifact size wins that can be achieved using this approach. Unlike https://github.com/rust-lang/rust/pull/120855, this PR only introduces the new flag, along with a couple of run-make tests and documentation, but does not yet use it in bootstrap to actually compile rustc. I plan to do that as a follow-up step (along with integration in Cargo, which should ideally just always pass this flag to reduce the size of target directories). Fixes https://github.com/rust-lang/rust/issues/23366 Closes https://github.com/rust-lang/rust/issues/29511 Fixes https://github.com/rust-lang/rust/issues/57076 Another attempt at https://github.com/rust-lang/rust/pull/93945 and https://github.com/rust-lang/rust/pull/120855. r? `@petrochenkov`
This commit is contained in:
commit
8c35f4a85e
15 changed files with 253 additions and 60 deletions
|
@ -97,6 +97,10 @@ metadata_found_staticlib =
|
|||
found staticlib `{$crate_name}` instead of rlib or dylib{$add_info}
|
||||
.help = please recompile that crate using --crate-type lib
|
||||
|
||||
metadata_full_metadata_not_found =
|
||||
only metadata stub found for `{$flavor}` dependency `{$crate_name}`
|
||||
please provide path to the corresponding .rmeta file with full metadata
|
||||
|
||||
metadata_global_alloc_required =
|
||||
no global memory allocator found but one is required; link to std or add `#[global_allocator]` to a static item that implements the GlobalAlloc trait
|
||||
|
||||
|
|
|
@ -525,6 +525,15 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for MultipleCandidates {
|
|||
}
|
||||
}
|
||||
|
||||
/// Diagnostic for the `metadata_full_metadata_not_found` message: only a
/// metadata stub was found for a dependency, and the `.rmeta` file holding
/// the full metadata was not provided.
#[derive(Diagnostic)]
|
||||
#[diag(metadata_full_metadata_not_found)]
|
||||
pub(crate) struct FullMetadataNotFound {
|
||||
/// Primary span the error is attached to.
#[primary_span]
|
||||
pub span: Span,
|
||||
/// Flavor of the artifact that contained only the stub; fills `{$flavor}`
/// in the message.
pub flavor: CrateFlavor,
|
||||
/// Name of the affected dependency; fills `{$crate_name}` in the message.
pub crate_name: Symbol,
|
||||
}
|
||||
|
||||
#[derive(Diagnostic)]
|
||||
#[diag(metadata_symbol_conflicts_current, code = E0519)]
|
||||
pub struct SymbolConflictsCurrent {
|
||||
|
|
|
@ -3,7 +3,7 @@ use std::{fs, io};
|
|||
|
||||
use rustc_data_structures::temp_dir::MaybeTempDir;
|
||||
use rustc_middle::ty::TyCtxt;
|
||||
use rustc_session::config::{OutFileName, OutputType};
|
||||
use rustc_session::config::{CrateType, OutFileName, OutputType};
|
||||
use rustc_session::output::filename_for_metadata;
|
||||
use rustc_session::{MetadataKind, Session};
|
||||
use tempfile::Builder as TempFileBuilder;
|
||||
|
@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
|
|||
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
|
||||
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
|
||||
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
|
||||
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
|
||||
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
|
||||
let metadata_stub_filename = if !tcx.sess.opts.unstable_opts.embed_metadata
|
||||
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
|
||||
{
|
||||
Some(metadata_tmpdir.as_ref().join("stub.rmeta"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
|
||||
// This simplifies the creation of the output `out_filename` when requested.
|
||||
|
@ -60,9 +67,15 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
|
|||
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
|
||||
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
|
||||
});
|
||||
if let Some(metadata_stub_filename) = &metadata_stub_filename {
|
||||
std::fs::File::create(metadata_stub_filename).unwrap_or_else(|err| {
|
||||
tcx.dcx()
|
||||
.emit_fatal(FailedCreateFile { filename: &metadata_stub_filename, err });
|
||||
});
|
||||
}
|
||||
}
|
||||
MetadataKind::Uncompressed | MetadataKind::Compressed => {
|
||||
encode_metadata(tcx, &metadata_filename);
|
||||
encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref())
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -100,9 +113,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
|
|||
|
||||
// Load metadata back to memory: codegen may need to include it in object files.
|
||||
let metadata =
|
||||
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
|
||||
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
|
||||
});
|
||||
EncodedMetadata::from_path(metadata_filename, metadata_stub_filename, metadata_tmpdir)
|
||||
.unwrap_or_else(|err| {
|
||||
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
|
||||
});
|
||||
|
||||
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
|
||||
|
||||
|
|
|
@ -654,7 +654,24 @@ impl<'a> CrateLocator<'a> {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
*slot = Some((hash, metadata, lib.clone()));
|
||||
|
||||
// We error eagerly here. If we're locating a rlib, then in theory the full metadata
|
||||
// could still be in a (later resolved) dylib. In practice, if the rlib and dylib
|
||||
// were produced in a way where one has full metadata and the other hasn't, it would
|
||||
// mean that they were compiled using different compiler flags and probably also have
|
||||
// a different SVH value.
|
||||
if metadata.get_header().is_stub {
|
||||
// `is_stub` should never be true for .rmeta files.
|
||||
assert_ne!(flavor, CrateFlavor::Rmeta);
|
||||
|
||||
// Because rmeta files are resolved before rlib/dylib files, if this is a stub and
|
||||
// we haven't found a slot already, it means that the full metadata is missing.
|
||||
if slot.is_none() {
|
||||
return Err(CrateError::FullMetadataNotFound(self.crate_name, flavor));
|
||||
}
|
||||
} else {
|
||||
*slot = Some((hash, metadata, lib.clone()));
|
||||
}
|
||||
ret = Some((lib, kind));
|
||||
}
|
||||
|
||||
|
@ -728,37 +745,25 @@ impl<'a> CrateLocator<'a> {
|
|||
let Some(file) = loc_orig.file_name().and_then(|s| s.to_str()) else {
|
||||
return Err(CrateError::ExternLocationNotFile(self.crate_name, loc_orig.clone()));
|
||||
};
|
||||
// FnMut cannot return reference to captured value, so references
|
||||
// must be taken outside the closure.
|
||||
let rlibs = &mut rlibs;
|
||||
let rmetas = &mut rmetas;
|
||||
let dylibs = &mut dylibs;
|
||||
let type_via_filename = (|| {
|
||||
if file.starts_with("lib") {
|
||||
if file.ends_with(".rlib") {
|
||||
return Some(rlibs);
|
||||
}
|
||||
if file.ends_with(".rmeta") {
|
||||
return Some(rmetas);
|
||||
}
|
||||
if file.starts_with("lib") {
|
||||
if file.ends_with(".rlib") {
|
||||
rlibs.insert(loc_canon.clone(), PathKind::ExternFlag);
|
||||
continue;
|
||||
}
|
||||
let dll_prefix = self.target.dll_prefix.as_ref();
|
||||
let dll_suffix = self.target.dll_suffix.as_ref();
|
||||
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
|
||||
return Some(dylibs);
|
||||
}
|
||||
None
|
||||
})();
|
||||
match type_via_filename {
|
||||
Some(type_via_filename) => {
|
||||
type_via_filename.insert(loc_canon.clone(), PathKind::ExternFlag);
|
||||
}
|
||||
None => {
|
||||
self.crate_rejections
|
||||
.via_filename
|
||||
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
|
||||
if file.ends_with(".rmeta") {
|
||||
rmetas.insert(loc_canon.clone(), PathKind::ExternFlag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let dll_prefix = self.target.dll_prefix.as_ref();
|
||||
let dll_suffix = self.target.dll_suffix.as_ref();
|
||||
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
|
||||
dylibs.insert(loc_canon.clone(), PathKind::ExternFlag);
|
||||
continue;
|
||||
}
|
||||
self.crate_rejections
|
||||
.via_filename
|
||||
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
|
||||
}
|
||||
|
||||
// Extract the dylib/rlib/rmeta triple.
|
||||
|
@ -928,6 +933,7 @@ pub(crate) enum CrateError {
|
|||
ExternLocationNotExist(Symbol, PathBuf),
|
||||
ExternLocationNotFile(Symbol, PathBuf),
|
||||
MultipleCandidates(Symbol, CrateFlavor, Vec<PathBuf>),
|
||||
FullMetadataNotFound(Symbol, CrateFlavor),
|
||||
SymbolConflictsCurrent(Symbol),
|
||||
StableCrateIdCollision(Symbol, Symbol),
|
||||
DlOpen(String, String),
|
||||
|
@ -978,6 +984,9 @@ impl CrateError {
|
|||
CrateError::MultipleCandidates(crate_name, flavor, candidates) => {
|
||||
dcx.emit_err(errors::MultipleCandidates { span, crate_name, flavor, candidates });
|
||||
}
|
||||
CrateError::FullMetadataNotFound(crate_name, flavor) => {
|
||||
dcx.emit_err(errors::FullMetadataNotFound { span, crate_name, flavor });
|
||||
}
|
||||
CrateError::SymbolConflictsCurrent(root_name) => {
|
||||
dcx.emit_err(errors::SymbolConflictsCurrent { span, crate_name: root_name });
|
||||
}
|
||||
|
|
|
@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
|
|||
triple: tcx.sess.opts.target_triple.clone(),
|
||||
hash: tcx.crate_hash(LOCAL_CRATE),
|
||||
is_proc_macro_crate: proc_macro_data.is_some(),
|
||||
is_stub: false,
|
||||
},
|
||||
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
|
||||
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
|
||||
|
@ -2231,8 +2232,12 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
|
|||
// generated regardless of trailing bytes that end up in it.
|
||||
|
||||
pub struct EncodedMetadata {
|
||||
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
|
||||
mmap: Option<Mmap>,
|
||||
// The declaration order matters because `full_metadata` should be dropped
|
||||
// before `_temp_dir`.
|
||||
full_metadata: Option<Mmap>,
|
||||
// This is an optional stub metadata containing only the crate header.
|
||||
// The header should be very small, so we load it directly into memory.
|
||||
stub_metadata: Option<Vec<u8>>,
|
||||
// We need to carry MaybeTempDir to avoid deleting the temporary
|
||||
// directory while accessing the Mmap.
|
||||
_temp_dir: Option<MaybeTempDir>,
|
||||
|
@ -2240,33 +2245,50 @@ pub struct EncodedMetadata {
|
|||
|
||||
impl EncodedMetadata {
|
||||
#[inline]
|
||||
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
|
||||
pub fn from_path(
|
||||
path: PathBuf,
|
||||
stub_path: Option<PathBuf>,
|
||||
temp_dir: Option<MaybeTempDir>,
|
||||
) -> std::io::Result<Self> {
|
||||
let file = std::fs::File::open(&path)?;
|
||||
let file_metadata = file.metadata()?;
|
||||
if file_metadata.len() == 0 {
|
||||
return Ok(Self { mmap: None, _temp_dir: None });
|
||||
return Ok(Self { full_metadata: None, stub_metadata: None, _temp_dir: None });
|
||||
}
|
||||
let mmap = unsafe { Some(Mmap::map(file)?) };
|
||||
Ok(Self { mmap, _temp_dir: temp_dir })
|
||||
let full_mmap = unsafe { Some(Mmap::map(file)?) };
|
||||
|
||||
let stub =
|
||||
if let Some(stub_path) = stub_path { Some(std::fs::read(stub_path)?) } else { None };
|
||||
|
||||
Ok(Self { full_metadata: full_mmap, stub_metadata: stub, _temp_dir: temp_dir })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn raw_data(&self) -> &[u8] {
|
||||
self.mmap.as_deref().unwrap_or_default()
|
||||
pub fn full(&self) -> &[u8] {
|
||||
&self.full_metadata.as_deref().unwrap_or_default()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn stub_or_full(&self) -> &[u8] {
|
||||
self.stub_metadata.as_deref().unwrap_or(self.full())
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Encoder> Encodable<S> for EncodedMetadata {
|
||||
fn encode(&self, s: &mut S) {
|
||||
let slice = self.raw_data();
|
||||
self.stub_metadata.encode(s);
|
||||
|
||||
let slice = self.full();
|
||||
slice.encode(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: Decoder> Decodable<D> for EncodedMetadata {
|
||||
fn decode(d: &mut D) -> Self {
|
||||
let stub = <Option<Vec<u8>>>::decode(d);
|
||||
|
||||
let len = d.read_usize();
|
||||
let mmap = if len > 0 {
|
||||
let full_metadata = if len > 0 {
|
||||
let mut mmap = MmapMut::map_anon(len).unwrap();
|
||||
mmap.copy_from_slice(d.read_raw_bytes(len));
|
||||
Some(mmap.make_read_only().unwrap())
|
||||
|
@ -2274,11 +2296,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
|
|||
None
|
||||
};
|
||||
|
||||
Self { mmap, _temp_dir: None }
|
||||
Self { full_metadata, stub_metadata: stub, _temp_dir: None }
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
|
||||
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
|
||||
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
|
||||
|
||||
// Since encoding metadata is not in a query, and nothing is cached,
|
||||
|
@ -2292,6 +2314,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
|
|||
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
|
||||
}
|
||||
|
||||
with_encode_metadata_header(tcx, path, |ecx| {
|
||||
// Encode all the entries and extra information in the crate,
|
||||
// culminating in the `CrateRoot` which points to all of it.
|
||||
let root = ecx.encode_crate_root();
|
||||
|
||||
// Flush buffer to ensure backing file has the correct size.
|
||||
ecx.opaque.flush();
|
||||
// Record metadata size for self-profiling
|
||||
tcx.prof.artifact_size(
|
||||
"crate_metadata",
|
||||
"crate_metadata",
|
||||
ecx.opaque.file().metadata().unwrap().len(),
|
||||
);
|
||||
|
||||
root.position.get()
|
||||
});
|
||||
|
||||
if let Some(ref_path) = ref_path {
|
||||
with_encode_metadata_header(tcx, ref_path, |ecx| {
|
||||
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
|
||||
name: tcx.crate_name(LOCAL_CRATE),
|
||||
triple: tcx.sess.opts.target_triple.clone(),
|
||||
hash: tcx.crate_hash(LOCAL_CRATE),
|
||||
is_proc_macro_crate: false,
|
||||
is_stub: true,
|
||||
});
|
||||
header.position.get()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn with_encode_metadata_header(
|
||||
tcx: TyCtxt<'_>,
|
||||
path: &Path,
|
||||
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
|
||||
) {
|
||||
let mut encoder = opaque::FileEncoder::new(path)
|
||||
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
|
||||
encoder.emit_raw_bytes(METADATA_HEADER);
|
||||
|
@ -2326,9 +2384,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
|
|||
// Encode the rustc version string in a predictable location.
|
||||
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
|
||||
|
||||
// Encode all the entries and extra information in the crate,
|
||||
// culminating in the `CrateRoot` which points to all of it.
|
||||
let root = ecx.encode_crate_root();
|
||||
let root_position = f(&mut ecx);
|
||||
|
||||
// Make sure we report any errors from writing to the file.
|
||||
// If we forget this, compilation can succeed with an incomplete rmeta file,
|
||||
|
@ -2338,12 +2394,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
|
|||
}
|
||||
|
||||
let file = ecx.opaque.file();
|
||||
if let Err(err) = encode_root_position(file, root.position.get()) {
|
||||
if let Err(err) = encode_root_position(file, root_position) {
|
||||
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
|
||||
}
|
||||
|
||||
// Record metadata size for self-profiling
|
||||
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
|
||||
}
|
||||
|
||||
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {
|
||||
|
|
|
@ -56,7 +56,7 @@ pub(crate) fn rustc_version(cfg_version: &'static str) -> String {
|
|||
/// Metadata encoding version.
|
||||
/// N.B., increment this if you change the format of metadata such that
|
||||
/// the rustc version can't be found to compare with `rustc_version()`.
|
||||
const METADATA_VERSION: u8 = 9;
|
||||
const METADATA_VERSION: u8 = 10;
|
||||
|
||||
/// Metadata header which includes `METADATA_VERSION`.
|
||||
///
|
||||
|
@ -221,6 +221,12 @@ pub(crate) struct CrateHeader {
|
|||
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
|
||||
/// time ProcMacroData changes.
|
||||
pub(crate) is_proc_macro_crate: bool,
|
||||
/// Whether this crate metadata section is just a stub.
|
||||
/// Stubs do not contain the full metadata (it will be typically stored
|
||||
/// in a separate rmeta file).
|
||||
///
|
||||
/// This is used inside rlibs and dylibs when using `-Zembed-metadata=no`.
|
||||
pub(crate) is_stub: bool,
|
||||
}
|
||||
|
||||
/// Serialized `.rmeta` data for a crate.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue