Auto merge of #137535 - Kobzol:split-metadata, r=petrochenkov

Introduce `-Zembed-metadata` to allow omitting full metadata from rlibs and dylibs

This is a continuation of https://github.com/rust-lang/rust/pull/120855 (I was mentored by `@bjorn3` to move it forward). Most of the original code was written by bjorn3; I tried to clean it up a bit and add some documentation and tests.

This PR introduces a new unstable compiler flag called `-Zembed-metadata=[no|yes]`, with the default being `yes` (see https://github.com/rust-lang/rust/issues/57076 for context). When set to `no`, rustc will only store a small metadata stub inside rlibs/dylibs instead of the full metadata, to keep their size smaller. It should be used in combination with `--emit=metadata`, so that the users of such a compiled library can still read the metadata from the corresponding `.rmeta` file. [This comment](https://github.com/rust-lang/rust/pull/120855#issuecomment-1937018169) shows an example of binary/artifact size wins that can be achieved using this approach.

Contrary to https://github.com/rust-lang/rust/pull/120855, this PR only introduces the new flag, along with a couple of run-make tests and documentation, but does not yet use it in bootstrap to actually compile rustc. I plan to do that as a follow-up step (along with integration in Cargo, which should ideally just always pass this flag to reduce the size of target directories).

Fixes https://github.com/rust-lang/rust/issues/23366
Closes https://github.com/rust-lang/rust/issues/29511
Fixes https://github.com/rust-lang/rust/issues/57076

Another attempt at https://github.com/rust-lang/rust/pull/93945 and https://github.com/rust-lang/rust/pull/120855.

r? `@petrochenkov`
This commit is contained in:
bors 2025-04-01 10:40:06 +00:00
commit 8c35f4a85e
15 changed files with 253 additions and 60 deletions

View file

@ -294,7 +294,7 @@ fn link_rlib<'a>(
let (metadata, metadata_position) = create_wrapper_file(
sess,
".rmeta".to_string(),
codegen_results.metadata.raw_data(),
codegen_results.metadata.stub_or_full(),
);
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
match metadata_position {

View file

@ -540,8 +540,8 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());
packed_metadata.write_all(&(metadata.stub_or_full().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.stub_or_full());
let Some(mut file) = create_object_file(sess) else {
if sess.target.is_like_wasm {

View file

@ -787,6 +787,7 @@ fn test_unstable_options_tracking_hash() {
tracked!(direct_access_external_data, Some(true));
tracked!(dual_proc_macros, true);
tracked!(dwarf_version, Some(5));
tracked!(embed_metadata, false);
tracked!(embed_source, true);
tracked!(emit_thin_lto, false);
tracked!(emscripten_wasm_eh, true);

View file

@ -97,6 +97,10 @@ metadata_found_staticlib =
found staticlib `{$crate_name}` instead of rlib or dylib{$add_info}
.help = please recompile that crate using --crate-type lib
metadata_full_metadata_not_found =
only metadata stub found for `{$flavor}` dependency `{$crate_name}`
please provide path to the corresponding .rmeta file with full metadata
metadata_global_alloc_required =
no global memory allocator found but one is required; link to std or add `#[global_allocator]` to a static item that implements the GlobalAlloc trait

View file

@ -525,6 +525,15 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for MultipleCandidates {
}
}
/// Diagnostic for the `metadata_full_metadata_not_found` message: only a metadata
/// stub was found for a dependency, and the full metadata was not available.
#[derive(Diagnostic)]
#[diag(metadata_full_metadata_not_found)]
pub(crate) struct FullMetadataNotFound {
/// Span the error is reported at.
#[primary_span]
pub span: Span,
/// Flavor of the crate file in which the stub was found (the `{$flavor}` message argument).
pub flavor: CrateFlavor,
/// Name of the dependency crate (the `{$crate_name}` message argument).
pub crate_name: Symbol,
}
#[derive(Diagnostic)]
#[diag(metadata_symbol_conflicts_current, code = E0519)]
pub struct SymbolConflictsCurrent {

View file

@ -3,7 +3,7 @@ use std::{fs, io};
use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{OutFileName, OutputType};
use rustc_session::config::{CrateType, OutFileName, OutputType};
use rustc_session::output::filename_for_metadata;
use rustc_session::{MetadataKind, Session};
use tempfile::Builder as TempFileBuilder;
@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
let metadata_stub_filename = if !tcx.sess.opts.unstable_opts.embed_metadata
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
{
Some(metadata_tmpdir.as_ref().join("stub.rmeta"))
} else {
None
};
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
@ -60,9 +67,15 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
if let Some(metadata_stub_filename) = &metadata_stub_filename {
std::fs::File::create(metadata_stub_filename).unwrap_or_else(|err| {
tcx.dcx()
.emit_fatal(FailedCreateFile { filename: &metadata_stub_filename, err });
});
}
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref())
}
};
@ -100,9 +113,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
EncodedMetadata::from_path(metadata_filename, metadata_stub_filename, metadata_tmpdir)
.unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
let need_metadata_module = metadata_kind == MetadataKind::Compressed;

View file

@ -654,7 +654,24 @@ impl<'a> CrateLocator<'a> {
continue;
}
}
*slot = Some((hash, metadata, lib.clone()));
// We error eagerly here. If we're locating a rlib, then in theory the full metadata
// could still be in a (later resolved) dylib. In practice, if the rlib and dylib
// were produced in a way where one has full metadata and the other hasn't, it would
// mean that they were compiled using different compiler flags and probably also have
// a different SVH value.
if metadata.get_header().is_stub {
// `is_stub` should never be true for .rmeta files.
assert_ne!(flavor, CrateFlavor::Rmeta);
// Because rmeta files are resolved before rlib/dylib files, if this is a stub and
// we haven't found a slot already, it means that the full metadata is missing.
if slot.is_none() {
return Err(CrateError::FullMetadataNotFound(self.crate_name, flavor));
}
} else {
*slot = Some((hash, metadata, lib.clone()));
}
ret = Some((lib, kind));
}
@ -728,37 +745,25 @@ impl<'a> CrateLocator<'a> {
let Some(file) = loc_orig.file_name().and_then(|s| s.to_str()) else {
return Err(CrateError::ExternLocationNotFile(self.crate_name, loc_orig.clone()));
};
// FnMut cannot return reference to captured value, so references
// must be taken outside the closure.
let rlibs = &mut rlibs;
let rmetas = &mut rmetas;
let dylibs = &mut dylibs;
let type_via_filename = (|| {
if file.starts_with("lib") {
if file.ends_with(".rlib") {
return Some(rlibs);
}
if file.ends_with(".rmeta") {
return Some(rmetas);
}
if file.starts_with("lib") {
if file.ends_with(".rlib") {
rlibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
return Some(dylibs);
}
None
})();
match type_via_filename {
Some(type_via_filename) => {
type_via_filename.insert(loc_canon.clone(), PathKind::ExternFlag);
}
None => {
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
if file.ends_with(".rmeta") {
rmetas.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
dylibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
}
// Extract the dylib/rlib/rmeta triple.
@ -928,6 +933,7 @@ pub(crate) enum CrateError {
ExternLocationNotExist(Symbol, PathBuf),
ExternLocationNotFile(Symbol, PathBuf),
MultipleCandidates(Symbol, CrateFlavor, Vec<PathBuf>),
FullMetadataNotFound(Symbol, CrateFlavor),
SymbolConflictsCurrent(Symbol),
StableCrateIdCollision(Symbol, Symbol),
DlOpen(String, String),
@ -978,6 +984,9 @@ impl CrateError {
CrateError::MultipleCandidates(crate_name, flavor, candidates) => {
dcx.emit_err(errors::MultipleCandidates { span, crate_name, flavor, candidates });
}
CrateError::FullMetadataNotFound(crate_name, flavor) => {
dcx.emit_err(errors::FullMetadataNotFound { span, crate_name, flavor });
}
CrateError::SymbolConflictsCurrent(root_name) => {
dcx.emit_err(errors::SymbolConflictsCurrent { span, crate_name: root_name });
}

View file

@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: proc_macro_data.is_some(),
is_stub: false,
},
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@ -2231,8 +2232,12 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
// generated regardless of trailing bytes that end up in it.
pub struct EncodedMetadata {
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
mmap: Option<Mmap>,
// The declaration order matters because `full_metadata` should be dropped
// before `_temp_dir`.
full_metadata: Option<Mmap>,
// This is an optional stub metadata containing only the crate header.
// The header should be very small, so we load it directly into memory.
stub_metadata: Option<Vec<u8>>,
// We need to carry MaybeTempDir to avoid deleting the temporary
// directory while accessing the Mmap.
_temp_dir: Option<MaybeTempDir>,
@ -2240,33 +2245,50 @@ pub struct EncodedMetadata {
impl EncodedMetadata {
#[inline]
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
pub fn from_path(
path: PathBuf,
stub_path: Option<PathBuf>,
temp_dir: Option<MaybeTempDir>,
) -> std::io::Result<Self> {
let file = std::fs::File::open(&path)?;
let file_metadata = file.metadata()?;
if file_metadata.len() == 0 {
return Ok(Self { mmap: None, _temp_dir: None });
return Ok(Self { full_metadata: None, stub_metadata: None, _temp_dir: None });
}
let mmap = unsafe { Some(Mmap::map(file)?) };
Ok(Self { mmap, _temp_dir: temp_dir })
let full_mmap = unsafe { Some(Mmap::map(file)?) };
let stub =
if let Some(stub_path) = stub_path { Some(std::fs::read(stub_path)?) } else { None };
Ok(Self { full_metadata: full_mmap, stub_metadata: stub, _temp_dir: temp_dir })
}
#[inline]
pub fn raw_data(&self) -> &[u8] {
self.mmap.as_deref().unwrap_or_default()
pub fn full(&self) -> &[u8] {
&self.full_metadata.as_deref().unwrap_or_default()
}
#[inline]
pub fn stub_or_full(&self) -> &[u8] {
self.stub_metadata.as_deref().unwrap_or(self.full())
}
}
impl<S: Encoder> Encodable<S> for EncodedMetadata {
fn encode(&self, s: &mut S) {
let slice = self.raw_data();
self.stub_metadata.encode(s);
let slice = self.full();
slice.encode(s)
}
}
impl<D: Decoder> Decodable<D> for EncodedMetadata {
fn decode(d: &mut D) -> Self {
let stub = <Option<Vec<u8>>>::decode(d);
let len = d.read_usize();
let mmap = if len > 0 {
let full_metadata = if len > 0 {
let mut mmap = MmapMut::map_anon(len).unwrap();
mmap.copy_from_slice(d.read_raw_bytes(len));
Some(mmap.make_read_only().unwrap())
@ -2274,11 +2296,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
None
};
Self { mmap, _temp_dir: None }
Self { full_metadata, stub_metadata: stub, _temp_dir: None }
}
}
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
// Since encoding metadata is not in a query, and nothing is cached,
@ -2292,6 +2314,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
}
with_encode_metadata_header(tcx, path, |ecx| {
// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
// Flush buffer to ensure backing file has the correct size.
ecx.opaque.flush();
// Record metadata size for self-profiling
tcx.prof.artifact_size(
"crate_metadata",
"crate_metadata",
ecx.opaque.file().metadata().unwrap().len(),
);
root.position.get()
});
if let Some(ref_path) = ref_path {
with_encode_metadata_header(tcx, ref_path, |ecx| {
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
name: tcx.crate_name(LOCAL_CRATE),
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: false,
is_stub: true,
});
header.position.get()
});
}
}
fn with_encode_metadata_header(
tcx: TyCtxt<'_>,
path: &Path,
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
) {
let mut encoder = opaque::FileEncoder::new(path)
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
encoder.emit_raw_bytes(METADATA_HEADER);
@ -2326,9 +2384,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
// Encode the rustc version string in a predictable location.
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
let root_position = f(&mut ecx);
// Make sure we report any errors from writing to the file.
// If we forget this, compilation can succeed with an incomplete rmeta file,
@ -2338,12 +2394,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
}
let file = ecx.opaque.file();
if let Err(err) = encode_root_position(file, root.position.get()) {
if let Err(err) = encode_root_position(file, root_position) {
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
}
// Record metadata size for self-profiling
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
}
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

View file

@ -56,7 +56,7 @@ pub(crate) fn rustc_version(cfg_version: &'static str) -> String {
/// Metadata encoding version.
/// N.B., increment this if you change the format of metadata such that
/// the rustc version can't be found to compare with `rustc_version()`.
const METADATA_VERSION: u8 = 9;
const METADATA_VERSION: u8 = 10;
/// Metadata header which includes `METADATA_VERSION`.
///
@ -221,6 +221,12 @@ pub(crate) struct CrateHeader {
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
/// time ProcMacroData changes.
pub(crate) is_proc_macro_crate: bool,
/// Whether this crate metadata section is just a stub.
/// Stubs do not contain the full metadata (it will be typically stored
/// in a separate rmeta file).
///
/// This is used inside rlibs and dylibs when using `-Zembed-metadata=no`.
pub(crate) is_stub: bool,
}
/// Serialized `.rmeta` data for a crate.

View file

@ -2175,6 +2175,8 @@ options! {
them only if an error has not been emitted"),
ehcont_guard: bool = (false, parse_bool, [TRACKED],
"generate Windows EHCont Guard tables"),
embed_metadata: bool = (true, parse_bool, [TRACKED],
"embed metadata in rlibs and dylibs (default: yes)"),
embed_source: bool = (false, parse_bool, [TRACKED],
"embed source text in DWARF debug sections (default: no)"),
emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],

View file

@ -0,0 +1,3 @@
## `embed-metadata`
This option instructs `rustc` to include the full metadata in `rlib` and `dylib` crate types. The default value is `yes` (enabled). If disabled (`no`), only stub metadata will be stored in these files, to reduce their size on disk. When using `-Zembed-metadata=no`, you will probably want to use `--emit=metadata` to emit the full metadata into a separate `.rmeta` file.

View file

@ -110,7 +110,7 @@ pub fn read_version(obj: &object::File<'_>) -> io::Result<String> {
));
}
let version = u32::from_be_bytes([dot_rustc[4], dot_rustc[5], dot_rustc[6], dot_rustc[7]]);
// Last supported version is:
// Last breaking version change is:
// https://github.com/rust-lang/rust/commit/b94cfefc860715fb2adf72a6955423d384c69318
let (mut metadata_portion, bytes_before_version) = match version {
8 => {
@ -118,7 +118,7 @@ pub fn read_version(obj: &object::File<'_>) -> io::Result<String> {
let data_len = u32::from_be_bytes(len_bytes.try_into().unwrap()) as usize;
(&dot_rustc[12..data_len + 12], 13)
}
9 => {
9 | 10 => {
let len_bytes = &dot_rustc[8..16];
let data_len = u64::from_le_bytes(len_bytes.try_into().unwrap()) as usize;
(&dot_rustc[16..data_len + 12], 17)

View file

@ -0,0 +1 @@
/// Public no-op function exported by this crate.
pub fn func_dep1() {}

View file

@ -0,0 +1,5 @@
// Depends on the `dep1` crate and calls its only function.
extern crate dep1;
fn main() {
dep1::func_dep1();
}

View file

@ -0,0 +1,86 @@
// Tests the -Zembed-metadata compiler flag.
// Tracking issue: https://github.com/rust-lang/rust/issues/139165
use run_make_support::rfs::{create_dir, remove_file, rename};
use run_make_support::{Rustc, dynamic_lib_name, path, run_in_tmpdir, rust_lib_name, rustc};
/// Kind of library the dependency crate is compiled as in this test.
#[derive(Debug, Copy, Clone)]
enum LibraryKind {
/// Corresponds to the `rlib` crate type.
Rlib,
/// Corresponds to the `dylib` crate type.
Dylib,
}
impl LibraryKind {
    /// Returns the crate type name used when compiling a library of this kind.
    fn crate_type(&self) -> &str {
        match *self {
            LibraryKind::Dylib => "dylib",
            LibraryKind::Rlib => "rlib",
        }
    }

    /// Adds an extern entry for `dep_name` to `rustc`, pointing at the library file of
    /// this kind located in the `dep_path` directory.
    fn add_extern(&self, rustc: &mut Rustc, dep_name: &str, dep_path: &str) {
        // Pick the file name matching the library kind, then prefix it with the directory.
        let file_name = match *self {
            LibraryKind::Rlib => rust_lib_name(dep_name),
            LibraryKind::Dylib => dynamic_lib_name(dep_name),
        };
        rustc.extern_(dep_name, format!("{dep_path}/{file_name}"));
    }
}
fn main() {
    // Whether `--extern` is passed or not sends the compiler down different paths, so every
    // combination of (`rlib`/`dylib`) x (`--extern`/no `--extern`) is exercised.
    let kinds = [LibraryKind::Rlib, LibraryKind::Dylib];
    for library_kind in kinds {
        eprintln!("Testing library kind {library_kind:?}");

        lookup_rmeta_in_lib_dir(library_kind);
        lookup_rmeta_through_extern(library_kind);
        lookup_rmeta_missing(library_kind);
    }
}
// The .rmeta file sits in the same directory as the rlib/dylib that only carries
// stub metadata; no --extern is passed when compiling `foo.rs`.
fn lookup_rmeta_in_lib_dir(kind: LibraryKind) {
    run_in_tmpdir(|| {
        // Build the dependency into the current (temporary) directory.
        let mut dep_build = build_dep_rustc(kind);
        dep_build.run();

        // Compile the crate that uses the dependency.
        let mut consumer = rustc();
        consumer.input("foo.rs");
        consumer.run();
    });
}
// The dependency is specified with --extern: once for the rlib/dylib and once for
// the .rmeta file.
fn lookup_rmeta_through_extern(kind: LibraryKind) {
    run_in_tmpdir(|| {
        // Generate the dependency library and libdep1.rmeta in deps
        create_dir("deps");
        let mut dep_build = build_dep_rustc(kind);
        dep_build.out_dir("deps");
        dep_build.run();

        let mut consumer = rustc();
        kind.add_extern(&mut consumer, "dep1", "deps");
        let rmeta_path = path("deps").join("libdep1.rmeta");
        consumer.extern_("dep1", rmeta_path);
        consumer.input("foo.rs");
        consumer.run();
    });
}
// Only the rlib/dylib is passed through --extern (no .rmeta path), so compilation is
// expected to fail; check the error message that is reported.
fn lookup_rmeta_missing(kind: LibraryKind) {
    run_in_tmpdir(|| {
        create_dir("deps");
        let mut dep_build = build_dep_rustc(kind);
        dep_build.out_dir("deps");
        dep_build.run();

        let mut consumer = rustc();
        kind.add_extern(&mut consumer, "dep1", "deps");
        consumer.input("foo.rs");
        let failure = consumer.run_fail();
        failure.assert_stderr_contains("only metadata stub found");
    });
}
// Prepares (without running) the rustc invocation that compiles `dep1.rs` as a library
// of the given kind with `-Zembed-metadata=no`, emitting both metadata and the library.
fn build_dep_rustc(kind: LibraryKind) -> Rustc {
    let mut cmd = rustc();
    cmd.arg("-Zembed-metadata=no");
    cmd.crate_type(kind.crate_type());
    cmd.input("dep1.rs");
    cmd.emit("metadata,link");
    // Dylibs additionally get `-Cprefer-dynamic`.
    if let LibraryKind::Dylib = kind {
        cmd.arg("-Cprefer-dynamic");
    }
    cmd
}