1
Fork 0

Auto merge of #96544 - m-ysk:feature/issue-96358, r=cjgillot

Stop keeping metadata in memory before writing it to disk

Fixes #96358

I created this PR following the instructions given in the issue, except for the following points:

- Although the issue says "Write metadata into the temporary file in `encode_and_write_metadata` even if `!need_metadata_file`", I could not do that. When I tried it and ran `x.py test`, I got many test failures, listed below.

<details>
<summary>List of failed tests</summary>
<pre>
<code>
failures:
    [ui] src/test/ui/json-multiple.rs
    [ui] src/test/ui/json-options.rs
    [ui] src/test/ui/rmeta/rmeta-rpass.rs
    [ui] src/test/ui/save-analysis/emit-notifications.rs
    [ui] src/test/ui/svh/changing-crates.rs
    [ui] src/test/ui/svh/svh-change-lit.rs
    [ui] src/test/ui/svh/svh-change-significant-cfg.rs
    [ui] src/test/ui/svh/svh-change-trait-bound.rs
    [ui] src/test/ui/svh/svh-change-type-arg.rs
    [ui] src/test/ui/svh/svh-change-type-ret.rs
    [ui] src/test/ui/svh/svh-change-type-static.rs
    [ui] src/test/ui/svh/svh-use-trait.rs

test result: FAILED. 12915 passed; 12 failed; 100 ignored; 0 measured; 0 filtered out; finished in 71.41s

Some tests failed in compiletest suite=ui mode=ui host=x86_64-unknown-linux-gnu target=x86_64-unknown-linux-gnu
Build completed unsuccessfully in 0:01:58
</code>
</pre>
</details>

- I was not able to complete the extra tasks concerning `create_rmeta_file` and `create_compressed_metadata_file`, as they were beyond my ability.
This commit is contained in:
bors 2022-07-14 21:50:14 +00:00
commit 1ba1fec234
16 changed files with 300 additions and 147 deletions

View file

@ -12,6 +12,7 @@ odht = { version = "0.3.1", features = ["nightly"] }
snap = "1"
tracing = "0.1"
smallvec = { version = "1.8.1", features = ["union", "may_dangle"] }
tempfile = "3.2"
rustc_middle = { path = "../rustc_middle" }
rustc_attr = { path = "../rustc_attr" }
rustc_data_structures = { path = "../rustc_data_structures" }

View file

@ -0,0 +1,137 @@
use crate::{encode_metadata, EncodedMetadata};
use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{CrateType, OutputFilenames, OutputType};
use rustc_session::output::filename_for_metadata;
use rustc_session::Session;
use tempfile::Builder as TempFileBuilder;
use std::fs;
use std::path::{Path, PathBuf};
// FIXME(eddyb) maybe include the crate name in this?
/// File name used for the metadata file created inside a temporary directory.
pub const METADATA_FILENAME: &str = "lib.rmeta";
/// Writes `metadata` into a file named [`METADATA_FILENAME`] inside `tmpdir` and
/// returns the path of that file.
///
/// A temp directory is used to avoid races between concurrent rustc processes,
/// such as builds in the same directory using the same filename for metadata while
/// building an `.rlib` (stomping over one another), or writing an `.rmeta` into a
/// directory being searched for `extern crate` (observing an incomplete file).
///
/// If the write fails, a fatal error is reported through `sess`.
pub fn emit_metadata(sess: &Session, metadata: &[u8], tmpdir: &MaybeTempDir) -> PathBuf {
    let metadata_path = tmpdir.as_ref().join(METADATA_FILENAME);
    match fs::write(&metadata_path, metadata) {
        Ok(()) => metadata_path,
        Err(err) => {
            sess.fatal(&format!("failed to write {}: {}", metadata_path.display(), err))
        }
    }
}
pub fn encode_and_write_metadata(
tcx: TyCtxt<'_>,
outputs: &OutputFilenames,
) -> (EncodedMetadata, bool) {
#[derive(PartialEq, Eq, PartialOrd, Ord)]
enum MetadataKind {
None,
Uncompressed,
Compressed,
}
let metadata_kind = tcx
.sess
.crate_types()
.iter()
.map(|ty| match *ty {
CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => MetadataKind::None,
CrateType::Rlib => MetadataKind::Uncompressed,
CrateType::Dylib | CrateType::ProcMacro => MetadataKind::Compressed,
})
.max()
.unwrap_or(MetadataKind::None);
let crate_name = tcx.crate_name(LOCAL_CRATE);
let out_filename = filename_for_metadata(tcx.sess, crate_name.as_str(), outputs);
// To avoid races with another rustc process scanning the output directory,
// we need to write the file somewhere else and atomically move it to its
// final destination, with an `fs::rename` call. In order for the rename to
// always succeed, the temporary file needs to be on the same filesystem,
// which is why we create it inside the output directory specifically.
let metadata_tmpdir = TempFileBuilder::new()
.prefix("rmeta")
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.sess.fatal(&format!("couldn't create a temp dir: {}", err)));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
match metadata_kind {
MetadataKind::None => {
std::fs::File::create(&metadata_filename).unwrap_or_else(|e| {
tcx.sess.fatal(&format!(
"failed to create the file {}: {}",
metadata_filename.display(),
e
))
});
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
}
};
let _prof_timer = tcx.sess.prof.generic_activity("write_crate_metadata");
// If the user requests metadata as output, rename `metadata_filename`
// to the expected output `out_filename`. The match above should ensure
// this file always exists.
let need_metadata_file = tcx.sess.opts.output_types.contains_key(&OutputType::Metadata);
let (metadata_filename, metadata_tmpdir) = if need_metadata_file {
if let Err(e) = non_durable_rename(&metadata_filename, &out_filename) {
tcx.sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e));
}
if tcx.sess.opts.json_artifact_notifications {
tcx.sess
.parse_sess
.span_diagnostic
.emit_artifact_notification(&out_filename, "metadata");
}
(out_filename, None)
} else {
(metadata_filename, Some(metadata_tmpdir))
};
// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|e| {
tcx.sess.fatal(&format!("failed to create encoded metadata from file: {}", e))
});
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
(metadata, need_metadata_module)
}
/// Renames `src` to `dst`, avoiding an implicit write-back of the source file on Linux.
///
/// On Linux this attempts to bypass the auto_da_alloc heuristic implemented by some
/// filesystems such as btrfs and ext4. When renaming over a file that already exists
/// they will "helpfully" write back the source file before committing the rename, in
/// case a developer forgot some of the fsyncs in the
/// open/write/fsync(file)/rename/fsync(dir) dance for atomic file updates.
///
/// To avoid triggering this heuristic the destination is deleted first, if it exists.
/// The cost of an extra syscall is much lower than getting descheduled for the sync IO.
///
/// On every other target this is a plain `std::fs::rename`.
pub fn non_durable_rename(src: &Path, dst: &Path) -> std::io::Result<()> {
    if cfg!(target_os = "linux") {
        // The result is deliberately ignored: only the rename's outcome is reported.
        let _ = std::fs::remove_file(dst);
    }
    std::fs::rename(src, dst)
}

View file

@ -34,6 +34,8 @@ mod native_libs;
mod rmeta;
pub mod creader;
pub mod fs;
pub mod locator;
pub use fs::{emit_metadata, METADATA_FILENAME};
pub use rmeta::{encode_metadata, EncodedMetadata, METADATA_HEADER};

View file

@ -4,8 +4,10 @@ use crate::rmeta::*;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
use rustc_data_structures::memmap::{Mmap, MmapMut};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::{join, par_iter, Lrc, ParallelIterator};
use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_hir as hir;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::{
@ -27,8 +29,7 @@ use rustc_middle::ty::codec::TyEncoder;
use rustc_middle::ty::fast_reject::{self, SimplifiedType, TreatParams};
use rustc_middle::ty::query::Providers;
use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
use rustc_serialize::opaque::MemEncoder;
use rustc_serialize::{Encodable, Encoder};
use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder};
use rustc_session::config::CrateType;
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
@ -39,12 +40,14 @@ use rustc_span::{
use rustc_target::abi::VariantIdx;
use std::borrow::Borrow;
use std::hash::Hash;
use std::io::{Read, Seek, Write};
use std::iter;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use tracing::{debug, trace};
pub(super) struct EncodeContext<'a, 'tcx> {
opaque: MemEncoder,
opaque: opaque::FileEncoder,
tcx: TyCtxt<'tcx>,
feat: &'tcx rustc_feature::Features,
@ -729,12 +732,19 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
assert_eq!(total_bytes, computed_total_bytes);
if tcx.sess.meta_stats() {
self.opaque.flush();
// Rewind and re-read all the metadata to count the zero bytes we wrote.
let pos_before_rewind = self.opaque.file().stream_position().unwrap();
let mut zero_bytes = 0;
for e in self.opaque.data.iter() {
if *e == 0 {
self.opaque.file().rewind().unwrap();
let file = std::io::BufReader::new(self.opaque.file());
for e in file.bytes() {
if e.unwrap() == 0 {
zero_bytes += 1;
}
}
assert_eq!(self.opaque.file().stream_position().unwrap(), pos_before_rewind);
let perc = |bytes| (bytes * 100) as f64 / total_bytes as f64;
let p = |label, bytes| {
@ -2133,24 +2143,58 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
// will allow us to slice the metadata to the precise length that we just
// generated regardless of trailing bytes that end up in it.
#[derive(Encodable, Decodable)]
pub struct EncodedMetadata {
raw_data: Vec<u8>,
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
mmap: Option<Mmap>,
// We need to carry MaybeTempDir to avoid deleting the temporary
// directory while accessing the Mmap.
_temp_dir: Option<MaybeTempDir>,
}
impl EncodedMetadata {
#[inline]
pub fn new() -> EncodedMetadata {
EncodedMetadata { raw_data: Vec::new() }
/// Opens the file at `path` and exposes its contents as encoded metadata.
///
/// An empty file produces a value without any mapping, and `temp_dir` is not
/// retained in that case. Otherwise the file is memory-mapped and `temp_dir`
/// is stored alongside the mapping.
///
/// Returns any I/O error raised while opening, inspecting or mapping the file.
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
    let file = std::fs::File::open(&path)?;
    if file.metadata()?.len() == 0 {
        return Ok(Self { mmap: None, _temp_dir: None });
    }
    // NOTE(review): mapping a file presumably relies on it not being modified while
    // mapped; confirm against the `Mmap::map` contract.
    let mapping = unsafe { Mmap::map(file)? };
    Ok(Self { mmap: Some(mapping), _temp_dir: temp_dir })
}
#[inline]
pub fn raw_data(&self) -> &[u8] {
&self.raw_data
self.mmap.as_ref().map(|mmap| mmap.as_ref()).unwrap_or_default()
}
}
pub fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata {
impl<S: Encoder> Encodable<S> for EncodedMetadata {
    /// Encodes the raw metadata bytes as a byte slice.
    fn encode(&self, s: &mut S) {
        self.raw_data().encode(s)
    }
}
impl<D: Decoder> Decodable<D> for EncodedMetadata {
    /// Rebuilds an `EncodedMetadata` from a length followed by that many bytes.
    ///
    /// A zero length yields a value without any mapping. Otherwise the bytes are
    /// copied into an anonymous memory map which is then made read-only. The decoded
    /// value never carries a temporary directory.
    ///
    /// # Panics
    ///
    /// Panics if the anonymous mapping cannot be created, flushed or made read-only.
    fn decode(d: &mut D) -> Self {
        let len = d.read_usize();
        let mmap = if len > 0 {
            let mut mmap = MmapMut::map_anon(len).unwrap();
            // Fill the mapping position by position. Writing through a freshly created
            // `&mut mmap[..]` on every iteration would always target offset 0, leaving
            // only the last decoded byte in the first slot and the rest untouched.
            for byte in &mut mmap[..len] {
                *byte = d.read_u8();
            }
            mmap.flush().unwrap();
            Some(mmap.make_read_only().unwrap())
        } else {
            None
        };

        Self { mmap, _temp_dir: None }
    }
}
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
// Since encoding metadata is not in a query, and nothing is cached,
@ -2158,7 +2202,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata {
tcx.dep_graph.assert_ignored();
join(
|| encode_metadata_impl(tcx),
|| encode_metadata_impl(tcx, path),
|| {
if tcx.sess.threads() == 1 {
return;
@ -2168,12 +2212,12 @@ pub fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata {
// It can be removed if it turns out to cause trouble or be detrimental to performance.
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
},
)
.0
);
}
fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
let mut encoder = MemEncoder::new();
fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
let mut encoder = opaque::FileEncoder::new(path)
.unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to create file encoder: {}", err)));
encoder.emit_raw_bytes(METADATA_HEADER);
// Will be filled with the root position after encoding everything.
@ -2208,20 +2252,29 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
let mut result = ecx.opaque.finish();
ecx.opaque.flush();
let mut file = ecx.opaque.file();
// We will return to this position after writing the root position.
let pos_before_seek = file.stream_position().unwrap();
// Encode the root position.
let header = METADATA_HEADER.len();
file.seek(std::io::SeekFrom::Start(header as u64))
.unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to seek the file: {}", err)));
let pos = root.position.get();
result[header + 0] = (pos >> 24) as u8;
result[header + 1] = (pos >> 16) as u8;
result[header + 2] = (pos >> 8) as u8;
result[header + 3] = (pos >> 0) as u8;
file.write_all(&[(pos >> 24) as u8, (pos >> 16) as u8, (pos >> 8) as u8, (pos >> 0) as u8])
.unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to write to the file: {}", err)));
// Return to the position where we are before writing the root position.
file.seek(std::io::SeekFrom::Start(pos_before_seek)).unwrap();
// Record metadata size for self-profiling
tcx.prof.artifact_size("crate_metadata", "crate_metadata", result.len() as u64);
EncodedMetadata { raw_data: result }
tcx.prof.artifact_size(
"crate_metadata",
"crate_metadata",
file.metadata().unwrap().len() as u64,
);
}
pub fn provide(providers: &mut Providers) {
@ -2242,5 +2295,5 @@ pub fn provide(providers: &mut Providers) {
},
..*providers
};
}
}

View file

@ -21,7 +21,7 @@ use rustc_middle::ty::fast_reject::SimplifiedType;
use rustc_middle::ty::query::Providers;
use rustc_middle::ty::{self, ReprOptions, Ty};
use rustc_middle::ty::{GeneratorDiagnosticData, ParameterizedOverTcx, TyCtxt};
use rustc_serialize::opaque::MemEncoder;
use rustc_serialize::opaque::FileEncoder;
use rustc_session::config::SymbolManglingVersion;
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
use rustc_span::edition::Edition;
@ -322,7 +322,7 @@ macro_rules! define_tables {
}
impl TableBuilders {
fn encode(&self, buf: &mut MemEncoder) -> LazyTables {
fn encode(&self, buf: &mut FileEncoder) -> LazyTables {
LazyTables {
$($name: self.$name.encode(buf)),+
}

View file

@ -4,8 +4,8 @@ use rustc_data_structures::fingerprint::Fingerprint;
use rustc_hir::def::{CtorKind, CtorOf};
use rustc_index::vec::Idx;
use rustc_middle::ty::ParameterizedOverTcx;
use rustc_serialize::opaque::MemEncoder;
use rustc_serialize::Encoder;
use rustc_serialize::opaque::FileEncoder;
use rustc_serialize::Encoder as _;
use rustc_span::hygiene::MacroKind;
use std::convert::TryInto;
use std::marker::PhantomData;
@ -281,7 +281,7 @@ where
Some(value).write_to_bytes(&mut self.blocks[i]);
}
pub(crate) fn encode<const N: usize>(&self, buf: &mut MemEncoder) -> LazyTable<I, T>
pub(crate) fn encode<const N: usize>(&self, buf: &mut FileEncoder) -> LazyTable<I, T>
where
Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
{