Auto merge of #128320 - saethlin:link-me-maybe, r=compiler-errors

Avoid no-op unlink+link dances in incr comp

Incremental compilation scales quite poorly with the number of CGUs. This PR improves one reason for that.

The incr comp process hard-links all the files from an old session into a new one, then it runs the backend, which may just hard-link the new session files into the output directory. Then codegen hard-links all the output files back to the new session directory.

This PR (perhaps unimaginatively) fixes the silliness that ensues in the last step. The old `link_or_copy` implementation would be passed pairs of paths which are already the same inode, then it would blindly delete the destination and re-create the hard-link that it just deleted. This PR lets us skip both those operations. We don't skip the other two hard-links.

`cargo +stage1 b && touch crates/core/main.rs && strace -cfw -elink,linkat,unlink,unlinkat cargo +stage1 b` before and then after on `ripgrep-13.0.0`:
```
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 52.56    0.024950          25       978       485 unlink
 34.38    0.016318          22       727           linkat
 13.06    0.006200          24       249           unlinkat
------ ----------- ----------- --------- --------- ----------------
100.00    0.047467          24      1954       485 total
```
```
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 42.83    0.014521          57       252           unlink
 38.41    0.013021          26       486           linkat
 18.77    0.006362          25       249           unlinkat
------ ----------- ----------- --------- --------- ----------------
100.00    0.033904          34       987           total
```

This reduces the number of hard-links that are causing perf troubles, noted in https://github.com/rust-lang/rust/issues/64291 and https://github.com/rust-lang/rust/issues/137560
This commit is contained in:
bors 2025-03-21 21:03:49 +00:00
commit 48b36c9d59
6 changed files with 57 additions and 26 deletions

View file

@ -103,12 +103,14 @@ impl OngoingCodegen {
("o", &module_regular.object.as_ref().unwrap()),
("asm.o", &module_global_asm.object.as_ref().unwrap()),
],
&[],
)
} else {
rustc_incremental::copy_cgu_workproduct_to_incr_comp_cache_dir(
sess,
&module_regular.name,
&[("o", &module_regular.object.as_ref().unwrap())],
&[],
)
};
if let Some((work_product_id, work_product)) = work_product {
@ -381,6 +383,7 @@ fn emit_cgu(
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: Vec::new(),
}),
existing_work_product: None,
})
@ -437,6 +440,7 @@ fn emit_module(
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: Vec::new(),
})
}
@ -460,22 +464,23 @@ fn reuse_workproduct_for_cgu(
err
));
}
let obj_out_global_asm =
crate::global_asm::add_file_stem_postfix(obj_out_regular.clone(), ".asm");
let has_global_asm = if let Some(asm_o) = work_product.saved_files.get("asm.o") {
let source_file_global_asm = if let Some(asm_o) = work_product.saved_files.get("asm.o") {
let source_file_global_asm = rustc_incremental::in_incr_comp_dir_sess(&tcx.sess, asm_o);
if let Err(err) = rustc_fs_util::link_or_copy(&source_file_global_asm, &obj_out_global_asm)
{
return Err(format!(
"unable to copy {} to {}: {}",
source_file_regular.display(),
obj_out_regular.display(),
source_file_global_asm.display(),
obj_out_global_asm.display(),
err
));
}
true
Some(source_file_global_asm)
} else {
false
None
};
Ok(ModuleCodegenResult {
@ -487,8 +492,9 @@ fn reuse_workproduct_for_cgu(
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: vec![source_file_regular],
},
module_global_asm: has_global_asm.then(|| CompiledModule {
module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
name: cgu.name().to_string(),
kind: ModuleKind::Regular,
object: Some(obj_out_global_asm),
@ -496,6 +502,7 @@ fn reuse_workproduct_for_cgu(
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: vec![source_file],
}),
existing_work_product: Some((cgu.work_product_id(), work_product)),
})
@ -637,6 +644,7 @@ fn emit_metadata_module(tcx: TyCtxt<'_>, metadata: &EncodedMetadata) -> Compiled
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: Vec::new(),
}
}

View file

@ -546,9 +546,12 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
if let Some(path) = &module.bytecode {
files.push((OutputType::Bitcode.extension(), path.as_path()));
}
if let Some((id, product)) =
copy_cgu_workproduct_to_incr_comp_cache_dir(sess, &module.name, files.as_slice())
{
if let Some((id, product)) = copy_cgu_workproduct_to_incr_comp_cache_dir(
sess,
&module.name,
files.as_slice(),
&module.links_from_incr_cache,
) {
work_products.insert(id, product);
}
}
@ -934,7 +937,9 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
) -> WorkItemResult<B> {
let incr_comp_session_dir = cgcx.incr_comp_session_dir.as_ref().unwrap();
let load_from_incr_comp_dir = |output_path: PathBuf, saved_path: &str| {
let mut links_from_incr_cache = Vec::new();
let mut load_from_incr_comp_dir = |output_path: PathBuf, saved_path: &str| {
let source_file = in_incr_comp_dir(incr_comp_session_dir, saved_path);
debug!(
"copying preexisting module `{}` from {:?} to {}",
@ -943,7 +948,10 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
output_path.display()
);
match link_or_copy(&source_file, &output_path) {
Ok(_) => Some(output_path),
Ok(_) => {
links_from_incr_cache.push(source_file);
Some(output_path)
}
Err(error) => {
cgcx.create_dcx().handle().emit_err(errors::CopyPathBuf {
source_file,
@ -966,7 +974,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
load_from_incr_comp_dir(dwarf_obj_out, saved_dwarf_object_file)
});
let load_from_incr_cache = |perform, output_type: OutputType| {
let mut load_from_incr_cache = |perform, output_type: OutputType| {
if perform {
let saved_file = module.source.saved_files.get(output_type.extension())?;
let output_path = cgcx.output_filenames.temp_path(output_type, Some(&module.name));
@ -986,6 +994,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
}
WorkItemResult::Finished(CompiledModule {
links_from_incr_cache,
name: module.name,
kind: ModuleKind::Regular,
object,

View file

@ -658,6 +658,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
bytecode: None,
assembly: None,
llvm_ir: None,
links_from_incr_cache: Vec::new(),
}
})
});

View file

@ -122,6 +122,7 @@ impl<M> ModuleCodegen<M> {
bytecode,
assembly,
llvm_ir,
links_from_incr_cache: Vec::new(),
}
}
}
@ -135,6 +136,7 @@ pub struct CompiledModule {
pub bytecode: Option<PathBuf>,
pub assembly: Option<PathBuf>, // --emit=asm
pub llvm_ir: Option<PathBuf>, // --emit=llvm-ir, llvm-bc is in bytecode
pub links_from_incr_cache: Vec<PathBuf>,
}
impl CompiledModule {

View file

@ -55,25 +55,31 @@ pub enum LinkOrCopy {
Copy,
}
/// Copies `p` into `q`, preferring to use hard-linking if possible. If
/// `q` already exists, it is removed first.
/// Copies `p` into `q`, preferring to use hard-linking if possible.
/// The result indicates which of the two operations has been performed.
pub fn link_or_copy<P: AsRef<Path>, Q: AsRef<Path>>(p: P, q: Q) -> io::Result<LinkOrCopy> {
// Creating a hard-link will fail if the destination path already exists. We could defensively
// call remove_file in this function, but that pessimizes callers who can avoid such calls.
// Incremental compilation calls this function a lot, and is able to avoid calls that
// would fail the first hard_link attempt.
let p = p.as_ref();
let q = q.as_ref();
match fs::remove_file(q) {
Ok(()) => (),
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => return Err(err),
let err = match fs::hard_link(p, q) {
Ok(()) => return Ok(LinkOrCopy::Link),
Err(err) => err,
};
if err.kind() == io::ErrorKind::AlreadyExists {
fs::remove_file(q)?;
if fs::hard_link(p, q).is_ok() {
return Ok(LinkOrCopy::Link);
}
}
match fs::hard_link(p, q) {
Ok(()) => Ok(LinkOrCopy::Link),
Err(_) => match fs::copy(p, q) {
Ok(_) => Ok(LinkOrCopy::Copy),
Err(e) => Err(e),
},
}
// Hard linking failed, fall back to copying.
fs::copy(p, q).map(|_| LinkOrCopy::Copy)
}
#[cfg(any(unix, all(target_os = "wasi", target_env = "p1")))]

View file

@ -3,7 +3,7 @@
//! [work products]: WorkProduct
use std::fs as std_fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use rustc_data_structures::unord::UnordMap;
use rustc_fs_util::link_or_copy;
@ -20,6 +20,7 @@ pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
sess: &Session,
cgu_name: &str,
files: &[(&'static str, &Path)],
known_links: &[PathBuf],
) -> Option<(WorkProductId, WorkProduct)> {
debug!(?cgu_name, ?files);
sess.opts.incremental.as_ref()?;
@ -28,6 +29,10 @@ pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
for (ext, path) in files {
let file_name = format!("{cgu_name}.{ext}");
let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
if known_links.contains(&path_in_incr_dir) {
let _ = saved_files.insert(ext.to_string(), file_name);
continue;
}
match link_or_copy(path, &path_in_incr_dir) {
Ok(_) => {
let _ = saved_files.insert(ext.to_string(), file_name);