Auto merge of #139453 - compiler-errors:incr, r=jieyouxu
Prepend temp files with per-invocation random string to avoid temp filename conflicts https://github.com/rust-lang/rust/issues/139407 uncovered a very subtle unsoundness with incremental codegen, failing compilation sessions (due to assembler errors), and the "prefer hard linking over copying files" strategy we use in the compiler for file management. Specifically, imagine we're building a single file 3 times, all with `-Csave-temps -Cincremental=...`. Let's call the object file we're building for the codegen unit for `main` "`XXX.o`" just for clarity since it's probably some gigantic hash name: ``` #[inline(never)] #[cfg(any(rpass1, rpass3))] fn a() -> i32 { 0 } #[cfg(any(cfail2))] fn a() -> i32 { 1 } fn main() { evil::evil(); assert_eq!(a(), 0); } mod evil { #[cfg(any(rpass1, rpass3))] pub fn evil() { unsafe { std::arch::asm!("/* */"); } } #[cfg(any(cfail2))] pub fn evil() { unsafe { std::arch::asm!("missing"); } } } ``` Session 1 (`rpass1`): * Type-check, borrow-check, etc. * Serialize the dep graph to the incremental working directory `.../s-...-working/`. * Codegen object file to a temp file `XXX.rcgu.o` which is spit out in the cwd. * Hard-link[^1] `XXX.rcgu.o` to the incremental working directory `.../s-...-working/XXX.o`. * Save-temps option means we don't delete `XXX.rgcu.o`. * Link the binary and stuff. * Finalize[^2] the working incremental session by renaming `.../s-...-working` to ` s-...-asjkdhsjakd` (some other finalized incr comp session dir name). Session 2 (`cfail2`): * Load artifacts from the previous *finalized* incremental session, namely the dep graph. * Type-check, borrow-check, etc. since the file has changed, so most dep graph nodes are red. * Serialize the dep graph to the incremental working directory `.../s-...-working/`. * Codegen object file to a temp file `XXX.rcgu.o`. **HERE IS THE PROBLEM**: The hard-link is still set up to point to the inode from `XXX.o` from the first session, so this also modifies the `XXX.o` in the previous finalized session directory. * Codegen emits an error b/c `missing` is not an instruction, so we abort before finalizing the incremental session. Specifically, this means that the *previous* session is the last finalized session. Session 3 (`rpass3`): * Load artifacts from the previous *finalized* incremental session, namely the dep graph. NOTE that this is from session 1. * All the dep graph nodes are green since we are basically replaying session 1. * codegen object file `XXX.o`, which is detected as *reused* from session 1 since dep nodes were green. That means we **reuse** `XXX.o` which had been dirtied from session 2. * Link the binary and stuff. This results in a binary which reuses some of the build artifacts from session 2, but thinks it's from session 1. At this point, I hope it's clear to see that the incremental results from session 1 were dirtied from session 2, but we reuse them as if session 1 was the previous (finalized) incremental session we ran. This is at best really buggy, and at worst **unsound**. This isn't limited to `-C save-temps`, since there are other combinations of flags that may keep around temporary files (hard linked) in the working directory (like `-C debuginfo=1 -C split-debuginfo=unpacked` on darwin, for example). --- This PR implements a fix which is to prepend temp filenames with a random string that is generated per invocation of rustc. This string is not *deterministic*, but temporary files are transient anyways, so I don't believe this is a problem. That means that temp files are now something like... `{crate-name}.{cgu}.{invocation_temp}.rcgu.o`, where `{invocation_temp}` is the new temporary string we generate per invocation of rustc. Fixes https://github.com/rust-lang/rust/issues/139407 [^1]:175dcc7773/compiler/rustc_fs_util/src/lib.rs (L60)
[^2]:175dcc7773/compiler/rustc_incremental/src/persist/fs.rs (L1-L40)
This commit is contained in:
commit
e1b06f7730
20 changed files with 285 additions and 97 deletions
|
@ -169,8 +169,11 @@ fn produce_final_output_artifacts(
|
|||
if codegen_results.modules.len() == 1 {
|
||||
// 1) Only one codegen unit. In this case it's no difficulty
|
||||
// to copy `foo.0.x` to `foo.x`.
|
||||
let module_name = Some(&codegen_results.modules[0].name[..]);
|
||||
let path = crate_output.temp_path(output_type, module_name);
|
||||
let path = crate_output.temp_path_for_cgu(
|
||||
output_type,
|
||||
&codegen_results.modules[0].name,
|
||||
sess.invocation_temp.as_deref(),
|
||||
);
|
||||
let output = crate_output.path(output_type);
|
||||
if !output_type.is_text_output() && output.is_tty() {
|
||||
sess.dcx()
|
||||
|
@ -183,22 +186,16 @@ fn produce_final_output_artifacts(
|
|||
ensure_removed(sess.dcx(), &path);
|
||||
}
|
||||
} else {
|
||||
let extension = crate_output
|
||||
.temp_path(output_type, None)
|
||||
.extension()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.to_owned();
|
||||
|
||||
if crate_output.outputs.contains_explicit_name(&output_type) {
|
||||
// 2) Multiple codegen units, with `--emit foo=some_name`. We have
|
||||
// no good solution for this case, so warn the user.
|
||||
sess.dcx().emit_warn(ssa_errors::IgnoringEmitPath { extension });
|
||||
sess.dcx()
|
||||
.emit_warn(ssa_errors::IgnoringEmitPath { extension: output_type.extension() });
|
||||
} else if crate_output.single_output_file.is_some() {
|
||||
// 3) Multiple codegen units, with `-o some_name`. We have
|
||||
// no good solution for this case, so warn the user.
|
||||
sess.dcx().emit_warn(ssa_errors::IgnoringOutput { extension });
|
||||
sess.dcx()
|
||||
.emit_warn(ssa_errors::IgnoringOutput { extension: output_type.extension() });
|
||||
} else {
|
||||
// 4) Multiple codegen units, but no explicit name. We
|
||||
// just leave the `foo.0.x` files in place.
|
||||
|
@ -351,6 +348,7 @@ fn make_module(sess: &Session, name: String) -> UnwindModule<ObjectModule> {
|
|||
|
||||
fn emit_cgu(
|
||||
output_filenames: &OutputFilenames,
|
||||
invocation_temp: Option<&str>,
|
||||
prof: &SelfProfilerRef,
|
||||
name: String,
|
||||
module: UnwindModule<ObjectModule>,
|
||||
|
@ -366,6 +364,7 @@ fn emit_cgu(
|
|||
|
||||
let module_regular = emit_module(
|
||||
output_filenames,
|
||||
invocation_temp,
|
||||
prof,
|
||||
product.object,
|
||||
ModuleKind::Regular,
|
||||
|
@ -391,6 +390,7 @@ fn emit_cgu(
|
|||
|
||||
fn emit_module(
|
||||
output_filenames: &OutputFilenames,
|
||||
invocation_temp: Option<&str>,
|
||||
prof: &SelfProfilerRef,
|
||||
mut object: cranelift_object::object::write::Object<'_>,
|
||||
kind: ModuleKind,
|
||||
|
@ -409,7 +409,7 @@ fn emit_module(
|
|||
object.set_section_data(comment_section, producer, 1);
|
||||
}
|
||||
|
||||
let tmp_file = output_filenames.temp_path(OutputType::Object, Some(&name));
|
||||
let tmp_file = output_filenames.temp_path_for_cgu(OutputType::Object, &name, invocation_temp);
|
||||
let file = match File::create(&tmp_file) {
|
||||
Ok(file) => file,
|
||||
Err(err) => return Err(format!("error creating object file: {}", err)),
|
||||
|
@ -449,8 +449,11 @@ fn reuse_workproduct_for_cgu(
|
|||
cgu: &CodegenUnit<'_>,
|
||||
) -> Result<ModuleCodegenResult, String> {
|
||||
let work_product = cgu.previous_work_product(tcx);
|
||||
let obj_out_regular =
|
||||
tcx.output_filenames(()).temp_path(OutputType::Object, Some(cgu.name().as_str()));
|
||||
let obj_out_regular = tcx.output_filenames(()).temp_path_for_cgu(
|
||||
OutputType::Object,
|
||||
cgu.name().as_str(),
|
||||
tcx.sess.invocation_temp.as_deref(),
|
||||
);
|
||||
let source_file_regular = rustc_incremental::in_incr_comp_dir_sess(
|
||||
&tcx.sess,
|
||||
&work_product.saved_files.get("o").expect("no saved object file in work product"),
|
||||
|
@ -595,13 +598,19 @@ fn module_codegen(
|
|||
|
||||
let global_asm_object_file =
|
||||
profiler.generic_activity_with_arg("compile assembly", &*cgu_name).run(|| {
|
||||
crate::global_asm::compile_global_asm(&global_asm_config, &cgu_name, &cx.global_asm)
|
||||
crate::global_asm::compile_global_asm(
|
||||
&global_asm_config,
|
||||
&cgu_name,
|
||||
&cx.global_asm,
|
||||
cx.invocation_temp.as_deref(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let codegen_result =
|
||||
profiler.generic_activity_with_arg("write object file", &*cgu_name).run(|| {
|
||||
emit_cgu(
|
||||
&global_asm_config.output_filenames,
|
||||
cx.invocation_temp.as_deref(),
|
||||
&profiler,
|
||||
cgu_name,
|
||||
module,
|
||||
|
@ -626,8 +635,11 @@ fn emit_metadata_module(tcx: TyCtxt<'_>, metadata: &EncodedMetadata) -> Compiled
|
|||
.as_str()
|
||||
.to_string();
|
||||
|
||||
let tmp_file =
|
||||
tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name));
|
||||
let tmp_file = tcx.output_filenames(()).temp_path_for_cgu(
|
||||
OutputType::Metadata,
|
||||
&metadata_cgu_name,
|
||||
tcx.sess.invocation_temp.as_deref(),
|
||||
);
|
||||
|
||||
let symbol_name = rustc_middle::middle::exported_symbols::metadata_symbol_name(tcx);
|
||||
let obj = create_compressed_metadata_file(tcx.sess, metadata, &symbol_name);
|
||||
|
@ -657,6 +669,7 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
|
|||
|
||||
match emit_module(
|
||||
tcx.output_filenames(()),
|
||||
tcx.sess.invocation_temp.as_deref(),
|
||||
&tcx.sess.prof,
|
||||
product.object,
|
||||
ModuleKind::Allocator,
|
||||
|
|
|
@ -132,6 +132,7 @@ pub(crate) fn compile_global_asm(
|
|||
config: &GlobalAsmConfig,
|
||||
cgu_name: &str,
|
||||
global_asm: &str,
|
||||
invocation_temp: Option<&str>,
|
||||
) -> Result<Option<PathBuf>, String> {
|
||||
if global_asm.is_empty() {
|
||||
return Ok(None);
|
||||
|
@ -146,7 +147,7 @@ pub(crate) fn compile_global_asm(
|
|||
global_asm.push('\n');
|
||||
|
||||
let global_asm_object_file = add_file_stem_postfix(
|
||||
config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)),
|
||||
config.output_filenames.temp_path_for_cgu(OutputType::Object, cgu_name, invocation_temp),
|
||||
".asm",
|
||||
);
|
||||
|
||||
|
|
|
@ -124,6 +124,7 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
|
|||
/// inside a single codegen unit with the exception of the Cranelift [`Module`](cranelift_module::Module).
|
||||
struct CodegenCx {
|
||||
output_filenames: Arc<OutputFilenames>,
|
||||
invocation_temp: Option<String>,
|
||||
should_write_ir: bool,
|
||||
global_asm: String,
|
||||
inline_asm_index: usize,
|
||||
|
@ -142,6 +143,7 @@ impl CodegenCx {
|
|||
};
|
||||
CodegenCx {
|
||||
output_filenames: tcx.output_filenames(()).clone(),
|
||||
invocation_temp: tcx.sess.invocation_temp.clone(),
|
||||
should_write_ir: crate::pretty_clif::should_write_ir(tcx),
|
||||
global_asm: String::new(),
|
||||
inline_asm_index: 0,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue