Reliably signal coordinator thread on panic during ongoing codegen

Replace the separate AbortCodegenOnDrop guard by integrating this functionality into OngoingCodegen (or rather, the Coordinator part of it). This ensures that we send a CodegenAborted message and wait for workers to finish even if the panic occurs outside codegen_crate() (e.g. inside join_codegen()). This requires some minor changes to the handling of CodegenAborted, as it can now occur when the main thread is LLVMing rather than Codegenning.
2022-07-25 14:04:26 +02:00 · 2022-07-25 14:04:26 +02:00 · b00d0fa0c9
commit b00d0fa0c9
parent 6a1f77dba4
2 changed files with 43 additions and 76 deletions
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@ -37,6 +37,7 @@ use rustc_target::spec::{MergeFunctions, SanitizerSet};
 use std::any::Any;
 use std::fs;
 use std::io;
+use std::marker::PhantomData;
 use std::mem;
 use std::path::{Path, PathBuf};
 use std::str;
@ -475,10 +476,13 @@ pub fn start_async_codegen<B: ExtraBackendMethods>(
        metadata_module,
        crate_info,

-        coordinator_send,
        codegen_worker_receive,
        shared_emitter_main,
-        future: coordinator_thread,
+        coordinator: Coordinator {
+            sender: coordinator_send,
+            future: Some(coordinator_thread),
+            phantom: PhantomData,
+        },
        output_filenames: tcx.output_filenames(()).clone(),
    }
 }
@ -1273,6 +1277,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
        // work to be done.
        while !codegen_done
            || running > 0
+            || main_thread_worker_state == MainThreadWorkerState::LLVMing
            || (!codegen_aborted
                && !(work_items.is_empty()
                    && needs_fat_lto.is_empty()
@ -1492,7 +1497,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    assert!(!codegen_aborted);
                    codegen_done = true;
                    codegen_aborted = true;
-                    assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning);
                }
                Message::Done { result: Ok(compiled_module), worker_id } => {
                    free_worker(worker_id);
@ -1539,6 +1543,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
            }
        }

+        if codegen_aborted {
+            return Err(());
+        }
+
        let needs_link = mem::take(&mut needs_link);
        if !needs_link.is_empty() {
            assert!(compiled_modules.is_empty());
@ -1828,16 +1836,39 @@ impl SharedEmitterMain {
    }
 }

+pub struct Coordinator<B: ExtraBackendMethods> {
+    pub sender: Sender<Box<dyn Any + Send>>,
+    future: Option<thread::JoinHandle<Result<CompiledModules, ()>>>,
+    // Only used for the Message type.
+    phantom: PhantomData<B>,
+}
+
+impl<B: ExtraBackendMethods> Coordinator<B> {
+    fn join(mut self) -> std::thread::Result<Result<CompiledModules, ()>> {
+        self.future.take().unwrap().join()
+    }
+}
+
+impl<B: ExtraBackendMethods> Drop for Coordinator<B> {
+    fn drop(&mut self) {
+        if let Some(future) = self.future.take() {
+            // If we haven't joined yet, signal to the coordinator that it should spawn no more
+            // work, and wait for worker threads to finish.
+            drop(self.sender.send(Box::new(Message::CodegenAborted::<B>)));
+            drop(future.join());
+        }
+    }
+}
+
 pub struct OngoingCodegen<B: ExtraBackendMethods> {
    pub backend: B,
    pub metadata: EncodedMetadata,
    pub metadata_module: Option<CompiledModule>,
    pub crate_info: CrateInfo,
-    pub coordinator_send: Sender<Box<dyn Any + Send>>,
    pub codegen_worker_receive: Receiver<Message<B>>,
    pub shared_emitter_main: SharedEmitterMain,
-    pub future: thread::JoinHandle<Result<CompiledModules, ()>>,
    pub output_filenames: Arc<OutputFilenames>,
+    pub coordinator: Coordinator<B>,
 }

 impl<B: ExtraBackendMethods> OngoingCodegen<B> {
@ -1845,8 +1876,7 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
        let _timer = sess.timer("finish_ongoing_codegen");

        self.shared_emitter_main.check(sess, true);
-        let future = self.future;
-        let compiled_modules = sess.time("join_worker_thread", || match future.join() {
+        let compiled_modules = sess.time("join_worker_thread", || match self.coordinator.join() {
            Ok(Ok(compiled_modules)) => compiled_modules,
            Ok(Err(())) => {
                sess.abort_if_errors();
@ -1894,26 +1924,13 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {

        // These are generally cheap and won't throw off scheduling.
        let cost = 0;
-        submit_codegened_module_to_llvm(&self.backend, &self.coordinator_send, module, cost);
+        submit_codegened_module_to_llvm(&self.backend, &self.coordinator.sender, module, cost);
    }

    pub fn codegen_finished(&self, tcx: TyCtxt<'_>) {
        self.wait_for_signal_to_codegen_item();
        self.check_for_errors(tcx.sess);
-        drop(self.coordinator_send.send(Box::new(Message::CodegenComplete::<B>)));
-    }
-
-    /// Consumes this context indicating that codegen was entirely aborted, and
-    /// we need to exit as quickly as possible.
-    ///
-    /// This method blocks the current thread until all worker threads have
-    /// finished, and all worker threads should have exited or be real close to
-    /// exiting at this point.
-    pub fn codegen_aborted(self) {
-        // Signal to the coordinator it should spawn no more work and start
-        // shutdown.
-        drop(self.coordinator_send.send(Box::new(Message::CodegenAborted::<B>)));
-        drop(self.future.join());
+        drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
    }

    pub fn check_for_errors(&self, sess: &Session) {
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@ -39,7 +39,6 @@ use rustc_target::abi::{Align, VariantIdx};

 use std::collections::BTreeSet;
 use std::convert::TryFrom;
-use std::ops::{Deref, DerefMut};
 use std::time::{Duration, Instant};

 use itertools::Itertools;
@ -583,7 +582,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
        metadata_module,
        codegen_units.len(),
    );
-    let ongoing_codegen = AbortCodegenOnDrop::<B>(Some(ongoing_codegen));

    // Codegen an allocator shim, if necessary.
    //
@ -704,7 +702,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(

                submit_codegened_module_to_llvm(
                    &backend,
-                    &ongoing_codegen.coordinator_send,
+                    &ongoing_codegen.coordinator.sender,
                    module,
                    cost,
                );
@ -714,7 +712,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                submit_pre_lto_module_to_llvm(
                    &backend,
                    tcx,
-                    &ongoing_codegen.coordinator_send,
+                    &ongoing_codegen.coordinator.sender,
                    CachedModuleCodegen {
                        name: cgu.name().to_string(),
                        source: cgu.previous_work_product(tcx),
@ -725,7 +723,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
            CguReuse::PostLto => {
                submit_post_lto_module_to_llvm(
                    &backend,
-                    &ongoing_codegen.coordinator_send,
+                    &ongoing_codegen.coordinator.sender,
                    CachedModuleCodegen {
                        name: cgu.name().to_string(),
                        source: cgu.previous_work_product(tcx),
@ -752,55 +750,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
    }

    ongoing_codegen.check_for_errors(tcx.sess);
-
-    ongoing_codegen.into_inner()
-}
-
-/// A curious wrapper structure whose only purpose is to call `codegen_aborted`
-/// when it's dropped abnormally.
-///
-/// In the process of working on rust-lang/rust#55238 a mysterious segfault was
-/// stumbled upon. The segfault was never reproduced locally, but it was
-/// suspected to be related to the fact that codegen worker threads were
-/// sticking around by the time the main thread was exiting, causing issues.
-///
-/// This structure is an attempt to fix that issue where the `codegen_aborted`
-/// message will block until all workers have finished. This should ensure that
-/// even if the main codegen thread panics we'll wait for pending work to
-/// complete before returning from the main thread, hopefully avoiding
-/// segfaults.
-///
-/// If you see this comment in the code, then it means that this workaround
-/// worked! We may yet one day track down the mysterious cause of that
-/// segfault...
-struct AbortCodegenOnDrop<B: ExtraBackendMethods>(Option<OngoingCodegen<B>>);
-
-impl<B: ExtraBackendMethods> AbortCodegenOnDrop<B> {
-    fn into_inner(mut self) -> OngoingCodegen<B> {
-        self.0.take().unwrap()
-    }
-}
-
-impl<B: ExtraBackendMethods> Deref for AbortCodegenOnDrop<B> {
-    type Target = OngoingCodegen<B>;
-
-    fn deref(&self) -> &OngoingCodegen<B> {
-        self.0.as_ref().unwrap()
-    }
-}
-
-impl<B: ExtraBackendMethods> DerefMut for AbortCodegenOnDrop<B> {
-    fn deref_mut(&mut self) -> &mut OngoingCodegen<B> {
-        self.0.as_mut().unwrap()
-    }
-}
-
-impl<B: ExtraBackendMethods> Drop for AbortCodegenOnDrop<B> {
-    fn drop(&mut self) {
-        if let Some(codegen) = self.0.take() {
-            codegen.codegen_aborted();
-        }
-    }
+    ongoing_codegen
 }

 impl CrateInfo {