Rollup merge of #138581 - Zoxc:abort-handler-if-locked, r=SparrowLii

Abort in deadlock handler if we fail to get a query map

Resolving query cycles requires the complete active query map, or it may miss query cycles. We did not check that the map is completely constructed before. If there is some error collecting the map, something has gone wrong already. This adds a check to abort/panic if we fail to construct the complete map.

This can help differentiate errors from the `deadlock detected` case if constructing query map has errors in practice.

An `Option` is not used for `collect_active_jobs` as the panic handler can still make use of a partial map.
This commit is contained in:
Matthias Krüger 2025-03-25 18:09:05 +01:00 committed by GitHub
commit 43297ffc22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 43 additions and 12 deletions

View file

@ -192,7 +192,18 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
// `TyCtxt` TLS reference here.
let query_map = current_gcx2.access(|gcx| {
tls::enter_context(&tls::ImplicitCtxt::new(gcx), || {
tls::with(|tcx| QueryCtxt::new(tcx).collect_active_jobs())
tls::with(|tcx| {
match QueryCtxt::new(tcx).collect_active_jobs() {
Ok(query_map) => query_map,
Err(_) => {
// There was an unexpected error collecting all active jobs, which we need
// to find cycles to break.
// We want to avoid panicking in the deadlock handler, so we abort instead.
eprintln!("internal compiler error: failed to get query map in deadlock handler, aborting process");
process::abort();
}
}
})
})
});
let query_map = FromDyn::from(query_map);
@ -201,7 +212,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
.name("rustc query cycle handler".to_string())
.spawn(move || {
let on_panic = defer(|| {
eprintln!("query cycle handler thread panicked, aborting process");
eprintln!("internal compiler error: query cycle handler thread panicked, aborting process");
// We need to abort here as we failed to resolve the deadlock,
// otherwise the compiler could just hang,
process::abort();

View file

@ -79,14 +79,20 @@ impl QueryContext for QueryCtxt<'_> {
tls::with_related_context(self.tcx, |icx| icx.query)
}
fn collect_active_jobs(self) -> QueryMap {
/// Returns a query map representing active query jobs.
/// It returns an incomplete map as an error if it fails
/// to take locks.
fn collect_active_jobs(self) -> Result<QueryMap, QueryMap> {
let mut jobs = QueryMap::default();
let mut complete = true;
for collect in super::TRY_COLLECT_ACTIVE_JOBS.iter() {
collect(self.tcx, &mut jobs);
if collect(self.tcx, &mut jobs).is_none() {
complete = false;
}
}
jobs
if complete { Ok(jobs) } else { Err(jobs) }
}
// Interactions with on_disk_cache
@ -139,7 +145,12 @@ impl QueryContext for QueryCtxt<'_> {
}
fn depth_limit_error(self, job: QueryJobId) {
let (info, depth) = job.find_dep_kind_root(self.collect_active_jobs());
// FIXME: `collect_active_jobs` expects no locks to be held, which doesn't hold for this call.
let query_map = match self.collect_active_jobs() {
Ok(query_map) => query_map,
Err(query_map) => query_map,
};
let (info, depth) = job.find_dep_kind_root(query_map);
let suggested_limit = match self.recursion_limit() {
Limit(0) => Limit(2),
@ -677,7 +688,7 @@ macro_rules! define_queries {
}
}
pub(crate) fn try_collect_active_jobs<'tcx>(tcx: TyCtxt<'tcx>, qmap: &mut QueryMap) {
pub(crate) fn try_collect_active_jobs<'tcx>(tcx: TyCtxt<'tcx>, qmap: &mut QueryMap) -> Option<()> {
let make_query = |tcx, key| {
let kind = rustc_middle::dep_graph::dep_kinds::$name;
let name = stringify!($name);
@ -697,6 +708,7 @@ macro_rules! define_queries {
stringify!($name)
);
}
res
}
pub(crate) fn alloc_self_profile_query_strings<'tcx>(
@ -756,7 +768,7 @@ macro_rules! define_queries {
// These arrays are used for iteration and can't be indexed by `DepKind`.
const TRY_COLLECT_ACTIVE_JOBS: &[for<'tcx> fn(TyCtxt<'tcx>, &mut QueryMap)] =
const TRY_COLLECT_ACTIVE_JOBS: &[for<'tcx> fn(TyCtxt<'tcx>, &mut QueryMap) -> Option<()>] =
&[$(query_impl::$name::try_collect_active_jobs),*];
const ALLOC_SELF_PROFILE_QUERY_STRINGS: &[

View file

@ -588,7 +588,12 @@ pub fn print_query_stack<Qcx: QueryContext>(
// state if it was responsible for triggering the panic.
let mut count_printed = 0;
let mut count_total = 0;
let query_map = qcx.collect_active_jobs();
// Make use of a partial query map if we fail to take locks collecting active queries.
let query_map = match qcx.collect_active_jobs() {
Ok(query_map) => query_map,
Err(query_map) => query_map,
};
if let Some(ref mut file) = file {
let _ = writeln!(file, "\n\nquery stack during panic:");

View file

@ -86,7 +86,7 @@ pub trait QueryContext: HasDepContext {
/// Get the query information from the TLS context.
fn current_query_job(self) -> Option<QueryJobId>;
fn collect_active_jobs(self) -> QueryMap;
fn collect_active_jobs(self) -> Result<QueryMap, QueryMap>;
/// Load a side effect associated to the node in the previous session.
fn load_side_effect(

View file

@ -260,8 +260,11 @@ where
Q: QueryConfig<Qcx>,
Qcx: QueryContext,
{
let error =
try_execute.find_cycle_in_stack(qcx.collect_active_jobs(), &qcx.current_query_job(), span);
// Ensure there was no errors collecting all active jobs.
// We need the complete map to ensure we find a cycle to break.
let query_map = qcx.collect_active_jobs().expect("failed to collect active queries");
let error = try_execute.find_cycle_in_stack(query_map, &qcx.current_query_job(), span);
(mk_cycle(query, qcx, error), None)
}