Split query execution into hot and cold paths

parent 7710ae0e26
commit 545e290a93

3 changed files with 163 additions and 111 deletions
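In outline, as reconstructed from the hunks below: the per-query cache probe is factored out of `JobOwner::try_get` into a new always-inlined `TyCtxt::try_get_cached` (the hot path), while job registration, blocking, and cycle handling move into `JobOwner::try_start` and `TyCtxt::try_execute_query` (the cold path). Judging by their contexts, the first two hunks touch the dep-graph code (`impl DepGraphData`) and the `ty::tls` module to `#[inline]` two helpers used on the hit path; the remaining hunks rewrite the query plumbing module.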
@@ -1122,6 +1122,7 @@ impl CurrentDepGraph {
 }
 
 impl DepGraphData {
+    #[inline]
     fn read_index(&self, source: DepNodeIndex) {
         ty::tls::with_context_opt(|icx| {
             let icx = if let Some(icx) = icx { icx } else { return };
@@ -1688,6 +1688,7 @@ pub mod tls {
 
     /// Gets the pointer to the current `ImplicitCtxt`.
     #[cfg(not(parallel_compiler))]
+    #[inline]
    fn get_tlv() -> usize {
        TLV.with(|tlv| tlv.get())
    }
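Both hunks above make the same kind of change: a small function on the cache-hit path (`DepGraphData::read_index`, `tls::get_tlv`) gains `#[inline]` so it can be inlined into the new always-inlined lookup code. The sketch below is a minimal, self-contained illustration of the hot/cold split the commit title names, with invented names (`get`, `compute_and_insert`) and std types standing in for rustc's query machinery: the hot path stays tiny and inlinable, while the rare miss path sits behind a `#[cold]` function, the same idea as rustc's `cold_path` helper.

    use std::collections::HashMap;

    // Cold path: only reached on a cache miss; kept out of line so the
    // caller's fast path stays small.
    #[cold]
    #[inline(never)]
    fn compute_and_insert(cache: &mut HashMap<u32, String>, key: u32) -> String {
        // Stand-in for an expensive query execution.
        let value = format!("value-for-{}", key);
        cache.insert(key, value.clone());
        value
    }

    // Hot path: a single cache probe, cheap enough to inline everywhere.
    #[inline]
    fn get(cache: &mut HashMap<u32, String>, key: u32) -> String {
        if let Some(v) = cache.get(&key) {
            return v.clone();
        }
        compute_and_insert(cache, key)
    }

    fn main() {
        let mut cache = HashMap::new();
        assert_eq!(get(&mut cache, 7), "value-for-7"); // miss: cold path runs
        assert_eq!(get(&mut cache, 7), "value-for-7"); // hit: hot path only
    }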
@@ -12,10 +12,8 @@ use crate::ty::{self, TyCtxt};
 #[cfg(not(parallel_compiler))]
 use rustc_data_structures::cold_path;
 use rustc_data_structures::fx::{FxHashMap, FxHasher};
-#[cfg(parallel_compiler)]
-use rustc_data_structures::profiling::TimingGuard;
 use rustc_data_structures::sharded::Sharded;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sync::{Lock, LockGuard};
 use rustc_data_structures::thin_vec::ThinVec;
 use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level};
 use rustc_span::source_map::DUMMY_SP;
@@ -70,6 +68,12 @@ impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> {
     }
 }
 
+/// Values used when checking a query cache which can be reused on a cache-miss to execute the query.
+pub(super) struct QueryLookup<'tcx, Q: QueryDescription<'tcx>> {
+    shard: usize,
+    lock: LockGuard<'tcx, QueryCache<'tcx, Q>>,
+}
+
 /// A type representing the responsibility to execute the job in the `job` field.
 /// This will poison the relevant query if dropped.
 pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
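The new `QueryLookup` struct is what lets the split avoid paying for the cache probe twice: the hot path's lookup returns the shard index and the still-held lock guard, and the cold path reuses them instead of re-hashing the key and re-acquiring the lock. A reduced sketch of that hand-off, assuming plain std types (`Mutex`/`MutexGuard` in place of rustc's sharded `Lock`/`LockGuard`) and invented names:

    use std::collections::HashMap;
    use std::sync::{Mutex, MutexGuard};

    // Analogue of `QueryLookup`: carries the guard from the probe so a miss
    // can insert under the same lock acquisition.
    struct Lookup<'a> {
        guard: MutexGuard<'a, HashMap<u32, String>>,
    }

    fn probe(cache: &Mutex<HashMap<u32, String>>, key: u32) -> (Option<String>, Lookup<'_>) {
        let guard = cache.lock().unwrap();
        let hit = guard.get(&key).cloned();
        (hit, Lookup { guard })
    }

    fn get_or_insert(cache: &Mutex<HashMap<u32, String>>, key: u32) -> String {
        let (hit, mut lookup) = probe(cache, key);
        if let Some(v) = hit {
            return v; // hot path: the guard is dropped here
        }
        // Cold path: the held guard is reused; no second hash or lock.
        let v = format!("value-for-{}", key);
        lookup.guard.insert(key, v.clone());
        v
    }

    fn main() {
        let cache = Mutex::new(HashMap::new());
        assert_eq!(get_or_insert(&cache, 1), "value-for-1");
        assert_eq!(get_or_insert(&cache, 1), "value-for-1");
    }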
@@ -81,119 +85,87 @@ pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
 impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
     /// Either gets a `JobOwner` corresponding to the query, allowing us to
     /// start executing the query, or returns with the result of the query.
-    /// If the query is executing elsewhere, this will wait for it.
+    /// This function assumes that `try_get_cached` is already called and returned `lookup`.
+    /// If the query is executing elsewhere, this will wait for it and return the result.
     /// If the query panicked, this will silently panic.
     ///
     /// This function is inlined because that results in a noticeable speed-up
     /// for some compile-time benchmarks.
     #[inline(always)]
-    pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
-        // Handling the `query_blocked_prof_timer` is a bit weird because of the
-        // control flow in this function: Blocking is implemented by
-        // awaiting a running job and, once that is done, entering the loop below
-        // again from the top. In that second iteration we will hit the
-        // cache which provides us with the information we need for
-        // finishing the "query-blocked" event.
-        //
-        // We thus allocate `query_blocked_prof_timer` outside the loop,
-        // initialize it during the first iteration and finish it during the
-        // second iteration.
-        #[cfg(parallel_compiler)]
-        let mut query_blocked_prof_timer: Option<TimingGuard<'_>> = None;
-
-        let cache = Q::query_cache(tcx);
-        loop {
-            // We compute the key's hash once and then use it for both the
-            // shard lookup and the hashmap lookup. This relies on the fact
-            // that both of them use `FxHasher`.
-            let mut state = FxHasher::default();
-            key.hash(&mut state);
-            let key_hash = state.finish();
-
-            let shard = cache.get_shard_index_by_hash(key_hash);
-            let mut lock_guard = cache.get_shard_by_index(shard).lock();
-            let lock = &mut *lock_guard;
-
-            if let Some((_, value)) =
-                lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
-            {
-                if unlikely!(tcx.prof.enabled()) {
-                    tcx.prof.query_cache_hit(value.index.into());
-
-                    #[cfg(parallel_compiler)]
-                    {
-                        if let Some(prof_timer) = query_blocked_prof_timer.take() {
-                            prof_timer.finish_with_query_invocation_id(value.index.into());
-                        }
-                    }
-                }
-
-                let result = (value.value.clone(), value.index);
-                #[cfg(debug_assertions)]
-                {
-                    lock.cache_hits += 1;
-                }
-                return TryGetJob::JobCompleted(result);
-            }
-
-            let latch = match lock.active.entry((*key).clone()) {
-                Entry::Occupied(mut entry) => {
-                    match entry.get_mut() {
-                        QueryResult::Started(job) => {
-                            // For parallel queries, we'll block and wait until the query running
-                            // in another thread has completed. Record how long we wait in the
-                            // self-profiler.
-                            #[cfg(parallel_compiler)]
-                            {
-                                query_blocked_prof_timer = Some(tcx.prof.query_blocked());
-                            }
-
-                            // Create the id of the job we're waiting for
-                            let id = QueryJobId::new(job.id, shard, Q::dep_kind());
-
-                            job.latch(id)
-                        }
-                        QueryResult::Poisoned => FatalError.raise(),
-                    }
-                }
-                Entry::Vacant(entry) => {
-                    // No job entry for this query. Return a new one to be started later.
-
-                    // Generate an id unique within this shard.
-                    let id = lock.jobs.checked_add(1).unwrap();
-                    lock.jobs = id;
-                    let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
-
-                    let global_id = QueryJobId::new(id, shard, Q::dep_kind());
-
-                    let job =
-                        tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
-
-                    entry.insert(QueryResult::Started(job));
-
-                    let owner = JobOwner { cache, id: global_id, key: (*key).clone() };
-                    return TryGetJob::NotYetStarted(owner);
-                }
-            };
-            mem::drop(lock_guard);
-
-            // If we are single-threaded we know that we have cycle error,
-            // so we just return the error.
-            #[cfg(not(parallel_compiler))]
-            return TryGetJob::Cycle(cold_path(|| {
-                Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
-            }));
-
-            // With parallel queries we might just have to wait on some other
-            // thread.
-            #[cfg(parallel_compiler)]
-            {
-                let result = latch.wait_on(tcx, span);
-
-                if let Err(cycle) = result {
-                    return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
-                }
-            }
-        }
+    pub(super) fn try_start(
+        tcx: TyCtxt<'tcx>,
+        span: Span,
+        key: &Q::Key,
+        mut lookup: QueryLookup<'tcx, Q>,
+    ) -> TryGetJob<'a, 'tcx, Q> {
+        let lock = &mut *lookup.lock;
+
+        let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) {
+            Entry::Occupied(mut entry) => {
+                match entry.get_mut() {
+                    QueryResult::Started(job) => {
+                        // For parallel queries, we'll block and wait until the query running
+                        // in another thread has completed. Record how long we wait in the
+                        // self-profiler.
+                        let _query_blocked_prof_timer = if cfg!(parallel_compiler) {
+                            Some(tcx.prof.query_blocked())
+                        } else {
+                            None
+                        };
+
+                        // Create the id of the job we're waiting for
+                        let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind());
+
+                        (job.latch(id), _query_blocked_prof_timer)
+                    }
+                    QueryResult::Poisoned => FatalError.raise(),
+                }
+            }
+            Entry::Vacant(entry) => {
+                // No job entry for this query. Return a new one to be started later.
+
+                // Generate an id unique within this shard.
+                let id = lock.jobs.checked_add(1).unwrap();
+                lock.jobs = id;
+                let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
+
+                let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind());
+
+                let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
+
+                entry.insert(QueryResult::Started(job));
+
+                let owner =
+                    JobOwner { cache: Q::query_cache(tcx), id: global_id, key: (*key).clone() };
+                return TryGetJob::NotYetStarted(owner);
+            }
+        };
+        mem::drop(lookup.lock);
+
+        // If we are single-threaded we know that we have cycle error,
+        // so we just return the error.
+        #[cfg(not(parallel_compiler))]
+        return TryGetJob::Cycle(cold_path(|| {
+            Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
+        }));
+
+        // With parallel queries we might just have to wait on some other
+        // thread.
+        #[cfg(parallel_compiler)]
+        {
+            let result = latch.wait_on(tcx, span);
+
+            if let Err(cycle) = result {
+                return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
+            }
+
+            let cached = tcx.try_get_cached::<Q>(key).0.unwrap();
+
+            if let Some(prof_timer) = _query_blocked_prof_timer.take() {
+                prof_timer.finish_with_query_invocation_id(cached.1.into());
+            }
+
+            return TryGetJob::JobCompleted(cached);
+        }
     }
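Stripped of profiling and cycle handling, `try_start` hinges on one `Entry` probe of the `active` map: an occupied entry means another caller owns the job (wait on its latch, or report a cycle when single-threaded), while a vacant entry means we register a fresh job and become its owner. A toy version of that decision, with invented types and no synchronization, just to show the shape:

    use std::collections::hash_map::Entry;
    use std::collections::HashMap;

    enum TryStart {
        NotYetStarted,       // we inserted the job and must execute it
        AlreadyStarted(u32), // another caller owns the job with this id
    }

    fn try_start(active: &mut HashMap<String, u32>, key: &str, next_id: &mut u32) -> TryStart {
        match active.entry(key.to_string()) {
            Entry::Occupied(entry) => TryStart::AlreadyStarted(*entry.get()),
            Entry::Vacant(entry) => {
                *next_id += 1;
                entry.insert(*next_id);
                TryStart::NotYetStarted
            }
        }
    }

    fn main() {
        let mut active = HashMap::new();
        let mut next_id = 0;
        assert!(matches!(try_start(&mut active, "k", &mut next_id), TryStart::NotYetStarted));
        assert!(matches!(try_start(&mut active, "k", &mut next_id), TryStart::AlreadyStarted(1)));
    }

Also visible in the hunk: the old code allocated the query-blocked timer outside a retry loop and finished it on the second pass through the cache, while the new code has no loop, so the timer is created with `cfg!(parallel_compiler)` at the wait site and finished right after the post-wait cache read.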
@@ -269,6 +241,7 @@ pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> {
     /// The query was already completed.
     /// Returns the result of the query and its dep-node index
     /// if it succeeded or a cycle error if it failed.
+    #[cfg(parallel_compiler)]
     JobCompleted((D::Value, DepNodeIndex)),
 
     /// Trying to execute the query resulted in a cycle.
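`JobCompleted` now only exists when the compiler is built with `parallel_compiler`, since the single-threaded build either executes the query itself or hits a cycle. Rust allows `#[cfg]` both on enum variants and on the match arms that consume them, which is what keeps the non-parallel build compiling. A minimal sketch with a made-up `parallel` Cargo feature in place of rustc's `parallel_compiler` cfg:

    enum TryGet {
        NotYetStarted,
        Cycle,
        #[cfg(feature = "parallel")]
        JobCompleted(u32),
    }

    fn describe(t: TryGet) -> &'static str {
        match t {
            TryGet::NotYetStarted => "start it ourselves",
            TryGet::Cycle => "cycle error",
            // This arm disappears together with the variant when the
            // feature is off, so the match stays exhaustive either way.
            #[cfg(feature = "parallel")]
            TryGet::JobCompleted(_) => "already done elsewhere",
        }
    }

    fn main() {
        assert_eq!(describe(TryGet::NotYetStarted), "start it ourselves");
    }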
@@ -396,13 +369,76 @@ impl<'tcx> TyCtxt<'tcx> {
         eprintln!("end of query stack");
     }
 
+    /// Checks if the query is already computed and in the cache.
+    /// It returns the shard index and a lock guard to the shard,
+    /// which will be used if the query is not in the cache and we need
+    /// to compute it.
+    #[inline(always)]
+    fn try_get_cached<Q: QueryDescription<'tcx>>(
+        self,
+        key: &Q::Key,
+    ) -> (Option<(Q::Value, DepNodeIndex)>, QueryLookup<'tcx, Q>) {
+        let cache = Q::query_cache(self);
+
+        // We compute the key's hash once and then use it for both the
+        // shard lookup and the hashmap lookup. This relies on the fact
+        // that both of them use `FxHasher`.
+        let mut state = FxHasher::default();
+        key.hash(&mut state);
+        let key_hash = state.finish();
+
+        let shard = cache.get_shard_index_by_hash(key_hash);
+        let mut lock_guard = cache.get_shard_by_index(shard).lock();
+        let lock = &mut *lock_guard;
+
+        let result =
+            lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key).map(|(_, value)| {
+                if unlikely!(self.prof.enabled()) {
+                    self.prof.query_cache_hit(value.index.into());
+                }
+
+                (value.value.clone(), value.index)
+            });
+
+        #[cfg(debug_assertions)]
+        {
+            if result.is_some() {
+                lock.cache_hits += 1;
+            }
+        }
+
+        (result, QueryLookup { lock: lock_guard, shard })
+    }
+
     #[inline(never)]
-    pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key) -> Q::Value {
+    pub(super) fn get_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        span: Span,
+        key: Q::Key,
+    ) -> Q::Value {
         debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span);
 
-        let job = match JobOwner::try_get(self, span, &key) {
+        let (cached, lookup) = self.try_get_cached::<Q>(&key);
+
+        if let Some((v, index)) = cached {
+            self.dep_graph.read_index(index);
+            return v;
+        }
+
+        self.try_execute_query(span, key, lookup)
+    }
+
+    #[inline(always)]
+    pub(super) fn try_execute_query<Q: QueryDescription<'tcx>>(
+        self,
+        span: Span,
+        key: Q::Key,
+        lookup: QueryLookup<'tcx, Q>,
+    ) -> Q::Value {
+        let job = match JobOwner::try_start(self, span, &key, lookup) {
             TryGetJob::NotYetStarted(job) => job,
             TryGetJob::Cycle(result) => return result,
+            #[cfg(parallel_compiler)]
             TryGetJob::JobCompleted((v, index)) => {
                 self.dep_graph.read_index(index);
                 return v;
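`try_get_cached` is the new hot path: one `FxHasher` pass over the key yields a hash that picks the shard and then drives the map probe via `raw_entry().from_key_hashed_nocheck` (a nightly-only `HashMap` API), so the key is hashed exactly once per lookup. A self-contained approximation with stable std types (`DefaultHasher` and `Mutex` standing in for `FxHasher` and rustc's `Sharded`), where the single hash can only be reused for shard selection:

    use std::collections::hash_map::DefaultHasher;
    use std::collections::HashMap;
    use std::hash::{Hash, Hasher};
    use std::sync::Mutex;

    const SHARDS: usize = 16;

    struct ShardedCache {
        shards: Vec<Mutex<HashMap<String, u32>>>,
    }

    impl ShardedCache {
        fn new() -> Self {
            ShardedCache { shards: (0..SHARDS).map(|_| Mutex::new(HashMap::new())).collect() }
        }

        // Hash once, then derive the shard index from the hash, as
        // `get_shard_index_by_hash` does in the hunk above.
        fn shard_for(&self, key: &str) -> &Mutex<HashMap<String, u32>> {
            let mut state = DefaultHasher::new();
            key.hash(&mut state);
            let key_hash = state.finish();
            &self.shards[(key_hash as usize) % SHARDS]
        }

        fn get(&self, key: &str) -> Option<u32> {
            self.shard_for(key).lock().unwrap().get(key).copied()
        }

        fn insert(&self, key: &str, value: u32) {
            self.shard_for(key).lock().unwrap().insert(key.to_string(), value);
        }
    }

    fn main() {
        let cache = ShardedCache::new();
        cache.insert("typeck", 42);
        assert_eq!(cache.get("typeck"), Some(42));
        assert_eq!(cache.get("mir"), None);
    }

This hunk is also where the inlining attributes pay off: `get_query` stays `#[inline(never)]` as the single entry point, while `try_get_cached` and `try_execute_query` carry `#[inline(always)]`, keeping the cache probe inlined into that entry point.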
@@ -615,7 +651,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// side-effects -- e.g., in order to report errors for erroneous programs.
     ///
     /// Note: The optimization is only available during incr. comp.
-    pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
+    pub(super) fn ensure_query<Q: QueryDescription<'tcx> + 'tcx>(self, key: Q::Key) -> () {
         if Q::EVAL_ALWAYS {
             let _ = self.get_query::<Q>(DUMMY_SP, key);
             return;
@@ -643,12 +679,26 @@ impl<'tcx> TyCtxt<'tcx> {
     }
 
     #[allow(dead_code)]
-    fn force_query<Q: QueryDescription<'tcx>>(self, key: Q::Key, span: Span, dep_node: DepNode) {
+    fn force_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        key: Q::Key,
+        span: Span,
+        dep_node: DepNode,
+    ) {
         // We may be concurrently trying both execute and force a query.
         // Ensure that only one of them runs the query.
-        let job = match JobOwner::try_get(self, span, &key) {
+
+        let (cached, lookup) = self.try_get_cached::<Q>(&key);
+
+        if cached.is_some() {
+            return;
+        }
+
+        let job = match JobOwner::try_start(self, span, &key, lookup) {
             TryGetJob::NotYetStarted(job) => job,
-            TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return,
+            TryGetJob::Cycle(_) => return,
+            #[cfg(parallel_compiler)]
+            TryGetJob::JobCompleted(_) => return,
         };
         self.force_query_with_job::<Q>(key, job, dep_node);
     }