Auto merge of #94084 - Mark-Simulacrum:drop-sharded, r=cjgillot
Avoid query cache sharding code in single-threaded mode

In non-parallel compilers there is statically only one shard, so the sharding code just adds needless overhead at compilation time. Dropping it cuts roughly 10 seconds from bootstrap time, with otherwise neutral performance results (some wins, some losses). Parallel compiler performance should be largely unaffected by this PR; sharding is kept there.
This commit is contained in: commit 3b1fe7e7c9
8 changed files with 167 additions and 204 deletions
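The pattern repeated throughout the diff below is a #[cfg(parallel_compiler)] / #[cfg(not(parallel_compiler))] pair on the same field or statement: the parallel compiler keeps a Sharded<FxHashMap<...>>, while the serial compiler gets a plain Lock<FxHashMap<...>> and skips shard selection entirely. Here is a minimal standalone sketch of that pattern; it assumes a hypothetical `parallel` cfg flag and uses std's Mutex in place of rustc's internal `parallel_compiler` cfg and Sharded/Lock types, so names here are illustrative, not rustc's API.

use std::collections::HashMap;
use std::hash::Hash;
use std::sync::Mutex;

#[cfg(parallel)]
const SHARDS: usize = 32;

// Hash the key once to pick a shard; only needed in the parallel build.
#[cfg(parallel)]
fn shard_index<K: Hash>(key: &K) -> usize {
    use std::hash::Hasher;
    let mut h = std::collections::hash_map::DefaultHasher::new();
    key.hash(&mut h);
    h.finish() as usize % SHARDS
}

pub struct Cache<K, V> {
    // Parallel build: independently locked shards reduce contention.
    #[cfg(parallel)]
    shards: [Mutex<HashMap<K, V>>; SHARDS],
    // Serial build: one map behind one lock, no shard bookkeeping at all.
    #[cfg(not(parallel))]
    map: Mutex<HashMap<K, V>>,
}

impl<K: Hash + Eq, V: Clone> Cache<K, V> {
    pub fn new() -> Self {
        #[cfg(parallel)]
        return Cache { shards: std::array::from_fn(|_| Mutex::new(HashMap::new())) };
        #[cfg(not(parallel))]
        return Cache { map: Mutex::new(HashMap::new()) };
    }

    pub fn lookup(&self, key: &K) -> Option<V> {
        #[cfg(parallel)]
        let guard = self.shards[shard_index(key)].lock().unwrap();
        #[cfg(not(parallel))]
        let guard = self.map.lock().unwrap();
        guard.get(key).cloned()
    }

    pub fn insert(&self, key: K, value: V) {
        #[cfg(parallel)]
        let mut guard = self.shards[shard_index(key)].lock().unwrap();
        #[cfg(not(parallel))]
        let mut guard = self.map.lock().unwrap();
        guard.insert(key, value);
    }
}

Compiled with --cfg parallel, lookup hashes the key, picks a shard, and locks only that shard; compiled without it, the shard-selection work disappears at compile time, which is exactly the overhead this PR strips from the single-threaded compiler.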
@@ -129,7 +129,7 @@ impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
 }
 
 #[inline]
-fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
+pub fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
     let mut state = FxHasher::default();
     val.hash(&mut state);
     state.finish()
@@ -210,7 +210,7 @@ macro_rules! define_callbacks {
 
         #[derive(Default)]
        pub struct QueryCaches<$tcx> {
-            $($(#[$attr])* pub $name: QueryCacheStore<query_storage::$name<$tcx>>,)*
+            $($(#[$attr])* pub $name: query_storage::$name<$tcx>,)*
        }
 
        impl<$tcx> TyCtxtEnsure<$tcx> {
@@ -222,12 +222,12 @@ macro_rules! define_callbacks {
 
                 let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, noop);
 
-                let lookup = match cached {
+                match cached {
                     Ok(()) => return,
-                    Err(lookup) => lookup,
-                };
+                    Err(()) => (),
+                }
 
-                self.tcx.queries.$name(self.tcx, DUMMY_SP, key, lookup, QueryMode::Ensure);
+                self.tcx.queries.$name(self.tcx, DUMMY_SP, key, QueryMode::Ensure);
             })*
         }
 
@@ -251,12 +251,12 @@ macro_rules! define_callbacks {
 
                 let cached = try_get_cached(self.tcx, &self.tcx.query_caches.$name, &key, copy);
 
-                let lookup = match cached {
+                match cached {
                     Ok(value) => return value,
-                    Err(lookup) => lookup,
-                };
+                    Err(()) => (),
+                }
 
-                self.tcx.queries.$name(self.tcx, self.span, key, lookup, QueryMode::Get).unwrap()
+                self.tcx.queries.$name(self.tcx, self.span, key, QueryMode::Get).unwrap()
             })*
         }
 
@@ -314,7 +314,6 @@ macro_rules! define_callbacks {
             tcx: TyCtxt<$tcx>,
             span: Span,
             key: query_keys::$name<$tcx>,
-            lookup: QueryLookup,
             mode: QueryMode,
         ) -> Option<query_stored::$name<$tcx>>;)*
     }
@@ -13,7 +13,7 @@ use rustc_middle::thir;
 use rustc_middle::ty::codec::{RefDecodable, TyDecoder, TyEncoder};
 use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_query_system::dep_graph::DepContext;
-use rustc_query_system::query::{QueryContext, QuerySideEffects};
+use rustc_query_system::query::{QueryCache, QueryContext, QuerySideEffects};
 use rustc_serialize::{
     opaque::{self, FileEncodeResult, FileEncoder, IntEncodedWithFixedSize},
     Decodable, Decoder, Encodable, Encoder,
@@ -1034,7 +1034,7 @@ where
     assert!(Q::query_state(tcx).all_inactive());
     let cache = Q::query_cache(tcx);
     let mut res = Ok(());
-    cache.iter_results(&mut |key, value, dep_node| {
+    cache.iter(&mut |key, value, dep_node| {
         if res.is_err() {
             return;
         }
@@ -336,7 +336,7 @@ macro_rules! define_queries {
             }
 
             #[inline(always)]
-            fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a QueryCacheStore<Self::Cache>
+            fn query_cache<'a>(tcx: QueryCtxt<$tcx>) -> &'a Self::Cache
                 where 'tcx:'a
             {
                 &tcx.query_caches.$name
@@ -537,12 +537,11 @@ macro_rules! define_queries_struct {
                 tcx: TyCtxt<$tcx>,
                 span: Span,
                 key: query_keys::$name<$tcx>,
-                lookup: QueryLookup,
                 mode: QueryMode,
             ) -> Option<query_stored::$name<$tcx>> {
                 opt_remap_env_constness!([$($modifiers)*][key]);
                 let qcx = QueryCtxt { tcx, queries: self };
-                get_query::<queries::$name<$tcx>, _>(qcx, span, key, lookup, mode)
+                get_query::<queries::$name<$tcx>, _>(qcx, span, key, mode)
             })*
         }
     };
@@ -4,7 +4,7 @@ use rustc_data_structures::profiling::SelfProfiler;
 use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
 use rustc_hir::definitions::DefPathData;
 use rustc_middle::ty::{TyCtxt, WithOptConstParam};
-use rustc_query_system::query::{QueryCache, QueryCacheStore};
+use rustc_query_system::query::QueryCache;
 use std::fmt::Debug;
 use std::io::Write;
 
@@ -229,7 +229,7 @@ where
 fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
     tcx: TyCtxt<'tcx>,
     query_name: &'static str,
-    query_cache: &QueryCacheStore<C>,
+    query_cache: &C,
     string_cache: &mut QueryKeyStringCache,
 ) where
     C: QueryCache,
@@ -251,7 +251,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
         // locked while doing so. Instead we copy out the
         // `(query_key, dep_node_index)` pairs and release the lock again.
         let mut query_keys_and_indices = Vec::new();
-        query_cache.iter_results(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));
+        query_cache.iter(&mut |k, _, i| query_keys_and_indices.push((k.clone(), i)));
 
         // Now actually allocate the strings. If allocating the strings
         // generates new entries in the query cache, we'll miss them but
@@ -276,7 +276,7 @@ fn alloc_self_profile_query_strings_for_query_cache<'tcx, C>(
         let event_id = event_id_builder.from_label(query_name).to_string_id();
 
         let mut query_invocation_ids = Vec::new();
-        query_cache.iter_results(&mut |_, _, i| {
+        query_cache.iter(&mut |_, _, i| {
             query_invocation_ids.push(i.into());
         });
 
@@ -1,9 +1,12 @@
 use crate::dep_graph::DepNodeIndex;
-use crate::query::plumbing::{QueryCacheStore, QueryLookup};
 
 use rustc_arena::TypedArena;
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sharded;
+#[cfg(parallel_compiler)]
 use rustc_data_structures::sharded::Sharded;
+#[cfg(not(parallel_compiler))]
+use rustc_data_structures::sync::Lock;
 use rustc_data_structures::sync::WorkerLocal;
 use std::default::Default;
 use std::fmt::Debug;
@@ -25,35 +28,23 @@ pub trait QueryStorage {
 
 pub trait QueryCache: QueryStorage + Sized {
     type Key: Hash + Eq + Clone + Debug;
-    type Sharded: Default;
 
     /// Checks if the query is already computed and in the cache.
     /// It returns the shard index and a lock guard to the shard,
     /// which will be used if the query is not in the cache and we need
     /// to compute it.
-    fn lookup<'s, R, OnHit>(
+    fn lookup<R, OnHit>(
         &self,
-        state: &'s QueryCacheStore<Self>,
         key: &Self::Key,
         // `on_hit` can be called while holding a lock to the query state shard.
         on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    ) -> Result<R, ()>
     where
         OnHit: FnOnce(&Self::Stored, DepNodeIndex) -> R;
 
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: Self::Key,
-        value: Self::Value,
-        index: DepNodeIndex,
-    ) -> Self::Stored;
+    fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex) -> Self::Stored;
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    );
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex));
 }
 
 pub struct DefaultCacheSelector;
@@ -62,11 +53,16 @@ impl<K: Eq + Hash, V: Clone> CacheSelector<K, V> for DefaultCacheSelector {
     type Cache = DefaultCache<K, V>;
 }
 
-pub struct DefaultCache<K, V>(PhantomData<(K, V)>);
+pub struct DefaultCache<K, V> {
+    #[cfg(parallel_compiler)]
+    cache: Sharded<FxHashMap<K, (V, DepNodeIndex)>>,
+    #[cfg(not(parallel_compiler))]
+    cache: Lock<FxHashMap<K, (V, DepNodeIndex)>>,
+}
 
 impl<K, V> Default for DefaultCache<K, V> {
     fn default() -> Self {
-        DefaultCache(PhantomData)
+        DefaultCache { cache: Default::default() }
     }
 }
 
@@ -87,49 +83,51 @@ where
     V: Clone + Debug,
 {
     type Key = K;
-    type Sharded = FxHashMap<K, (V, DepNodeIndex)>;
 
     #[inline(always)]
-    fn lookup<'s, R, OnHit>(
-        &self,
-        state: &'s QueryCacheStore<Self>,
-        key: &K,
-        on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
     where
         OnHit: FnOnce(&V, DepNodeIndex) -> R,
     {
-        let (lookup, lock) = state.get_lookup(key);
-        let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
+        let key_hash = sharded::make_hash(key);
+        #[cfg(parallel_compiler)]
+        let lock = self.cache.get_shard_by_hash(key_hash).lock();
+        #[cfg(not(parallel_compiler))]
+        let lock = self.cache.lock();
+        let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result {
             let hit_result = on_hit(&value.0, value.1);
             Ok(hit_result)
         } else {
-            Err(lookup)
+            Err(())
         }
     }
 
     #[inline]
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: K,
-        value: V,
-        index: DepNodeIndex,
-    ) -> Self::Stored {
-        lock_sharded_storage.insert(key, (value.clone(), index));
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
+        #[cfg(parallel_compiler)]
+        let mut lock = self.cache.get_shard_by_value(&key).lock();
+        #[cfg(not(parallel_compiler))]
+        let mut lock = self.cache.lock();
+        lock.insert(key, (value.clone(), index));
         value
     }
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    ) {
-        let shards = shards.lock_shards();
-        for shard in shards.iter() {
-            for (k, v) in shard.iter() {
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
+        #[cfg(parallel_compiler)]
+        {
+            let shards = self.cache.lock_shards();
+            for shard in shards.iter() {
+                for (k, v) in shard.iter() {
+                    f(k, &v.0, v.1);
+                }
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            let map = self.cache.lock();
+            for (k, v) in map.iter() {
                 f(k, &v.0, v.1);
             }
         }
@@ -144,12 +142,15 @@ impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector<K, V> for ArenaCacheSelector<'tcx> {
 
 pub struct ArenaCache<'tcx, K, V> {
     arena: WorkerLocal<TypedArena<(V, DepNodeIndex)>>,
-    phantom: PhantomData<(K, &'tcx V)>,
+    #[cfg(parallel_compiler)]
+    cache: Sharded<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
+    #[cfg(not(parallel_compiler))]
+    cache: Lock<FxHashMap<K, &'tcx (V, DepNodeIndex)>>,
 }
 
 impl<'tcx, K, V> Default for ArenaCache<'tcx, K, V> {
     fn default() -> Self {
-        ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), phantom: PhantomData }
+        ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), cache: Default::default() }
     }
 }
 
@@ -171,51 +172,53 @@ where
     V: Debug,
 {
     type Key = K;
-    type Sharded = FxHashMap<K, &'tcx (V, DepNodeIndex)>;
 
     #[inline(always)]
-    fn lookup<'s, R, OnHit>(
-        &self,
-        state: &'s QueryCacheStore<Self>,
-        key: &K,
-        on_hit: OnHit,
-    ) -> Result<R, QueryLookup>
+    fn lookup<R, OnHit>(&self, key: &K, on_hit: OnHit) -> Result<R, ()>
     where
         OnHit: FnOnce(&&'tcx V, DepNodeIndex) -> R,
     {
-        let (lookup, lock) = state.get_lookup(key);
-        let result = lock.raw_entry().from_key_hashed_nocheck(lookup.key_hash, key);
+        let key_hash = sharded::make_hash(key);
+        #[cfg(parallel_compiler)]
+        let lock = self.cache.get_shard_by_hash(key_hash).lock();
+        #[cfg(not(parallel_compiler))]
+        let lock = self.cache.lock();
+        let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
 
         if let Some((_, value)) = result {
             let hit_result = on_hit(&&value.0, value.1);
             Ok(hit_result)
         } else {
-            Err(lookup)
+            Err(())
         }
     }
 
     #[inline]
-    fn complete(
-        &self,
-        lock_sharded_storage: &mut Self::Sharded,
-        key: K,
-        value: V,
-        index: DepNodeIndex,
-    ) -> Self::Stored {
+    fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored {
         let value = self.arena.alloc((value, index));
         let value = unsafe { &*(value as *const _) };
-        lock_sharded_storage.insert(key, value);
+        #[cfg(parallel_compiler)]
+        let mut lock = self.cache.get_shard_by_value(&key).lock();
+        #[cfg(not(parallel_compiler))]
+        let mut lock = self.cache.lock();
+        lock.insert(key, value);
         &value.0
     }
 
-    fn iter(
-        &self,
-        shards: &Sharded<Self::Sharded>,
-        f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex),
-    ) {
-        let shards = shards.lock_shards();
-        for shard in shards.iter() {
-            for (k, v) in shard.iter() {
+    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
+        #[cfg(parallel_compiler)]
+        {
+            let shards = self.cache.lock_shards();
+            for shard in shards.iter() {
+                for (k, v) in shard.iter() {
+                    f(k, &v.0, v.1);
+                }
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            let map = self.cache.lock();
+            for (k, v) in map.iter() {
                 f(k, &v.0, v.1);
             }
         }
@@ -4,7 +4,7 @@ use crate::dep_graph::DepNode;
 use crate::dep_graph::SerializedDepNodeIndex;
 use crate::ich::StableHashingContext;
 use crate::query::caches::QueryCache;
-use crate::query::{QueryCacheStore, QueryContext, QueryState};
+use crate::query::{QueryContext, QueryState};
 
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_errors::{DiagnosticBuilder, ErrorReported};
@@ -64,7 +64,7 @@ pub trait QueryDescription<CTX: QueryContext>: QueryConfig {
         CTX: 'a;
 
     // Don't use this method to access query results, instead use the methods on TyCtxt
-    fn query_cache<'a>(tcx: CTX) -> &'a QueryCacheStore<Self::Cache>
+    fn query_cache<'a>(tcx: CTX) -> &'a Self::Cache
     where
         CTX: 'a;
 
@@ -8,11 +8,12 @@ use crate::query::config::{QueryDescription, QueryVtable};
 use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobInfo};
 use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame};
 use rustc_data_structures::fingerprint::Fingerprint;
-use rustc_data_structures::fx::{FxHashMap, FxHasher};
+use rustc_data_structures::fx::FxHashMap;
 #[cfg(parallel_compiler)]
 use rustc_data_structures::profiling::TimingGuard;
-use rustc_data_structures::sharded::{get_shard_index_by_hash, Sharded};
-use rustc_data_structures::sync::{Lock, LockGuard};
+#[cfg(parallel_compiler)]
+use rustc_data_structures::sharded::Sharded;
+use rustc_data_structures::sync::Lock;
 use rustc_data_structures::thin_vec::ThinVec;
 use rustc_errors::{DiagnosticBuilder, ErrorReported, FatalError};
 use rustc_session::Session;
@@ -20,64 +21,15 @@ use rustc_span::{Span, DUMMY_SP};
 use std::cell::Cell;
 use std::collections::hash_map::Entry;
 use std::fmt::Debug;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
 use std::mem;
 use std::ptr;
 
-pub struct QueryCacheStore<C: QueryCache> {
-    cache: C,
-    shards: Sharded<C::Sharded>,
-}
-
-impl<C: QueryCache + Default> Default for QueryCacheStore<C> {
-    fn default() -> Self {
-        Self { cache: C::default(), shards: Default::default() }
-    }
-}
-
-/// Values used when checking a query cache which can be reused on a cache-miss to execute the query.
-pub struct QueryLookup {
-    pub(super) key_hash: u64,
-    shard: usize,
-}
-
-// We compute the key's hash once and then use it for both the
-// shard lookup and the hashmap lookup. This relies on the fact
-// that both of them use `FxHasher`.
-fn hash_for_shard<K: Hash>(key: &K) -> u64 {
-    let mut hasher = FxHasher::default();
-    key.hash(&mut hasher);
-    hasher.finish()
-}
-
-impl<C: QueryCache> QueryCacheStore<C> {
-    pub(super) fn get_lookup<'tcx>(
-        &'tcx self,
-        key: &C::Key,
-    ) -> (QueryLookup, LockGuard<'tcx, C::Sharded>) {
-        let key_hash = hash_for_shard(key);
-        let shard = get_shard_index_by_hash(key_hash);
-        let lock = self.shards.get_shard_by_index(shard).lock();
-        (QueryLookup { key_hash, shard }, lock)
-    }
-
-    pub fn iter_results(&self, f: &mut dyn FnMut(&C::Key, &C::Value, DepNodeIndex)) {
-        self.cache.iter(&self.shards, f)
-    }
-}
-
-struct QueryStateShard<K> {
-    active: FxHashMap<K, QueryResult>,
-}
-
-impl<K> Default for QueryStateShard<K> {
-    fn default() -> QueryStateShard<K> {
-        QueryStateShard { active: Default::default() }
-    }
-}
-
 pub struct QueryState<K> {
-    shards: Sharded<QueryStateShard<K>>,
+    #[cfg(parallel_compiler)]
+    active: Sharded<FxHashMap<K, QueryResult>>,
+    #[cfg(not(parallel_compiler))]
+    active: Lock<FxHashMap<K, QueryResult>>,
 }
 
 /// Indicates the state of a query for a given key in a query map.
@@ -95,8 +47,15 @@ where
     K: Eq + Hash + Clone + Debug,
 {
     pub fn all_inactive(&self) -> bool {
-        let shards = self.shards.lock_shards();
-        shards.iter().all(|shard| shard.active.is_empty())
+        #[cfg(parallel_compiler)]
+        {
+            let shards = self.active.lock_shards();
+            shards.iter().all(|shard| shard.is_empty())
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            self.active.lock().is_empty()
+        }
     }
 
     pub fn try_collect_active_jobs<CTX: Copy>(
@@ -105,11 +64,27 @@ where
         make_query: fn(CTX, K) -> QueryStackFrame,
         jobs: &mut QueryMap,
     ) -> Option<()> {
-        // We use try_lock_shards here since we are called from the
-        // deadlock handler, and this shouldn't be locked.
-        let shards = self.shards.try_lock_shards()?;
-        for shard in shards.iter() {
-            for (k, v) in shard.active.iter() {
+        #[cfg(parallel_compiler)]
+        {
+            // We use try_lock_shards here since we are called from the
+            // deadlock handler, and this shouldn't be locked.
+            let shards = self.active.try_lock_shards()?;
+            for shard in shards.iter() {
+                for (k, v) in shard.iter() {
+                    if let QueryResult::Started(ref job) = *v {
+                        let query = make_query(tcx, k.clone());
+                        jobs.insert(job.id, QueryJobInfo { query, job: job.clone() });
+                    }
+                }
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            // We use try_lock here since we are called from the
+            // deadlock handler, and this shouldn't be locked.
+            // (FIXME: Is this relevant for non-parallel compilers? It doesn't
+            // really hurt much.)
+            for (k, v) in self.active.try_lock()?.iter() {
                 if let QueryResult::Started(ref job) = *v {
                     let query = make_query(tcx, k.clone());
                     jobs.insert(job.id, QueryJobInfo { query, job: job.clone() });
@@ -123,7 +98,7 @@ where
 
 impl<K> Default for QueryState<K> {
     fn default() -> QueryState<K> {
-        QueryState { shards: Default::default() }
+        QueryState { active: Default::default() }
     }
 }
 
@@ -174,16 +149,17 @@ where
         state: &'b QueryState<K>,
         span: Span,
         key: K,
-        lookup: QueryLookup,
     ) -> TryGetJob<'b, K>
     where
         CTX: QueryContext,
     {
-        let shard = lookup.shard;
-        let mut state_lock = state.shards.get_shard_by_index(shard).lock();
+        #[cfg(parallel_compiler)]
+        let mut state_lock = state.active.get_shard_by_value(&key).lock();
+        #[cfg(not(parallel_compiler))]
+        let mut state_lock = state.active.lock();
         let lock = &mut *state_lock;
 
-        match lock.active.entry(key) {
+        match lock.entry(key) {
             Entry::Vacant(entry) => {
                 let id = tcx.next_job_id();
                 let job = tcx.current_query_job();
@@ -239,12 +215,7 @@ where
 
     /// Completes the query by updating the query cache with the `result`,
     /// signals the waiter and forgets the JobOwner, so it won't poison the query
-    fn complete<C>(
-        self,
-        cache: &QueryCacheStore<C>,
-        result: C::Value,
-        dep_node_index: DepNodeIndex,
-    ) -> C::Stored
+    fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex) -> C::Stored
     where
         C: QueryCache<Key = K>,
     {
@@ -256,19 +227,17 @@ where
         mem::forget(self);
 
         let (job, result) = {
-            let key_hash = hash_for_shard(&key);
-            let shard = get_shard_index_by_hash(key_hash);
             let job = {
-                let mut lock = state.shards.get_shard_by_index(shard).lock();
-                match lock.active.remove(&key).unwrap() {
+                #[cfg(parallel_compiler)]
+                let mut lock = state.active.get_shard_by_value(&key).lock();
+                #[cfg(not(parallel_compiler))]
+                let mut lock = state.active.lock();
+                match lock.remove(&key).unwrap() {
                     QueryResult::Started(job) => job,
                     QueryResult::Poisoned => panic!(),
                 }
             };
-            let result = {
-                let mut lock = cache.shards.get_shard_by_index(shard).lock();
-                cache.cache.complete(&mut lock, key, result, dep_node_index)
-            };
+            let result = cache.complete(key, result, dep_node_index);
             (job, result)
         };
 
@@ -286,14 +255,16 @@ where
     fn drop(&mut self) {
         // Poison the query so jobs waiting on it panic.
         let state = self.state;
-        let shard = state.shards.get_shard_by_value(&self.key);
         let job = {
-            let mut shard = shard.lock();
-            let job = match shard.active.remove(&self.key).unwrap() {
+            #[cfg(parallel_compiler)]
+            let mut shard = state.active.get_shard_by_value(&self.key).lock();
+            #[cfg(not(parallel_compiler))]
+            let mut shard = state.active.lock();
+            let job = match shard.remove(&self.key).unwrap() {
                 QueryResult::Started(job) => job,
                 QueryResult::Poisoned => panic!(),
             };
-            shard.active.insert(self.key.clone(), QueryResult::Poisoned);
+            shard.insert(self.key.clone(), QueryResult::Poisoned);
             job
         };
         // Also signal the completion of the job, so waiters
@@ -334,17 +305,17 @@ where
 #[inline]
 pub fn try_get_cached<'a, CTX, C, R, OnHit>(
     tcx: CTX,
-    cache: &'a QueryCacheStore<C>,
+    cache: &'a C,
     key: &C::Key,
     // `on_hit` can be called while holding a lock to the query cache
     on_hit: OnHit,
-) -> Result<R, QueryLookup>
+) -> Result<R, ()>
 where
     C: QueryCache,
     CTX: DepContext,
     OnHit: FnOnce(&C::Stored) -> R,
 {
-    cache.cache.lookup(cache, &key, |value, index| {
+    cache.lookup(&key, |value, index| {
         if unlikely!(tcx.profiler().enabled()) {
             tcx.profiler().query_cache_hit(index.into());
         }
@@ -356,10 +327,9 @@ where
 fn try_execute_query<CTX, C>(
     tcx: CTX,
     state: &QueryState<C::Key>,
-    cache: &QueryCacheStore<C>,
+    cache: &C,
     span: Span,
     key: C::Key,
-    lookup: QueryLookup,
     dep_node: Option<DepNode<CTX::DepKind>>,
     query: &QueryVtable<CTX, C::Key, C::Value>,
 ) -> (C::Stored, Option<DepNodeIndex>)
@@ -368,21 +338,20 @@ where
     C::Key: Clone + DepNodeParams<CTX::DepContext>,
     CTX: QueryContext,
 {
-    match JobOwner::<'_, C::Key>::try_start(&tcx, state, span, key.clone(), lookup) {
+    match JobOwner::<'_, C::Key>::try_start(&tcx, state, span, key.clone()) {
         TryGetJob::NotYetStarted(job) => {
             let (result, dep_node_index) = execute_job(tcx, key, dep_node, query, job.id);
             let result = job.complete(cache, result, dep_node_index);
             (result, Some(dep_node_index))
         }
         TryGetJob::Cycle(error) => {
-            let result = mk_cycle(tcx, error, query.handle_cycle_error, &cache.cache);
+            let result = mk_cycle(tcx, error, query.handle_cycle_error, cache);
             (result, None)
         }
         #[cfg(parallel_compiler)]
         TryGetJob::JobCompleted(query_blocked_prof_timer) => {
             let (v, index) = cache
-                .cache
-                .lookup(cache, &key, |value, index| (value.clone(), index))
+                .lookup(&key, |value, index| (value.clone(), index))
                 .unwrap_or_else(|_| panic!("value must be in cache after waiting"));
 
             if unlikely!(tcx.dep_context().profiler().enabled()) {
@@ -711,13 +680,7 @@ pub enum QueryMode {
     Ensure,
 }
 
-pub fn get_query<Q, CTX>(
-    tcx: CTX,
-    span: Span,
-    key: Q::Key,
-    lookup: QueryLookup,
-    mode: QueryMode,
-) -> Option<Q::Stored>
+pub fn get_query<Q, CTX>(tcx: CTX, span: Span, key: Q::Key, mode: QueryMode) -> Option<Q::Stored>
 where
     Q: QueryDescription<CTX>,
     Q::Key: DepNodeParams<CTX::DepContext>,
@@ -741,7 +704,6 @@ where
         Q::query_cache(tcx),
         span,
         key,
-        lookup,
         dep_node,
         &query,
     );
@@ -760,20 +722,20 @@ where
     // We may be concurrently trying both execute and force a query.
     // Ensure that only one of them runs the query.
     let cache = Q::query_cache(tcx);
-    let cached = cache.cache.lookup(cache, &key, |_, index| {
+    let cached = cache.lookup(&key, |_, index| {
         if unlikely!(tcx.dep_context().profiler().enabled()) {
             tcx.dep_context().profiler().query_cache_hit(index.into());
         }
     });
 
-    let lookup = match cached {
+    match cached {
         Ok(()) => return,
-        Err(lookup) => lookup,
-    };
+        Err(()) => {}
+    }
 
     let query = Q::make_vtable(tcx, &key);
     let state = Q::query_state(tcx);
     debug_assert!(!query.anon);
 
-    try_execute_query(tcx, state, cache, DUMMY_SP, key, lookup, Some(dep_node), &query);
+    try_execute_query(tcx, state, cache, DUMMY_SP, key, Some(dep_node), &query);
 }