Auto merge of #124780 - Mark-Simulacrum:lockless-cache, r=lcnr
Improve VecCache under parallel frontend This replaces the single Vec allocation with a series of progressively larger buckets. With the cfg for parallel enabled but with -Zthreads=1, this looks like a slight regression in i-count and cycle counts (~1%). With the parallel frontend at -Zthreads=4, this is an improvement (-5% wall-time from 5.788 to 5.4688 on libcore) than our current Lock-based approach, likely due to reducing the bouncing of the cache line holding the lock. At -Zthreads=32 it's a huge improvement (-46%: 8.829 -> 4.7319 seconds). try-job: i686-gnu-nopt try-job: dist-x86_64-linux
This commit is contained in:
commit
5926e82dd1
6 changed files with 458 additions and 65 deletions
|
@ -2,6 +2,7 @@
|
|||
#![allow(rustc::potential_query_instability, internal_features)]
|
||||
#![feature(assert_matches)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(dropck_eyepatch)]
|
||||
#![feature(hash_raw_entry)]
|
||||
#![feature(let_chains)]
|
||||
#![feature(min_specialization)]
|
||||
|
|
|
@ -3,9 +3,10 @@ use std::hash::Hash;
|
|||
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::sharded::{self, Sharded};
|
||||
use rustc_data_structures::sync::{Lock, OnceLock};
|
||||
use rustc_data_structures::sync::OnceLock;
|
||||
pub use rustc_data_structures::vec_cache::VecCache;
|
||||
use rustc_hir::def_id::LOCAL_CRATE;
|
||||
use rustc_index::{Idx, IndexVec};
|
||||
use rustc_index::Idx;
|
||||
use rustc_span::def_id::{DefId, DefIndex};
|
||||
|
||||
use crate::dep_graph::DepNodeIndex;
|
||||
|
@ -100,52 +101,10 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub struct VecCache<K: Idx, V> {
|
||||
cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
|
||||
}
|
||||
|
||||
impl<K: Idx, V> Default for VecCache<K, V> {
|
||||
fn default() -> Self {
|
||||
VecCache { cache: Default::default() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> QueryCache for VecCache<K, V>
|
||||
where
|
||||
K: Eq + Idx + Copy + Debug,
|
||||
V: Copy,
|
||||
{
|
||||
type Key = K;
|
||||
type Value = V;
|
||||
|
||||
#[inline(always)]
|
||||
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
|
||||
let lock = self.cache.lock();
|
||||
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
|
||||
let mut lock = self.cache.lock();
|
||||
lock.insert(key, (value, index));
|
||||
}
|
||||
|
||||
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
|
||||
for (k, v) in self.cache.lock().iter_enumerated() {
|
||||
if let Some(v) = v {
|
||||
f(&k, &v.0, v.1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DefIdCache<V> {
|
||||
/// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
|
||||
/// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
|
||||
///
|
||||
/// The second element of the tuple is the set of keys actually present in the IndexVec, used
|
||||
/// for faster iteration in `iter()`.
|
||||
local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
|
||||
local: VecCache<DefIndex, V, DepNodeIndex>,
|
||||
foreign: DefaultCache<DefId, V>,
|
||||
}
|
||||
|
||||
|
@ -165,8 +124,7 @@ where
|
|||
#[inline(always)]
|
||||
fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
|
||||
if key.krate == LOCAL_CRATE {
|
||||
let cache = self.local.lock();
|
||||
cache.0.get(key.index).and_then(|v| *v)
|
||||
self.local.lookup(&key.index)
|
||||
} else {
|
||||
self.foreign.lookup(key)
|
||||
}
|
||||
|
@ -175,27 +133,39 @@ where
|
|||
#[inline]
|
||||
fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
|
||||
if key.krate == LOCAL_CRATE {
|
||||
let mut cache = self.local.lock();
|
||||
let (cache, present) = &mut *cache;
|
||||
let slot = cache.ensure_contains_elem(key.index, Default::default);
|
||||
if slot.is_none() {
|
||||
// FIXME: Only store the present set when running in incremental mode. `iter` is not
|
||||
// used outside of saving caches to disk and self-profile.
|
||||
present.push(key.index);
|
||||
}
|
||||
*slot = Some((value, index));
|
||||
self.local.complete(key.index, value, index)
|
||||
} else {
|
||||
self.foreign.complete(key, value, index)
|
||||
}
|
||||
}
|
||||
|
||||
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
|
||||
let guard = self.local.lock();
|
||||
let (cache, present) = &*guard;
|
||||
for &idx in present.iter() {
|
||||
let value = cache[idx].unwrap();
|
||||
f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
|
||||
}
|
||||
self.local.iter(&mut |key, value, index| {
|
||||
f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
|
||||
});
|
||||
self.foreign.iter(f);
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> QueryCache for VecCache<K, V, DepNodeIndex>
|
||||
where
|
||||
K: Idx + Eq + Hash + Copy + Debug,
|
||||
V: Copy,
|
||||
{
|
||||
type Key = K;
|
||||
type Value = V;
|
||||
|
||||
#[inline(always)]
|
||||
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
|
||||
self.lookup(key)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
|
||||
self.complete(key, value, index)
|
||||
}
|
||||
|
||||
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
|
||||
self.iter(f)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue