1
Fork 0

Auto merge of #124780 - Mark-Simulacrum:lockless-cache, r=lcnr

Improve VecCache under parallel frontend

This replaces the single Vec allocation with a series of progressively larger buckets. With the cfg for parallel enabled but with -Zthreads=1, this looks like a slight regression in i-count and cycle counts (~1%).

With the parallel frontend at -Zthreads=4, this is an improvement (-5% wall-time from 5.788 to 5.4688 on libcore) than our current Lock-based approach, likely due to reducing the bouncing of the cache line holding the lock. At -Zthreads=32 it's a huge improvement (-46%: 8.829 -> 4.7319 seconds).

try-job: i686-gnu-nopt
try-job: dist-x86_64-linux
This commit is contained in:
bors 2024-11-19 02:07:48 +00:00
commit 5926e82dd1
6 changed files with 458 additions and 65 deletions

View file

@ -2,6 +2,7 @@
#![allow(rustc::potential_query_instability, internal_features)]
#![feature(assert_matches)]
#![feature(core_intrinsics)]
#![feature(dropck_eyepatch)]
#![feature(hash_raw_entry)]
#![feature(let_chains)]
#![feature(min_specialization)]

View file

@ -3,9 +3,10 @@ use std::hash::Hash;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded::{self, Sharded};
use rustc_data_structures::sync::{Lock, OnceLock};
use rustc_data_structures::sync::OnceLock;
pub use rustc_data_structures::vec_cache::VecCache;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_index::{Idx, IndexVec};
use rustc_index::Idx;
use rustc_span::def_id::{DefId, DefIndex};
use crate::dep_graph::DepNodeIndex;
@ -100,52 +101,10 @@ where
}
}
pub struct VecCache<K: Idx, V> {
cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
}
impl<K: Idx, V> Default for VecCache<K, V> {
fn default() -> Self {
VecCache { cache: Default::default() }
}
}
impl<K, V> QueryCache for VecCache<K, V>
where
K: Eq + Idx + Copy + Debug,
V: Copy,
{
type Key = K;
type Value = V;
#[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
let lock = self.cache.lock();
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
}
#[inline]
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
let mut lock = self.cache.lock();
lock.insert(key, (value, index));
}
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
for (k, v) in self.cache.lock().iter_enumerated() {
if let Some(v) = v {
f(&k, &v.0, v.1);
}
}
}
}
pub struct DefIdCache<V> {
/// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
/// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
///
/// The second element of the tuple is the set of keys actually present in the IndexVec, used
/// for faster iteration in `iter()`.
local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
local: VecCache<DefIndex, V, DepNodeIndex>,
foreign: DefaultCache<DefId, V>,
}
@ -165,8 +124,7 @@ where
#[inline(always)]
fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
if key.krate == LOCAL_CRATE {
let cache = self.local.lock();
cache.0.get(key.index).and_then(|v| *v)
self.local.lookup(&key.index)
} else {
self.foreign.lookup(key)
}
@ -175,27 +133,39 @@ where
#[inline]
fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
if key.krate == LOCAL_CRATE {
let mut cache = self.local.lock();
let (cache, present) = &mut *cache;
let slot = cache.ensure_contains_elem(key.index, Default::default);
if slot.is_none() {
// FIXME: Only store the present set when running in incremental mode. `iter` is not
// used outside of saving caches to disk and self-profile.
present.push(key.index);
}
*slot = Some((value, index));
self.local.complete(key.index, value, index)
} else {
self.foreign.complete(key, value, index)
}
}
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
let guard = self.local.lock();
let (cache, present) = &*guard;
for &idx in present.iter() {
let value = cache[idx].unwrap();
f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
}
self.local.iter(&mut |key, value, index| {
f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
});
self.foreign.iter(f);
}
}
impl<K, V> QueryCache for VecCache<K, V, DepNodeIndex>
where
K: Idx + Eq + Hash + Copy + Debug,
V: Copy,
{
type Key = K;
type Value = V;
#[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
self.lookup(key)
}
#[inline]
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
self.complete(key, value, index)
}
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
self.iter(f)
}
}