
Auto merge of #136115 - Mark-Simulacrum:shard-alloc-id, r=RalfJung

Shard AllocMap Lock

This improves performance of many-seed parallel (-Zthreads=32) Miri executions from managing to use only ~8 cores to using 27-28 cores, which is about the same as what I see with the data structure proposed in https://github.com/rust-lang/rust/pull/136105. I haven't analyzed this, but I suspect the sharding might actually work out better if we commonly insert "densely", since sharding splits the cache lines while the OnceVec packs locks close together. Of course, we could do something similar with the bitset lock too.

Either way, this seems like a very reasonable starting point that solves the problem about equally well on the workloads I can test locally.

r? `@RalfJung`
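
For readers new to the idea: a sharded map keeps several independently locked sub-maps and routes each key to one of them by hash, so inserts that land in different shards never contend for the same lock. Below is a minimal std-only sketch of that shape; the names (ShardedMap, SHARDS) are hypothetical stand-ins for rustc's actual rustc_data_structures::sharded::ShardedHashMap.

use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

// Hypothetical shard count; the real implementation tunes this.
const SHARDS: usize = 16;

// One lock per shard instead of one lock for the whole map.
struct ShardedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
}

impl<K: Hash + Eq, V> ShardedMap<K, V> {
    fn new() -> Self {
        ShardedMap { shards: (0..SHARDS).map(|_| Mutex::new(HashMap::new())).collect() }
    }

    // Hash the key to pick a shard; only that shard's lock is taken, so
    // threads touching keys that hash to different shards run in parallel.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        &self.shards[hasher.finish() as usize % SHARDS]
    }

    fn insert(&self, key: K, value: V) -> Option<V> {
        self.shard_for(&key).lock().unwrap().insert(key, value)
    }

    fn get(&self, key: &K) -> Option<V>
    where
        V: Clone,
    {
        self.shard_for(key).lock().unwrap().get(key).cloned()
    }
}

fn main() {
    let map: ShardedMap<u64, &str> = ShardedMap::new();
    map.insert(1, "alloc");
    assert_eq!(map.get(&1), Some("alloc"));
}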
bors 2025-02-04 23:47:45 +00:00
commit e5f11af042
2 changed files with 44 additions and 26 deletions

compiler/rustc_middle/src/mir/interpret/mod.rs

@@ -15,7 +15,8 @@ use std::{fmt, io};
 use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
 use rustc_ast::{LitKind, Mutability};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sharded::ShardedHashMap;
+use rustc_data_structures::sync::{AtomicU64, Lock};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
@@ -389,35 +390,39 @@ pub const CTFE_ALLOC_SALT: usize = 0;
 pub(crate) struct AllocMap<'tcx> {
     /// Maps `AllocId`s to their corresponding allocations.
-    alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
+    // Note that this map on rustc workloads seems to be rather dense, but in miri workloads should
+    // be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but
+    // since there are workloads where it can be sparse we decided to go with sharding for now. At
+    // least up to 32 cores the one workload tested didn't exhibit much difference between the two.
+    //
+    // Should be locked *after* locking dedup if locking both to avoid deadlocks.
+    to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,
 
     /// Used to deduplicate global allocations: functions, vtables, string literals, ...
     ///
     /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
     /// the actual guarantees.
-    dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
+    dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,
 
     /// The `AllocId` to assign to the next requested ID.
     /// Always incremented; never gets smaller.
-    next_id: AllocId,
+    next_id: AtomicU64,
 }
 
 impl<'tcx> AllocMap<'tcx> {
     pub(crate) fn new() -> Self {
         AllocMap {
-            alloc_map: Default::default(),
+            to_alloc: Default::default(),
             dedup: Default::default(),
-            next_id: AllocId(NonZero::new(1).unwrap()),
+            next_id: AtomicU64::new(1),
         }
     }
 
-    fn reserve(&mut self) -> AllocId {
-        let next = self.next_id;
-        self.next_id.0 = self.next_id.0.checked_add(1).expect(
-            "You overflowed a u64 by incrementing by 1... \
-             You've just earned yourself a free drink if we ever meet. \
-             Seriously, how did you do that?!",
-        );
-        next
+    fn reserve(&self) -> AllocId {
+        // Technically there is a window here where we overflow and then another thread
+        // increments `next_id` *again* and uses it before we panic and tear down the entire session.
+        // We consider this fine since such overflows cannot realistically occur.
+        let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        AllocId(NonZero::new(next_id).unwrap())
     }
 }
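
The reserve rewrite above drops the mutex entirely: fetch_add returns the value *before* the increment, so every caller observes a distinct ID without taking any lock, and Relaxed ordering suffices because the ID is only a unique token, not a point of synchronization with other memory. A self-contained sketch of the same pattern, with a hypothetical IdAllocator standing in for AllocMap's counter:

use std::num::NonZero;
use std::sync::atomic::{AtomicU64, Ordering};

struct IdAllocator {
    next_id: AtomicU64,
}

impl IdAllocator {
    fn new() -> Self {
        // Start at 1 so every handed-out ID fits in NonZero<u64>.
        IdAllocator { next_id: AtomicU64::new(1) }
    }

    // `fetch_add` returns the previous value, so concurrent callers each
    // get a distinct ID lock-free. Relaxed is enough: nothing else is
    // published through this counter, only uniqueness matters.
    fn reserve(&self) -> NonZero<u64> {
        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
        NonZero::new(id).unwrap()
    }
}

fn main() {
    let ids = IdAllocator::new();
    assert_eq!(ids.reserve().get(), 1);
    assert_eq!(ids.reserve().get(), 2);
}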
@@ -428,26 +433,34 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
     /// an `AllocId` from a query.
     pub fn reserve_alloc_id(self) -> AllocId {
-        self.alloc_map.lock().reserve()
+        self.alloc_map.reserve()
     }
 
     /// Reserves a new ID *if* this allocation has not been dedup-reserved before.
     /// Should not be used for mutable memory.
     fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
-        let mut alloc_map = self.alloc_map.lock();
         if let GlobalAlloc::Memory(mem) = alloc {
             if mem.inner().mutability.is_mut() {
                 bug!("trying to dedup-reserve mutable memory");
             }
         }
         let alloc_salt = (alloc, salt);
-        if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
+        // Locking this *before* `to_alloc` also to ensure correct lock order.
+        let mut dedup = self.alloc_map.dedup.lock();
+        if let Some(&alloc_id) = dedup.get(&alloc_salt) {
             return alloc_id;
         }
-        let id = alloc_map.reserve();
+        let id = self.alloc_map.reserve();
         debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
-        alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
-        alloc_map.dedup.insert(alloc_salt, id);
+        let had_previous = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, alloc_salt.0.clone())
+            .is_some();
+        // We just reserved, so should always be unique.
+        assert!(!had_previous);
+        dedup.insert(alloc_salt, id);
         id
     }
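
The comments in this hunk fix a lock order: dedup is always taken before any to_alloc shard. A minimal std-only illustration of why one consistent order matters, with hypothetical stand-in types:

use std::collections::HashMap;
use std::sync::Mutex;

// Hypothetical stand-ins: one lock for a dedup table, one for a map shard.
struct Maps {
    dedup: Mutex<HashMap<String, u64>>,
    shard: Mutex<HashMap<u64, String>>,
}

impl Maps {
    // Every path that needs both locks takes `dedup` first, then `shard`.
    // If some other path took them in the opposite order, two threads could
    // each hold one lock while waiting for the other, and neither would
    // ever proceed (a deadlock cycle). A single global order rules that out.
    fn insert_deduped(&self, key: String, id: u64) {
        let mut dedup = self.dedup.lock().unwrap();
        let mut shard = self.shard.lock().unwrap();
        dedup.insert(key.clone(), id);
        shard.insert(id, key);
    }
}

fn main() {
    let maps = Maps { dedup: Mutex::new(HashMap::new()), shard: Mutex::new(HashMap::new()) };
    maps.insert_deduped("vtable".to_string(), 1);
}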
@@ -497,7 +510,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// local dangling pointers and allocations in constants/statics.
     #[inline]
     pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
-        self.alloc_map.lock().alloc_map.get(&id).cloned()
+        self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
     }
 
     #[inline]
@@ -516,7 +529,9 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
     /// call this function twice, even with the same `Allocation` will ICE the compiler.
     pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
-        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+        if let Some(old) =
+            self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
+        {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
     }
@@ -524,8 +539,11 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
     /// call this function twice, even with the same `DefId` will ICE the compiler.
    pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
-        if let Some(old) =
-            self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
+        if let Some(old) = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, GlobalAlloc::Static(def_id.to_def_id()))
         {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }

compiler/rustc_middle/src/ty/context.rs

@@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> {
     pub data_layout: TargetDataLayout,
 
     /// Stores memory for globals (statics/consts).
-    pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
+    pub(crate) alloc_map: interpret::AllocMap<'tcx>,
 
     current_gcx: CurrentGcx,
 }
@@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> {
             new_solver_evaluation_cache: Default::default(),
             canonical_param_env_cache: Default::default(),
             data_layout,
-            alloc_map: Lock::new(interpret::AllocMap::new()),
+            alloc_map: interpret::AllocMap::new(),
             current_gcx,
         });
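
Since AllocMap now synchronizes internally (a sharded map, a Lock around dedup, and an atomic counter), GlobalCtxt can store it as a plain field and callers reach it through &self with no outer .lock(). A tiny sketch of that refactor shape, with hypothetical names:

use std::sync::atomic::{AtomicU64, Ordering};

// Before (hypothetically): Context { alloc_map: Mutex<AllocMap> } funneled
// every caller through one global lock. After: the type synchronizes itself,
// so the field is a plain value and all methods take &self.
struct AllocMap {
    next_id: AtomicU64, // synchronization lives inside the type
}

impl AllocMap {
    fn reserve(&self) -> u64 {
        self.next_id.fetch_add(1, Ordering::Relaxed)
    }
}

struct Context {
    alloc_map: AllocMap, // no outer Mutex/Lock wrapper needed anymore
}

fn main() {
    let ctx = Context { alloc_map: AllocMap { next_id: AtomicU64::new(1) } };
    assert_eq!(ctx.alloc_map.reserve(), 1); // direct call, no .lock()
}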