Auto merge of #106977 - michaelwoerister:unord_id_collections, r=oli-obk

Use UnordMap and UnordSet for id collections (DefIdMap, LocalDefIdMap, etc)

This PR changes the `rustc_data_structures::define_id_collections!` macro to use `UnordMap` and `UnordSet` instead of `FxHashMap` and `FxHashSet`. This should account for a large portion of hash-maps being used in places where they can cause trouble.

The changes required are moderate but non-zero:
- In some places the collections are extracted into sorted vecs.
- There are a few instances where for-loops have been changed to extends.

~~Let's see what the performance impact is. With a bit more refactoring, we might be able to get rid of some of the additional sorting -- but the change set is already big enough. Unless there's a performance impact, I'd like to do further changes in subsequent PRs.~~

Performance does not seem to be negatively affected ([perf-run here](https://github.com/rust-lang/rust/pull/106977#issuecomment-1396776699)).

Part of [MCP 533](https://github.com/rust-lang/compiler-team/issues/533).

r? `@ghost`
This commit is contained in:
bors 2023-01-21 14:18:17 +00:00
commit 005fc0f00f
18 changed files with 393 additions and 118 deletions

View file

@ -11,8 +11,8 @@ pub type IndexEntry<'a, K, V> = indexmap::map::Entry<'a, K, V>;
#[macro_export]
macro_rules! define_id_collections {
($map_name:ident, $set_name:ident, $entry_name:ident, $key:ty) => {
pub type $map_name<T> = $crate::fx::FxHashMap<$key, T>;
pub type $set_name = $crate::fx::FxHashSet<$key>;
pub type $map_name<T> = $crate::unord::UnordMap<$key, T>;
pub type $set_name = $crate::unord::UnordSet<$key>;
pub type $entry_name<'a, T> = $crate::fx::StdEntry<'a, $key, T>;
};
}

View file

@ -6,13 +6,15 @@ use rustc_hash::{FxHashMap, FxHashSet};
use smallvec::SmallVec;
use std::{
borrow::Borrow,
collections::hash_map::Entry,
hash::Hash,
iter::{Product, Sum},
ops::Index,
};
use crate::{
fingerprint::Fingerprint,
stable_hasher::{HashStable, StableHasher, ToStableHashKey},
stable_hasher::{HashStable, StableHasher, StableOrd, ToStableHashKey},
};
/// `UnordItems` is the order-less version of `Iterator`. It only contains methods
@ -38,17 +40,17 @@ impl<T, I: Iterator<Item = T>> UnordItems<T, I> {
}
#[inline]
pub fn all<U, F: Fn(T) -> bool>(mut self, f: F) -> bool {
pub fn all<F: Fn(T) -> bool>(mut self, f: F) -> bool {
self.0.all(f)
}
#[inline]
pub fn any<U, F: Fn(T) -> bool>(mut self, f: F) -> bool {
pub fn any<F: Fn(T) -> bool>(mut self, f: F) -> bool {
self.0.any(f)
}
#[inline]
pub fn filter<U, F: Fn(&T) -> bool>(self, f: F) -> UnordItems<T, impl Iterator<Item = T>> {
pub fn filter<F: Fn(&T) -> bool>(self, f: F) -> UnordItems<T, impl Iterator<Item = T>> {
UnordItems(self.0.filter(f))
}
@ -96,6 +98,15 @@ impl<T, I: Iterator<Item = T>> UnordItems<T, I> {
pub fn count(self) -> usize {
self.0.count()
}
#[inline]
pub fn flat_map<U, F, O>(self, f: F) -> UnordItems<O, impl Iterator<Item = O>>
where
U: IntoIterator<Item = O>,
F: Fn(T) -> U,
{
UnordItems(self.0.flat_map(f))
}
}
impl<'a, T: Clone + 'a, I: Iterator<Item = &'a T>> UnordItems<&'a T, I> {
@ -147,6 +158,7 @@ pub struct UnordSet<V: Eq + Hash> {
}
impl<V: Eq + Hash> Default for UnordSet<V> {
#[inline]
fn default() -> Self {
Self { inner: FxHashSet::default() }
}
@ -178,7 +190,16 @@ impl<V: Eq + Hash> UnordSet<V> {
}
#[inline]
pub fn items(&self) -> UnordItems<&V, impl Iterator<Item = &V>> {
pub fn remove<Q: ?Sized>(&mut self, k: &Q) -> bool
where
V: Borrow<Q>,
Q: Hash + Eq,
{
self.inner.remove(k)
}
#[inline]
pub fn items<'a>(&'a self) -> UnordItems<&'a V, impl Iterator<Item = &'a V>> {
UnordItems(self.inner.iter())
}
@ -187,20 +208,75 @@ impl<V: Eq + Hash> UnordSet<V> {
UnordItems(self.inner.into_iter())
}
/// Returns the items of this set in stable sort order (as defined by `ToStableHashKey`).
///
/// The `cache_sort_key` parameter controls if [slice::sort_by_cached_key] or
/// [slice::sort_unstable_by_key] will be used for sorting the vec. Use
/// `cache_sort_key` when the [ToStableHashKey::to_stable_hash_key] implementation
/// for `V` is expensive (e.g. a `DefId -> DefPathHash` lookup).
#[inline]
pub fn to_sorted<HCX>(&self, hcx: &HCX, cache_sort_key: bool) -> Vec<&V>
where
V: ToStableHashKey<HCX>,
{
to_sorted_vec(hcx, self.inner.iter(), cache_sort_key, |&x| x)
}
/// Returns the items of this set in stable sort order (as defined by
/// `StableOrd`). This method is much more efficient than
/// `into_sorted` because it does not need to transform keys to their
/// `ToStableHashKey` equivalent.
#[inline]
pub fn to_sorted_stable_ord(&self) -> Vec<V>
where
V: Ord + StableOrd + Copy,
{
let mut items: Vec<V> = self.inner.iter().copied().collect();
items.sort_unstable();
items
}
/// Returns the items of this set in stable sort order (as defined by `ToStableHashKey`).
///
/// The `cache_sort_key` parameter controls if [slice::sort_by_cached_key] or
/// [slice::sort_unstable_by_key] will be used for sorting the vec. Use
/// `cache_sort_key` when the [ToStableHashKey::to_stable_hash_key] implementation
/// for `V` is expensive (e.g. a `DefId -> DefPathHash` lookup).
#[inline]
pub fn into_sorted<HCX>(self, hcx: &HCX, cache_sort_key: bool) -> Vec<V>
where
V: ToStableHashKey<HCX>,
{
to_sorted_vec(hcx, self.inner.into_iter(), cache_sort_key, |x| x)
}
// We can safely extend this UnordSet from a set of unordered values because that
// won't expose the internal ordering anywhere.
#[inline]
pub fn extend<I: Iterator<Item = V>>(&mut self, items: UnordItems<V, I>) {
self.inner.extend(items.0)
}
#[inline]
pub fn clear(&mut self) {
self.inner.clear();
}
}
impl<V: Hash + Eq> Extend<V> for UnordSet<V> {
#[inline]
fn extend<T: IntoIterator<Item = V>>(&mut self, iter: T) {
self.inner.extend(iter)
}
}
impl<V: Hash + Eq> FromIterator<V> for UnordSet<V> {
#[inline]
fn from_iter<T: IntoIterator<Item = V>>(iter: T) -> Self {
UnordSet { inner: FxHashSet::from_iter(iter) }
}
}
impl<HCX, V: Hash + Eq + HashStable<HCX>> HashStable<HCX> for UnordSet<V> {
#[inline]
fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) {
@ -223,17 +299,33 @@ pub struct UnordMap<K: Eq + Hash, V> {
}
impl<K: Eq + Hash, V> Default for UnordMap<K, V> {
#[inline]
fn default() -> Self {
Self { inner: FxHashMap::default() }
}
}
impl<K: Hash + Eq, V> Extend<(K, V)> for UnordMap<K, V> {
#[inline]
fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
self.inner.extend(iter)
}
}
impl<K: Hash + Eq, V> FromIterator<(K, V)> for UnordMap<K, V> {
#[inline]
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
UnordMap { inner: FxHashMap::from_iter(iter) }
}
}
impl<K: Hash + Eq, V, I: Iterator<Item = (K, V)>> From<UnordItems<(K, V), I>> for UnordMap<K, V> {
#[inline]
fn from(items: UnordItems<(K, V), I>) -> Self {
UnordMap { inner: FxHashMap::from_iter(items.0) }
}
}
impl<K: Eq + Hash, V> UnordMap<K, V> {
#[inline]
pub fn len(&self) -> usize {
@ -255,7 +347,44 @@ impl<K: Eq + Hash, V> UnordMap<K, V> {
}
#[inline]
pub fn items(&self) -> UnordItems<(&K, &V), impl Iterator<Item = (&K, &V)>> {
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
#[inline]
pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
self.inner.entry(key)
}
#[inline]
pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&V>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.inner.get(k)
}
#[inline]
pub fn get_mut<Q: ?Sized>(&mut self, k: &Q) -> Option<&mut V>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.inner.get_mut(k)
}
#[inline]
pub fn remove<Q: ?Sized>(&mut self, k: &Q) -> Option<V>
where
K: Borrow<Q>,
Q: Hash + Eq,
{
self.inner.remove(k)
}
#[inline]
pub fn items<'a>(&'a self) -> UnordItems<(&'a K, &'a V), impl Iterator<Item = (&'a K, &'a V)>> {
UnordItems(self.inner.iter())
}
@ -270,6 +399,77 @@ impl<K: Eq + Hash, V> UnordMap<K, V> {
pub fn extend<I: Iterator<Item = (K, V)>>(&mut self, items: UnordItems<(K, V), I>) {
self.inner.extend(items.0)
}
/// Returns the entries of this map in stable sort order (as defined by `ToStableHashKey`).
///
/// The `cache_sort_key` parameter controls if [slice::sort_by_cached_key] or
/// [slice::sort_unstable_by_key] will be used for sorting the vec. Use
/// `cache_sort_key` when the [ToStableHashKey::to_stable_hash_key] implementation
/// for `K` is expensive (e.g. a `DefId -> DefPathHash` lookup).
#[inline]
pub fn to_sorted<HCX>(&self, hcx: &HCX, cache_sort_key: bool) -> Vec<(&K, &V)>
where
K: ToStableHashKey<HCX>,
{
to_sorted_vec(hcx, self.inner.iter(), cache_sort_key, |&(k, _)| k)
}
/// Returns the entries of this map in stable sort order (as defined by `StableOrd`).
/// This method can be much more efficient than `into_sorted` because it does not need
/// to transform keys to their `ToStableHashKey` equivalent.
#[inline]
pub fn to_sorted_stable_ord(&self) -> Vec<(K, &V)>
where
K: Ord + StableOrd + Copy,
{
let mut items: Vec<(K, &V)> = self.inner.iter().map(|(&k, v)| (k, v)).collect();
items.sort_unstable_by_key(|&(k, _)| k);
items
}
/// Returns the entries of this map in stable sort order (as defined by `ToStableHashKey`).
///
/// The `cache_sort_key` parameter controls if [slice::sort_by_cached_key] or
/// [slice::sort_unstable_by_key] will be used for sorting the vec. Use
/// `cache_sort_key` when the [ToStableHashKey::to_stable_hash_key] implementation
/// for `K` is expensive (e.g. a `DefId -> DefPathHash` lookup).
#[inline]
pub fn into_sorted<HCX>(self, hcx: &HCX, cache_sort_key: bool) -> Vec<(K, V)>
where
K: ToStableHashKey<HCX>,
{
to_sorted_vec(hcx, self.inner.into_iter(), cache_sort_key, |(k, _)| k)
}
/// Returns the values of this map in stable sort order (as defined by K's
/// `ToStableHashKey` implementation).
///
/// The `cache_sort_key` parameter controls if [slice::sort_by_cached_key] or
/// [slice::sort_unstable_by_key] will be used for sorting the vec. Use
/// `cache_sort_key` when the [ToStableHashKey::to_stable_hash_key] implementation
/// for `K` is expensive (e.g. a `DefId -> DefPathHash` lookup).
#[inline]
pub fn values_sorted<HCX>(&self, hcx: &HCX, cache_sort_key: bool) -> impl Iterator<Item = &V>
where
K: ToStableHashKey<HCX>,
{
to_sorted_vec(hcx, self.inner.iter(), cache_sort_key, |&(k, _)| k)
.into_iter()
.map(|(_, v)| v)
}
}
impl<K, Q: ?Sized, V> Index<&Q> for UnordMap<K, V>
where
K: Eq + Hash + Borrow<Q>,
Q: Eq + Hash,
{
type Output = V;
#[inline]
fn index(&self, key: &Q) -> &V {
&self.inner[key]
}
}
impl<HCX, K: Hash + Eq + HashStable<HCX>, V: HashStable<HCX>> HashStable<HCX> for UnordMap<K, V> {
@ -334,6 +534,12 @@ impl<T> Extend<T> for UnordBag<T> {
}
}
impl<T, I: Iterator<Item = T>> From<UnordItems<T, I>> for UnordBag<T> {
fn from(value: UnordItems<T, I>) -> Self {
UnordBag { inner: Vec::from_iter(value.0) }
}
}
impl<HCX, V: Hash + Eq + HashStable<HCX>> HashStable<HCX> for UnordBag<V> {
#[inline]
fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) {
@ -341,6 +547,27 @@ impl<HCX, V: Hash + Eq + HashStable<HCX>> HashStable<HCX> for UnordBag<V> {
}
}
#[inline]
fn to_sorted_vec<HCX, T, K, I>(
hcx: &HCX,
iter: I,
cache_sort_key: bool,
extract_key: fn(&T) -> &K,
) -> Vec<T>
where
I: Iterator<Item = T>,
K: ToStableHashKey<HCX>,
{
let mut items: Vec<T> = iter.collect();
if cache_sort_key {
items.sort_by_cached_key(|x| extract_key(x).to_stable_hash_key(hcx));
} else {
items.sort_unstable_by_key(|x| extract_key(x).to_stable_hash_key(hcx));
}
items
}
fn hash_iter_order_independent<
HCX,
T: HashStable<HCX>,