Introduce MixedBitSet.

It just uses `BitSet` for small/medium sizes (<= 2048 bits) and
`ChunkedBitSet` for larger sizes. This is good because `ChunkedBitSet`
is slow and memory-hungry at smaller sizes.
This commit is contained in:
Nicholas Nethercote 2024-12-05 11:15:59 +11:00
parent dff5ce6881
commit 6ee1a7aaa0
4 changed files with 200 additions and 3 deletions

View file

@ -410,6 +410,9 @@ impl<'a, T: Idx> Iterator for BitIter<'a, T> {
/// some stretches with lots of 0s and 1s mixed in a way that causes trouble /// some stretches with lots of 0s and 1s mixed in a way that causes trouble
/// for `IntervalSet`. /// for `IntervalSet`.
/// ///
/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
/// has better performance for small bitsets.
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`. /// just be `usize`.
/// ///
@ -1106,6 +1109,158 @@ where
false false
} }
/// A bitset with a mixed representation, using `BitSet` for small and medium
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
/// for at least two chunks. This is a good choice for many bitsets that can
/// have large domain sizes (e.g. 5000+).
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
///
/// All operations that involve an element will panic if the element is equal
/// to or greater than the domain size. All operations that involve two bitsets
/// will panic if the bitsets have differing domain sizes.
#[derive(PartialEq, Eq)]
pub enum MixedBitSet<T> {
Small(BitSet<T>),
Large(ChunkedBitSet<T>),
}
impl<T> MixedBitSet<T> {
pub fn domain_size(&self) -> usize {
match self {
MixedBitSet::Small(set) => set.domain_size(),
MixedBitSet::Large(set) => set.domain_size(),
}
}
}
impl<T: Idx> MixedBitSet<T> {
#[inline]
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
if domain_size <= CHUNK_BITS {
MixedBitSet::Small(BitSet::new_empty(domain_size))
} else {
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
}
}
#[inline]
pub fn is_empty(&self) -> bool {
match self {
MixedBitSet::Small(set) => set.is_empty(),
MixedBitSet::Large(set) => set.is_empty(),
}
}
#[inline]
pub fn contains(&self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.contains(elem),
MixedBitSet::Large(set) => set.contains(elem),
}
}
#[inline]
pub fn insert(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.insert(elem),
MixedBitSet::Large(set) => set.insert(elem),
}
}
pub fn insert_all(&mut self) {
match self {
MixedBitSet::Small(set) => set.insert_all(),
MixedBitSet::Large(set) => set.insert_all(),
}
}
#[inline]
pub fn remove(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.remove(elem),
MixedBitSet::Large(set) => set.remove(elem),
}
}
pub fn iter(&self) -> MixedBitIter<'_, T> {
match self {
MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
}
}
bit_relations_inherent_impls! {}
}
impl<T> Clone for MixedBitSet<T> {
fn clone(&self) -> Self {
match self {
MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
}
}
/// WARNING: this implementation of clone_from may panic if the two
/// bitsets have different domain sizes. This constraint is not inherent to
/// `clone_from`, but it works with the existing call sites and allows a
/// faster implementation, which is important because this function is hot.
fn clone_from(&mut self, from: &Self) {
match (self, from) {
(MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
(MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
_ => panic!("MixedBitSet size mismatch"),
}
}
}
impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
fn union(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other),
_ => panic!("MixedBitSet size mismatch"),
}
}
fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other),
_ => panic!("MixedBitSet size mismatch"),
}
}
fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
unimplemented!("implement if/when necessary");
}
}
impl<T: Idx> fmt::Debug for MixedBitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt(w),
MixedBitSet::Large(set) => set.fmt(w),
}
}
}
pub enum MixedBitIter<'a, T: Idx> {
Small(BitIter<'a, T>),
Large(ChunkedBitIter<'a, T>),
}
impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<T> {
match self {
MixedBitIter::Small(iter) => iter.next(),
MixedBitIter::Large(iter) => iter.next(),
}
}
}
/// A resizable bitset type with a dense representation. /// A resizable bitset type with a dense representation.
/// ///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also

View file

@ -4,7 +4,7 @@
use std::fmt; use std::fmt;
use rustc_index::Idx; use rustc_index::Idx;
use rustc_index::bit_set::{BitSet, ChunkedBitSet}; use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use super::lattice::MaybeReachable; use super::lattice::MaybeReachable;
@ -127,6 +127,26 @@ where
} }
} }
impl<T, C> DebugWithContext<C> for MixedBitSet<T>
where
T: Idx + DebugWithContext<C>,
{
fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
}
}
fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match (self, old) {
(MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
(MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
_ => panic!("MixedBitSet size mismatch"),
}
}
}
impl<S, C> DebugWithContext<C> for MaybeReachable<S> impl<S, C> DebugWithContext<C> for MaybeReachable<S>
where where
S: DebugWithContext<C>, S: DebugWithContext<C>,

View file

@ -40,7 +40,7 @@
use std::iter; use std::iter;
use rustc_index::bit_set::{BitSet, ChunkedBitSet}; use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec}; use rustc_index::{Idx, IndexVec};
use crate::framework::BitSetExt; use crate::framework::BitSetExt;
@ -132,6 +132,12 @@ impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
} }
} }
impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
fn join(&mut self, other: &Self) -> bool {
self.union(other)
}
}
/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no /// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no
/// value of `T` is comparable with any other. /// value of `T` is comparable with any other.
/// ///

View file

@ -35,7 +35,7 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use rustc_data_structures::work_queue::WorkQueue; use rustc_data_structures::work_queue::WorkQueue;
use rustc_index::bit_set::{BitSet, ChunkedBitSet}; use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec}; use rustc_index::{Idx, IndexVec};
use rustc_middle::bug; use rustc_middle::bug;
use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal}; use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@ -77,6 +77,12 @@ impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
} }
} }
impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
fn contains(&self, elem: T) -> bool {
self.contains(elem)
}
}
/// A dataflow problem with an arbitrarily complex transfer function. /// A dataflow problem with an arbitrarily complex transfer function.
/// ///
/// This trait specifies the lattice on which this analysis operates (the domain), its /// This trait specifies the lattice on which this analysis operates (the domain), its
@ -337,6 +343,16 @@ impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
} }
} }
impl<T: Idx> GenKill<T> for MixedBitSet<T> {
fn gen_(&mut self, elem: T) {
self.insert(elem);
}
fn kill(&mut self, elem: T) {
self.remove(elem);
}
}
impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> { impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
fn gen_(&mut self, elem: T) { fn gen_(&mut self, elem: T) {
match self { match self {