1
Fork 0

rename BitSet to DenseBitSet

This should make it clearer that this bitset is dense, with the
advantages and disadvantages that it entails.
This commit is contained in:
Rémy Rakic 2025-01-07 15:19:05 +00:00
parent 7e4077d06f
commit a13354bea0
67 changed files with 367 additions and 356 deletions

View file

@ -108,33 +108,33 @@ macro_rules! bit_relations_inherent_impls {
///
#[cfg_attr(feature = "nightly", derive(Decodable_Generic, Encodable_Generic))]
#[derive(Eq, PartialEq, Hash)]
pub struct BitSet<T> {
pub struct DenseBitSet<T> {
domain_size: usize,
words: SmallVec<[Word; 2]>,
marker: PhantomData<T>,
}
impl<T> BitSet<T> {
impl<T> DenseBitSet<T> {
/// Gets the domain size.
pub fn domain_size(&self) -> usize {
self.domain_size
}
}
impl<T: Idx> BitSet<T> {
impl<T: Idx> DenseBitSet<T> {
/// Creates a new, empty bitset with a given `domain_size`.
#[inline]
pub fn new_empty(domain_size: usize) -> BitSet<T> {
pub fn new_empty(domain_size: usize) -> DenseBitSet<T> {
let num_words = num_words(domain_size);
BitSet { domain_size, words: smallvec![0; num_words], marker: PhantomData }
DenseBitSet { domain_size, words: smallvec![0; num_words], marker: PhantomData }
}
/// Creates a new, filled bitset with a given `domain_size`.
#[inline]
pub fn new_filled(domain_size: usize) -> BitSet<T> {
pub fn new_filled(domain_size: usize) -> DenseBitSet<T> {
let num_words = num_words(domain_size);
let mut result =
BitSet { domain_size, words: smallvec![!0; num_words], marker: PhantomData };
DenseBitSet { domain_size, words: smallvec![!0; num_words], marker: PhantomData };
result.clear_excess_bits();
result
}
@ -165,7 +165,7 @@ impl<T: Idx> BitSet<T> {
/// Is `self` is a (non-strict) superset of `other`?
#[inline]
pub fn superset(&self, other: &BitSet<T>) -> bool {
pub fn superset(&self, other: &DenseBitSet<T>) -> bool {
assert_eq!(self.domain_size, other.domain_size);
self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b)
}
@ -278,32 +278,36 @@ impl<T: Idx> BitSet<T> {
}
// dense REL dense
impl<T: Idx> BitRelations<BitSet<T>> for BitSet<T> {
fn union(&mut self, other: &BitSet<T>) -> bool {
impl<T: Idx> BitRelations<DenseBitSet<T>> for DenseBitSet<T> {
fn union(&mut self, other: &DenseBitSet<T>) -> bool {
assert_eq!(self.domain_size, other.domain_size);
bitwise(&mut self.words, &other.words, |a, b| a | b)
}
fn subtract(&mut self, other: &BitSet<T>) -> bool {
fn subtract(&mut self, other: &DenseBitSet<T>) -> bool {
assert_eq!(self.domain_size, other.domain_size);
bitwise(&mut self.words, &other.words, |a, b| a & !b)
}
fn intersect(&mut self, other: &BitSet<T>) -> bool {
fn intersect(&mut self, other: &DenseBitSet<T>) -> bool {
assert_eq!(self.domain_size, other.domain_size);
bitwise(&mut self.words, &other.words, |a, b| a & b)
}
}
impl<T: Idx> From<GrowableBitSet<T>> for BitSet<T> {
impl<T: Idx> From<GrowableBitSet<T>> for DenseBitSet<T> {
fn from(bit_set: GrowableBitSet<T>) -> Self {
bit_set.bit_set
}
}
impl<T> Clone for BitSet<T> {
impl<T> Clone for DenseBitSet<T> {
fn clone(&self) -> Self {
BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData }
DenseBitSet {
domain_size: self.domain_size,
words: self.words.clone(),
marker: PhantomData,
}
}
fn clone_from(&mut self, from: &Self) {
@ -312,13 +316,13 @@ impl<T> Clone for BitSet<T> {
}
}
impl<T: Idx> fmt::Debug for BitSet<T> {
impl<T: Idx> fmt::Debug for DenseBitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
w.debug_list().entries(self.iter()).finish()
}
}
impl<T: Idx> ToString for BitSet<T> {
impl<T: Idx> ToString for DenseBitSet<T> {
fn to_string(&self) -> String {
let mut result = String::new();
let mut sep = '[';
@ -902,7 +906,7 @@ impl<T: Idx> BitRelations<ChunkedBitSet<T>> for ChunkedBitSet<T> {
}
}
impl<T: Idx> BitRelations<ChunkedBitSet<T>> for BitSet<T> {
impl<T: Idx> BitRelations<ChunkedBitSet<T>> for DenseBitSet<T> {
fn union(&mut self, other: &ChunkedBitSet<T>) -> bool {
sequential_update(|elem| self.insert(elem), other.iter())
}
@ -1114,10 +1118,10 @@ where
false
}
/// A bitset with a mixed representation, using `BitSet` for small and medium
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
/// for at least two chunks. This is a good choice for many bitsets that can
/// have large domain sizes (e.g. 5000+).
/// A bitset with a mixed representation, using `DenseBitSet` for small and
/// medium bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with
/// enough bits for at least two chunks. This is a good choice for many bitsets
/// that can have large domain sizes (e.g. 5000+).
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
@ -1127,7 +1131,7 @@ where
/// will panic if the bitsets have differing domain sizes.
#[derive(PartialEq, Eq)]
pub enum MixedBitSet<T> {
Small(BitSet<T>),
Small(DenseBitSet<T>),
Large(ChunkedBitSet<T>),
}
@ -1144,7 +1148,7 @@ impl<T: Idx> MixedBitSet<T> {
#[inline]
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
if domain_size <= CHUNK_BITS {
MixedBitSet::Small(BitSet::new_empty(domain_size))
MixedBitSet::Small(DenseBitSet::new_empty(domain_size))
} else {
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
}
@ -1283,7 +1287,7 @@ impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
/// to or greater than the domain size.
#[derive(Clone, Debug, PartialEq)]
pub struct GrowableBitSet<T: Idx> {
bit_set: BitSet<T>,
bit_set: DenseBitSet<T>,
}
impl<T: Idx> Default for GrowableBitSet<T> {
@ -1306,11 +1310,11 @@ impl<T: Idx> GrowableBitSet<T> {
}
pub fn new_empty() -> GrowableBitSet<T> {
GrowableBitSet { bit_set: BitSet::new_empty(0) }
GrowableBitSet { bit_set: DenseBitSet::new_empty(0) }
}
pub fn with_capacity(capacity: usize) -> GrowableBitSet<T> {
GrowableBitSet { bit_set: BitSet::new_empty(capacity) }
GrowableBitSet { bit_set: DenseBitSet::new_empty(capacity) }
}
/// Returns `true` if the set has changed.
@ -1349,8 +1353,8 @@ impl<T: Idx> GrowableBitSet<T> {
}
}
impl<T: Idx> From<BitSet<T>> for GrowableBitSet<T> {
fn from(bit_set: BitSet<T>) -> Self {
impl<T: Idx> From<DenseBitSet<T>> for GrowableBitSet<T> {
fn from(bit_set: DenseBitSet<T>) -> Self {
Self { bit_set }
}
}
@ -1386,7 +1390,7 @@ impl<R: Idx, C: Idx> BitMatrix<R, C> {
}
/// Creates a new matrix, with `row` used as the value for every row.
pub fn from_row_n(row: &BitSet<C>, num_rows: usize) -> BitMatrix<R, C> {
pub fn from_row_n(row: &DenseBitSet<C>, num_rows: usize) -> BitMatrix<R, C> {
let num_columns = row.domain_size();
let words_per_row = num_words(num_columns);
assert_eq!(words_per_row, row.words.len());
@ -1484,7 +1488,7 @@ impl<R: Idx, C: Idx> BitMatrix<R, C> {
/// Adds the bits from `with` to the bits from row `write`, and
/// returns `true` if anything changed.
pub fn union_row_with(&mut self, with: &BitSet<C>, write: R) -> bool {
pub fn union_row_with(&mut self, with: &DenseBitSet<C>, write: R) -> bool {
assert!(write.index() < self.num_rows);
assert_eq!(with.domain_size(), self.num_columns);
let (write_start, write_end) = self.range(write);
@ -1541,8 +1545,8 @@ impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> {
/// A fixed-column-size, variable-row-size 2D bit matrix with a moderately
/// sparse representation.
///
/// Initially, every row has no explicit representation. If any bit within a
/// row is set, the entire row is instantiated as `Some(<BitSet>)`.
/// Initially, every row has no explicit representation. If any bit within a row
/// is set, the entire row is instantiated as `Some(<DenseBitSet>)`.
/// Furthermore, any previously uninstantiated rows prior to it will be
/// instantiated as `None`. Those prior rows may themselves become fully
/// instantiated later on if any of their bits are set.
@ -1556,7 +1560,7 @@ where
C: Idx,
{
num_columns: usize,
rows: IndexVec<R, Option<BitSet<C>>>,
rows: IndexVec<R, Option<DenseBitSet<C>>>,
}
impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
@ -1565,10 +1569,10 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
Self { num_columns, rows: IndexVec::new() }
}
fn ensure_row(&mut self, row: R) -> &mut BitSet<C> {
// Instantiate any missing rows up to and including row `row` with an empty `BitSet`.
// Then replace row `row` with a full `BitSet` if necessary.
self.rows.get_or_insert_with(row, || BitSet::new_empty(self.num_columns))
fn ensure_row(&mut self, row: R) -> &mut DenseBitSet<C> {
// Instantiate any missing rows up to and including row `row` with an empty `DenseBitSet`.
// Then replace row `row` with a full `DenseBitSet` if necessary.
self.rows.get_or_insert_with(row, || DenseBitSet::new_empty(self.num_columns))
}
/// Sets the cell at `(row, column)` to true. Put another way, insert
@ -1642,17 +1646,17 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
self.row(row).into_iter().flat_map(|r| r.iter())
}
pub fn row(&self, row: R) -> Option<&BitSet<C>> {
pub fn row(&self, row: R) -> Option<&DenseBitSet<C>> {
self.rows.get(row)?.as_ref()
}
/// Intersects `row` with `set`. `set` can be either `BitSet` or
/// Intersects `row` with `set`. `set` can be either `DenseBitSet` or
/// `ChunkedBitSet`. Has no effect if `row` does not exist.
///
/// Returns true if the row was changed.
pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool
where
BitSet<C>: BitRelations<Set>,
DenseBitSet<C>: BitRelations<Set>,
{
match self.rows.get_mut(row) {
Some(Some(row)) => row.intersect(set),
@ -1660,13 +1664,13 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
}
}
/// Subtracts `set` from `row`. `set` can be either `BitSet` or
/// Subtracts `set` from `row`. `set` can be either `DenseBitSet` or
/// `ChunkedBitSet`. Has no effect if `row` does not exist.
///
/// Returns true if the row was changed.
pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool
where
BitSet<C>: BitRelations<Set>,
DenseBitSet<C>: BitRelations<Set>,
{
match self.rows.get_mut(row) {
Some(Some(row)) => row.subtract(set),
@ -1674,13 +1678,13 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
}
}
/// Unions `row` with `set`. `set` can be either `BitSet` or
/// Unions `row` with `set`. `set` can be either `DenseBitSet` or
/// `ChunkedBitSet`.
///
/// Returns true if the row was changed.
pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool
where
BitSet<C>: BitRelations<Set>,
DenseBitSet<C>: BitRelations<Set>,
{
self.ensure_row(row).union(set)
}

View file

@ -8,7 +8,7 @@ use test::Bencher;
#[test]
fn test_new_filled() {
for i in 0..128 {
let idx_buf = BitSet::new_filled(i);
let idx_buf = DenseBitSet::new_filled(i);
let elems: Vec<usize> = idx_buf.iter().collect();
let expected: Vec<usize> = (0..i).collect();
assert_eq!(elems, expected);
@ -17,7 +17,7 @@ fn test_new_filled() {
#[test]
fn bitset_iter_works() {
let mut bitset: BitSet<usize> = BitSet::new_empty(100);
let mut bitset: DenseBitSet<usize> = DenseBitSet::new_empty(100);
bitset.insert(1);
bitset.insert(10);
bitset.insert(19);
@ -32,7 +32,7 @@ fn bitset_iter_works() {
#[test]
fn bitset_iter_works_2() {
let mut bitset: BitSet<usize> = BitSet::new_empty(320);
let mut bitset: DenseBitSet<usize> = DenseBitSet::new_empty(320);
bitset.insert(0);
bitset.insert(127);
bitset.insert(191);
@ -43,25 +43,25 @@ fn bitset_iter_works_2() {
#[test]
fn bitset_clone_from() {
let mut a: BitSet<usize> = BitSet::new_empty(10);
let mut a: DenseBitSet<usize> = DenseBitSet::new_empty(10);
a.insert(4);
a.insert(7);
a.insert(9);
let mut b = BitSet::new_empty(2);
let mut b = DenseBitSet::new_empty(2);
b.clone_from(&a);
assert_eq!(b.domain_size(), 10);
assert_eq!(b.iter().collect::<Vec<_>>(), [4, 7, 9]);
b.clone_from(&BitSet::new_empty(40));
b.clone_from(&DenseBitSet::new_empty(40));
assert_eq!(b.domain_size(), 40);
assert_eq!(b.iter().collect::<Vec<_>>(), []);
}
#[test]
fn union_two_sets() {
let mut set1: BitSet<usize> = BitSet::new_empty(65);
let mut set2: BitSet<usize> = BitSet::new_empty(65);
let mut set1: DenseBitSet<usize> = DenseBitSet::new_empty(65);
let mut set2: DenseBitSet<usize> = DenseBitSet::new_empty(65);
assert!(set1.insert(3));
assert!(!set1.insert(3));
assert!(set2.insert(5));
@ -268,8 +268,8 @@ fn with_elements_chunked(elements: &[usize], domain_size: usize) -> ChunkedBitSe
s
}
fn with_elements_standard(elements: &[usize], domain_size: usize) -> BitSet<usize> {
let mut s = BitSet::new_empty(domain_size);
fn with_elements_standard(elements: &[usize], domain_size: usize) -> DenseBitSet<usize> {
let mut s = DenseBitSet::new_empty(domain_size);
for &e in elements {
assert!(s.insert(e));
}
@ -503,15 +503,15 @@ fn sparse_matrix_operations() {
matrix.insert(2, 99);
matrix.insert(4, 0);
let mut disjoint: BitSet<usize> = BitSet::new_empty(100);
let mut disjoint: DenseBitSet<usize> = DenseBitSet::new_empty(100);
disjoint.insert(33);
let mut superset = BitSet::new_empty(100);
let mut superset = DenseBitSet::new_empty(100);
superset.insert(22);
superset.insert(75);
superset.insert(33);
let mut subset = BitSet::new_empty(100);
let mut subset = DenseBitSet::new_empty(100);
subset.insert(22);
// SparseBitMatrix::remove
@ -568,7 +568,7 @@ fn dense_insert_range() {
where
R: RangeBounds<usize> + Clone + IntoIterator<Item = usize> + std::fmt::Debug,
{
let mut set = BitSet::new_empty(domain);
let mut set = DenseBitSet::new_empty(domain);
set.insert_range(range.clone());
for i in set.iter() {
assert!(range.contains(&i));
@ -609,7 +609,7 @@ fn dense_insert_range() {
#[test]
fn dense_last_set_before() {
fn easy(set: &BitSet<usize>, needle: impl RangeBounds<usize>) -> Option<usize> {
fn easy(set: &DenseBitSet<usize>, needle: impl RangeBounds<usize>) -> Option<usize> {
let mut last_leq = None;
for e in set.iter() {
if needle.contains(&e) {
@ -620,7 +620,7 @@ fn dense_last_set_before() {
}
#[track_caller]
fn cmp(set: &BitSet<usize>, needle: impl RangeBounds<usize> + Clone + std::fmt::Debug) {
fn cmp(set: &DenseBitSet<usize>, needle: impl RangeBounds<usize> + Clone + std::fmt::Debug) {
assert_eq!(
set.last_set_in(needle.clone()),
easy(set, needle.clone()),
@ -629,7 +629,7 @@ fn dense_last_set_before() {
set
);
}
let mut set = BitSet::new_empty(300);
let mut set = DenseBitSet::new_empty(300);
cmp(&set, 50..=50);
set.insert(WORD_BITS);
cmp(&set, WORD_BITS..=WORD_BITS);
@ -645,7 +645,7 @@ fn dense_last_set_before() {
for i in 0..=WORD_BITS * 2 {
for j in i..=WORD_BITS * 2 {
for k in 0..WORD_BITS * 2 {
let mut set = BitSet::new_empty(300);
let mut set = DenseBitSet::new_empty(300);
cmp(&set, i..j);
cmp(&set, i..=j);
set.insert(k);
@ -658,7 +658,7 @@ fn dense_last_set_before() {
#[bench]
fn bench_insert(b: &mut Bencher) {
let mut bs = BitSet::new_filled(99999usize);
let mut bs = DenseBitSet::new_filled(99999usize);
b.iter(|| {
black_box(bs.insert(black_box(100u32)));
});
@ -666,7 +666,7 @@ fn bench_insert(b: &mut Bencher) {
#[bench]
fn bench_remove(b: &mut Bencher) {
let mut bs = BitSet::new_filled(99999usize);
let mut bs = DenseBitSet::new_filled(99999usize);
b.iter(|| {
black_box(bs.remove(black_box(100u32)));
});
@ -674,7 +674,7 @@ fn bench_remove(b: &mut Bencher) {
#[bench]
fn bench_iter(b: &mut Bencher) {
let bs = BitSet::new_filled(99999usize);
let bs = DenseBitSet::new_filled(99999usize);
b.iter(|| {
bs.iter().map(|b: usize| black_box(b)).for_each(drop);
});
@ -682,8 +682,8 @@ fn bench_iter(b: &mut Bencher) {
#[bench]
fn bench_intersect(b: &mut Bencher) {
let mut ba: BitSet<u32> = BitSet::new_filled(99999usize);
let bb = BitSet::new_filled(99999usize);
let mut ba: DenseBitSet<u32> = DenseBitSet::new_filled(99999usize);
let bb = DenseBitSet::new_filled(99999usize);
b.iter(|| {
ba.intersect(black_box(&bb));
});