Skip to main content

saluki_common/collections/
bitset.rs

1use smallvec::SmallVec;
2
3/// A dense, contiguous bitset.
4///
5/// `ContiguousBitSet` is designed for tracking set membership of dense, contiguous values, such as the indexes of
6/// values in a vector. It is able to hold up to 128 values (indices 0–127) inline with no heap allocation. It is _not_
7/// suitable for sparse values (values that are far apart in value) as the size of the underlying storage will tied to
8/// the largest value in the set: roughly `((max_value / 64) + 1) * 8` bytes.
9///
10/// All operations are O(1) with the exception of `set` when setting a bit that extends beyond the current capacity,
11/// which will require a heap allocation.
12#[derive(Clone, Debug, Default)]
13pub struct ContiguousBitSet {
14    words: SmallVec<[u64; 2]>,
15}
16
17impl ContiguousBitSet {
18    /// Creates a new, empty bitset with no bits set.
19    pub fn new() -> Self {
20        Self::default()
21    }
22
23    /// Returns the number of bits that are set.
24    pub fn len(&self) -> usize {
25        self.words.iter().map(|w| w.count_ones() as usize).sum()
26    }
27
28    /// Returns `false` if no bits are set.
29    pub fn is_empty(&self) -> bool {
30        self.words.iter().all(|w| *w == 0)
31    }
32
33    /// Sets the bit at `index`.
34    pub fn set(&mut self, index: usize) {
35        let word = index / 64;
36        let bit = index % 64;
37
38        // Grow if needed.
39        while self.words.len() <= word {
40            self.words.push(0);
41        }
42
43        self.words[word] |= 1 << bit;
44    }
45
46    /// Clears the bit at `index`.
47    ///
48    /// If `index` is beyond the current capacity, this is a no-op.
49    pub fn clear(&mut self, index: usize) {
50        let word = index / 64;
51        let bit = index % 64;
52
53        if let Some(w) = self.words.get_mut(word) {
54            *w &= !(1 << bit);
55        }
56    }
57
58    /// Returns `true` if the bit at `index` is set.
59    pub fn is_set(&self, index: usize) -> bool {
60        let word = index / 64;
61        let bit = index % 64;
62        self.words.get(word).is_some_and(|w| w & (1 << bit) != 0)
63    }
64
65    /// Clears all bits, resetting the bitset to its initial empty state.
66    ///
67    /// All existing capacity is retained.
68    pub fn clear_all(&mut self) {
69        for w in &mut self.words {
70            *w = 0;
71        }
72    }
73}
74
75impl<'a> IntoIterator for &'a ContiguousBitSet {
76    type Item = usize;
77    type IntoIter = Iter<'a>;
78
79    fn into_iter(self) -> Iter<'a> {
80        let len = self.words.len();
81        Iter {
82            words: &self.words,
83            front: 0,
84            back: len,
85            front_bits: self.words.first().copied().unwrap_or(0),
86            back_bits: if len > 1 { self.words[len - 1] } else { 0 },
87        }
88    }
89}
90
91/// Iterator over set bit indices of a [`ContiguousBitSet`].
92///
93/// Indices are yielded in ascending order: lowest to highest.
94pub struct Iter<'a> {
95    words: &'a [u64],
96    front: usize,
97    back: usize,
98    front_bits: u64,
99    back_bits: u64,
100}
101
102impl Iterator for Iter<'_> {
103    type Item = usize;
104
105    fn next(&mut self) -> Option<usize> {
106        loop {
107            if self.front_bits != 0 {
108                let bit = self.front_bits.trailing_zeros() as usize;
109                self.front_bits &= self.front_bits - 1; // Clear lowest set bit.
110                return Some(self.front * 64 + bit);
111            }
112            self.front += 1;
113            if self.front >= self.back {
114                return None;
115            }
116            self.front_bits = if self.front == self.back - 1 {
117                // Reached back's word -- take its remaining bits.
118                std::mem::take(&mut self.back_bits)
119            } else {
120                self.words[self.front]
121            };
122        }
123    }
124}
125
126impl DoubleEndedIterator for Iter<'_> {
127    fn next_back(&mut self) -> Option<usize> {
128        loop {
129            if self.back <= self.front {
130                return None;
131            }
132            if self.back > self.front + 1 {
133                // Back is at a separate word from front.
134                if self.back_bits != 0 {
135                    let bit = 63 - self.back_bits.leading_zeros() as usize;
136                    self.back_bits &= !(1u64 << bit); // Clear highest set bit.
137                    return Some((self.back - 1) * 64 + bit);
138                }
139                self.back -= 1;
140                if self.back > self.front + 1 {
141                    self.back_bits = self.words[self.back - 1];
142                }
143                // Otherwise back == front + 1: fall through to front_bits on next iteration.
144                continue;
145            }
146            // back == front + 1: same word as front. Consume from front_bits (high end).
147            if self.front_bits != 0 {
148                let bit = 63 - self.front_bits.leading_zeros() as usize;
149                self.front_bits &= !(1u64 << bit); // Clear highest set bit.
150                return Some(self.front * 64 + bit);
151            }
152            return None;
153        }
154    }
155}
156
157#[cfg(test)]
158mod tests {
159    use super::*;
160
161    #[test]
162    fn empty_bitset() {
163        let bs = ContiguousBitSet::new();
164        assert!(bs.is_empty());
165        assert!(!bs.is_set(0));
166        assert!(!bs.is_set(127));
167        assert_eq!(bs.len(), 0);
168    }
169
170    #[test]
171    fn set_and_check() {
172        let mut bs = ContiguousBitSet::new();
173        bs.set(0);
174        bs.set(63);
175        bs.set(64);
176        bs.set(127);
177
178        assert!(bs.is_set(0));
179        assert!(bs.is_set(63));
180        assert!(bs.is_set(64));
181        assert!(bs.is_set(127));
182        assert!(!bs.is_set(1));
183        assert!(!bs.is_set(65));
184        assert!(!bs.is_empty());
185        assert_eq!(bs.len(), 4);
186    }
187
188    #[test]
189    fn clear_bit() {
190        let mut bs = ContiguousBitSet::new();
191        bs.set(10);
192        assert!(bs.is_set(10));
193
194        bs.clear(10);
195        assert!(!bs.is_set(10));
196        assert!(bs.is_empty());
197    }
198
199    #[test]
200    fn clear_beyond_capacity_is_noop() {
201        let mut bs = ContiguousBitSet::new();
202        bs.clear(999); // Should not panic or allocate.
203        assert!(bs.is_empty());
204    }
205
206    #[test]
207    fn grows_beyond_inline() {
208        let mut bs = ContiguousBitSet::new();
209        bs.set(200);
210
211        assert!(bs.is_set(200));
212        assert!(!bs.is_set(199));
213        assert!(!bs.is_empty());
214        assert_eq!(bs.len(), 1);
215    }
216
217    #[test]
218    fn len_across_words() {
219        let mut bs = ContiguousBitSet::new();
220        for i in (0..192).step_by(3) {
221            bs.set(i);
222        }
223        assert_eq!(bs.len(), 64);
224    }
225
226    #[test]
227    fn clone_independence() {
228        let mut bs = ContiguousBitSet::new();
229        bs.set(5);
230        let mut cloned = bs.clone();
231        cloned.set(10);
232
233        assert!(bs.is_set(5));
234        assert!(!bs.is_set(10));
235        assert!(cloned.is_set(5));
236        assert!(cloned.is_set(10));
237    }
238
239    #[test]
240    fn iter_ascending() {
241        let mut bs = ContiguousBitSet::new();
242        bs.set(3);
243        bs.set(0);
244        bs.set(127);
245        bs.set(64);
246
247        let items: Vec<usize> = bs.into_iter().collect();
248        assert_eq!(items, vec![0, 3, 64, 127]);
249    }
250
251    #[test]
252    fn iter_descending() {
253        let mut bs = ContiguousBitSet::new();
254        bs.set(3);
255        bs.set(0);
256        bs.set(127);
257        bs.set(64);
258
259        let items: Vec<usize> = bs.into_iter().rev().collect();
260        assert_eq!(items, vec![127, 64, 3, 0]);
261    }
262
263    #[test]
264    fn iter_interleaved_single_word() {
265        let mut bs = ContiguousBitSet::new();
266        bs.set(1);
267        bs.set(3);
268        bs.set(5);
269        bs.set(7);
270
271        let mut iter = bs.into_iter();
272        assert_eq!(iter.next(), Some(1));
273        assert_eq!(iter.next_back(), Some(7));
274        assert_eq!(iter.next(), Some(3));
275        assert_eq!(iter.next_back(), Some(5));
276        assert_eq!(iter.next(), None);
277        assert_eq!(iter.next_back(), None);
278    }
279
280    #[test]
281    fn iter_interleaved_multi_word() {
282        let mut bs = ContiguousBitSet::new();
283        bs.set(0);
284        bs.set(63);
285        bs.set(64);
286        bs.set(127);
287
288        let mut iter = bs.into_iter();
289        assert_eq!(iter.next(), Some(0));
290        assert_eq!(iter.next_back(), Some(127));
291        assert_eq!(iter.next(), Some(63));
292        assert_eq!(iter.next_back(), Some(64));
293        assert_eq!(iter.next(), None);
294        assert_eq!(iter.next_back(), None);
295    }
296
297    #[test]
298    fn iter_empty() {
299        let bs = ContiguousBitSet::new();
300        let items: Vec<usize> = bs.into_iter().collect();
301        assert!(items.is_empty());
302        assert_eq!(bs.into_iter().next_back(), None);
303    }
304}