88// option. This file may not be copied, modified, or distributed
99// except according to those terms.
1010
11+ use array_vec:: ArrayVec ;
1112use std:: borrow:: { Borrow , BorrowMut , ToOwned } ;
1213use std:: fmt;
1314use std:: iter;
@@ -25,6 +26,8 @@ use rustc_serialize;
2526///
2627/// In other words, `T` is the type used to index into the bitvector
/// this type uses to represent the set of objects it holds.
29+ ///
30+ /// The representation is dense, using one bit per possible element.
2831#[ derive( Eq , PartialEq ) ]
2932pub struct IdxSetBuf < T : Idx > {
3033 _pd : PhantomData < fn ( & T ) > ,
@@ -93,6 +96,8 @@ impl<T: Idx> ToOwned for IdxSet<T> {
9396 }
9497}
9598
99+ const BITS_PER_WORD : usize = mem:: size_of :: < Word > ( ) * 8 ;
100+
96101impl < T : Idx > fmt:: Debug for IdxSetBuf < T > {
97102 fn fmt ( & self , w : & mut fmt:: Formatter ) -> fmt:: Result {
98103 w. debug_list ( )
@@ -111,8 +116,7 @@ impl<T: Idx> fmt::Debug for IdxSet<T> {
111116
112117impl < T : Idx > IdxSetBuf < T > {
113118 fn new ( init : Word , universe_size : usize ) -> Self {
114- let bits_per_word = mem:: size_of :: < Word > ( ) * 8 ;
115- let num_words = ( universe_size + ( bits_per_word - 1 ) ) / bits_per_word;
119+ let num_words = ( universe_size + ( BITS_PER_WORD - 1 ) ) / BITS_PER_WORD ;
116120 IdxSetBuf {
117121 _pd : Default :: default ( ) ,
118122 bits : vec ! [ init; num_words] ,
@@ -163,6 +167,16 @@ impl<T: Idx> IdxSet<T> {
163167 }
164168 }
165169
170+ /// Duplicates as a hybrid set.
171+ pub fn to_hybrid ( & self ) -> HybridIdxSetBuf < T > {
172+ // This universe_size may be slightly larger than the one specified
173+ // upon creation, due to rounding up to a whole word. That's ok.
174+ let universe_size = self . bits . len ( ) * BITS_PER_WORD ;
175+
176+ // Note: we currently don't bother trying to make a Sparse set.
177+ HybridIdxSetBuf :: Dense ( self . to_owned ( ) , universe_size)
178+ }
179+
166180 /// Removes all elements
167181 pub fn clear ( & mut self ) {
168182 for b in & mut self . bits {
@@ -180,21 +194,19 @@ impl<T: Idx> IdxSet<T> {
180194
181195 /// Clear all elements above `universe_size`.
182196 fn trim_to ( & mut self , universe_size : usize ) {
183- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
184-
185197 // `trim_block` is the first block where some bits have
186198 // to be cleared.
187- let trim_block = universe_size / word_bits ;
199+ let trim_block = universe_size / BITS_PER_WORD ;
188200
189201 // all the blocks above it have to be completely cleared.
190202 if trim_block < self . bits . len ( ) {
191203 for b in & mut self . bits [ trim_block+1 ..] {
192204 * b = 0 ;
193205 }
194206
195- // at that block, the `universe_size % word_bits ` lsbs
207+ // at that block, the `universe_size % BITS_PER_WORD ` lsbs
196208 // should remain.
197- let remaining_bits = universe_size % word_bits ;
209+ let remaining_bits = universe_size % BITS_PER_WORD ;
198210 let mask = ( 1 <<remaining_bits) -1 ;
199211 self . bits [ trim_block] &= mask;
200212 }
@@ -245,12 +257,46 @@ impl<T: Idx> IdxSet<T> {
245257 bitwise ( self . words_mut ( ) , other. words ( ) , & Union )
246258 }
247259
260+ /// Like `union()`, but takes a `SparseIdxSetBuf` argument.
261+ fn union_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
262+ let mut changed = false ;
263+ for elem in other. iter ( ) {
264+ changed |= self . add ( & elem) ;
265+ }
266+ changed
267+ }
268+
269+ /// Like `union()`, but takes a `HybridIdxSetBuf` argument.
270+ pub fn union_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
271+ match other {
272+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . union_sparse ( sparse) ,
273+ HybridIdxSetBuf :: Dense ( dense, _) => self . union ( dense) ,
274+ }
275+ }
276+
248277 /// Set `self = self - other` and return true if `self` changed.
249278 /// (i.e., if any bits were removed).
250279 pub fn subtract ( & mut self , other : & IdxSet < T > ) -> bool {
251280 bitwise ( self . words_mut ( ) , other. words ( ) , & Subtract )
252281 }
253282
283+ /// Like `subtract()`, but takes a `SparseIdxSetBuf` argument.
284+ fn subtract_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
285+ let mut changed = false ;
286+ for elem in other. iter ( ) {
287+ changed |= self . remove ( & elem) ;
288+ }
289+ changed
290+ }
291+
292+ /// Like `subtract()`, but takes a `HybridIdxSetBuf` argument.
293+ pub fn subtract_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
294+ match other {
295+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . subtract_sparse ( sparse) ,
296+ HybridIdxSetBuf :: Dense ( dense, _) => self . subtract ( dense) ,
297+ }
298+ }
299+
254300 /// Set `self = self & other` and return true if `self` changed.
255301 /// (i.e., if any bits were removed).
256302 pub fn intersect ( & mut self , other : & IdxSet < T > ) -> bool {
@@ -276,19 +322,200 @@ impl<'a, T: Idx> Iterator for Iter<'a, T> {
276322 type Item = T ;
277323
278324 fn next ( & mut self ) -> Option < T > {
279- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
280325 loop {
281326 if let Some ( ( ref mut word, offset) ) = self . cur {
282327 let bit_pos = word. trailing_zeros ( ) as usize ;
283- if bit_pos != word_bits {
328+ if bit_pos != BITS_PER_WORD {
284329 let bit = 1 << bit_pos;
285330 * word ^= bit;
286331 return Some ( T :: new ( bit_pos + offset) )
287332 }
288333 }
289334
290335 let ( i, word) = self . iter . next ( ) ?;
291- self . cur = Some ( ( * word, word_bits * i) ) ;
336+ self . cur = Some ( ( * word, BITS_PER_WORD * i) ) ;
337+ }
338+ }
339+ }
340+
341+ const SPARSE_MAX : usize = 8 ;
342+
343+ /// A sparse index set with a maximum of SPARSE_MAX elements. Used by
344+ /// HybridIdxSetBuf; do not use directly.
345+ ///
346+ /// The elements are stored as an unsorted vector with no duplicates.
347+ #[ derive( Clone , Debug ) ]
348+ pub struct SparseIdxSetBuf < T : Idx > ( ArrayVec < [ T ; SPARSE_MAX ] > ) ;
349+
350+ impl < T : Idx > SparseIdxSetBuf < T > {
351+ fn new ( ) -> Self {
352+ SparseIdxSetBuf ( ArrayVec :: new ( ) )
353+ }
354+
355+ fn len ( & self ) -> usize {
356+ self . 0 . len ( )
357+ }
358+
359+ fn contains ( & self , elem : & T ) -> bool {
360+ self . 0 . contains ( elem)
361+ }
362+
363+ fn add ( & mut self , elem : & T ) -> bool {
364+ // Ensure there are no duplicates.
365+ if self . 0 . contains ( elem) {
366+ false
367+ } else {
368+ self . 0 . push ( * elem) ;
369+ true
370+ }
371+ }
372+
373+ fn remove ( & mut self , elem : & T ) -> bool {
374+ if let Some ( i) = self . 0 . iter ( ) . position ( |e| e == elem) {
375+ // Swap the found element to the end, then pop it.
376+ let len = self . 0 . len ( ) ;
377+ self . 0 . swap ( i, len - 1 ) ;
378+ self . 0 . pop ( ) ;
379+ true
380+ } else {
381+ false
382+ }
383+ }
384+
385+ fn to_dense ( & self , universe_size : usize ) -> IdxSetBuf < T > {
386+ let mut dense = IdxSetBuf :: new_empty ( universe_size) ;
387+ for elem in self . 0 . iter ( ) {
388+ dense. add ( elem) ;
389+ }
390+ dense
391+ }
392+
393+ fn iter ( & self ) -> SparseIter < T > {
394+ SparseIter {
395+ iter : self . 0 . iter ( ) ,
396+ }
397+ }
398+ }
399+
400+ pub struct SparseIter < ' a , T : Idx > {
401+ iter : slice:: Iter < ' a , T > ,
402+ }
403+
404+ impl < ' a , T : Idx > Iterator for SparseIter < ' a , T > {
405+ type Item = T ;
406+
407+ fn next ( & mut self ) -> Option < T > {
408+ self . iter . next ( ) . map ( |e| * e)
409+ }
410+ }
411+
412+ /// Like IdxSetBuf, but with a hybrid representation: sparse when there are few
413+ /// elements in the set, but dense when there are many. It's especially
414+ /// efficient for sets that typically have a small number of elements, but a
415+ /// large `universe_size`, and are cleared frequently.
416+ #[ derive( Clone , Debug ) ]
417+ pub enum HybridIdxSetBuf < T : Idx > {
418+ Sparse ( SparseIdxSetBuf < T > , usize ) ,
419+ Dense ( IdxSetBuf < T > , usize ) ,
420+ }
421+
422+ impl < T : Idx > HybridIdxSetBuf < T > {
423+ pub fn new_empty ( universe_size : usize ) -> Self {
424+ HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , universe_size)
425+ }
426+
427+ fn universe_size ( & mut self ) -> usize {
428+ match * self {
429+ HybridIdxSetBuf :: Sparse ( _, size) => size,
430+ HybridIdxSetBuf :: Dense ( _, size) => size,
431+ }
432+ }
433+
434+ pub fn clear ( & mut self ) {
435+ let universe_size = self . universe_size ( ) ;
436+ * self = HybridIdxSetBuf :: new_empty ( universe_size) ;
437+ }
438+
439+ /// Returns true iff set `self` contains `elem`.
440+ pub fn contains ( & self , elem : & T ) -> bool {
441+ match self {
442+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. contains ( elem) ,
443+ HybridIdxSetBuf :: Dense ( dense, _) => dense. contains ( elem) ,
444+ }
445+ }
446+
447+ /// Adds `elem` to the set `self`.
448+ pub fn add ( & mut self , elem : & T ) -> bool {
449+ match self {
450+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. len ( ) < SPARSE_MAX => {
451+ // The set is sparse and has space for `elem`.
452+ sparse. add ( elem)
453+ }
454+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. contains ( elem) => {
455+ // The set is sparse and does not have space for `elem`, but
456+ // that doesn't matter because `elem` is already present.
457+ false
458+ }
459+ HybridIdxSetBuf :: Sparse ( _, _) => {
460+ // The set is sparse and full. Convert to a dense set.
461+ //
462+ // FIXME: This code is awful, but I can't work out how else to
463+ // appease the borrow checker.
464+ let dummy = HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , 0 ) ;
465+ match mem:: replace ( self , dummy) {
466+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => {
467+ let mut dense = sparse. to_dense ( universe_size) ;
468+ let changed = dense. add ( elem) ;
469+ assert ! ( changed) ;
470+ mem:: replace ( self , HybridIdxSetBuf :: Dense ( dense, universe_size) ) ;
471+ changed
472+ }
473+ _ => panic ! ( "impossible" ) ,
474+ }
475+ }
476+
477+ HybridIdxSetBuf :: Dense ( dense, _) => dense. add ( elem) ,
478+ }
479+ }
480+
481+ /// Removes `elem` from the set `self`.
482+ pub fn remove ( & mut self , elem : & T ) -> bool {
483+ // Note: we currently don't bother going from Dense back to Sparse.
484+ match self {
485+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. remove ( elem) ,
486+ HybridIdxSetBuf :: Dense ( dense, _) => dense. remove ( elem) ,
487+ }
488+ }
489+
490+ /// Converts to a dense set, consuming itself in the process.
491+ pub fn to_dense ( self ) -> IdxSetBuf < T > {
492+ match self {
493+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => sparse. to_dense ( universe_size) ,
494+ HybridIdxSetBuf :: Dense ( dense, _) => dense,
495+ }
496+ }
497+
498+ /// Iteration order is unspecified.
499+ pub fn iter ( & self ) -> HybridIter < T > {
500+ match self {
501+ HybridIdxSetBuf :: Sparse ( sparse, _) => HybridIter :: Sparse ( sparse. iter ( ) ) ,
502+ HybridIdxSetBuf :: Dense ( dense, _) => HybridIter :: Dense ( dense. iter ( ) ) ,
503+ }
504+ }
505+ }
506+
507+ pub enum HybridIter < ' a , T : Idx > {
508+ Sparse ( SparseIter < ' a , T > ) ,
509+ Dense ( Iter < ' a , T > ) ,
510+ }
511+
512+ impl < ' a , T : Idx > Iterator for HybridIter < ' a , T > {
513+ type Item = T ;
514+
515+ fn next ( & mut self ) -> Option < T > {
516+ match self {
517+ HybridIter :: Sparse ( sparse) => sparse. next ( ) ,
518+ HybridIter :: Dense ( dense) => dense. next ( ) ,
292519 }
293520 }
294521}
0 commit comments