-use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_SHIFT};
+use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_STRIDE};
 #[cfg(feature = "nightly")]
 use core::intrinsics;

@@ -7,6 +7,12 @@ use core::intrinsics;
 ///
 /// The bit mask is arranged so that low-order bits represent lower memory
 /// addresses for group match results.
+///
+/// For implementation reasons, the bits in the set may be sparsely packed, so
+/// that only one bit per byte is used (the high bit, bit 7). In that case,
+/// `BITMASK_STRIDE` will be 8 to indicate that counts and indices must be
+/// divided by 8 to normalize this difference. `BITMASK_MASK` is similarly a
+/// mask of all the actually-used bits.
 #[derive(Copy, Clone)]
 pub struct BitMask(pub BitMaskWord);

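The doc comment added above describes the layout used when no SIMD group is available: each byte of the mask word carries at most one match bit (its high bit), so raw bit positions must be divided by the stride to recover element indices. A minimal sketch of that layout, with illustrative constants rather than hashbrown's real definitions:

// Sketch of the stride-8 layout; the names mirror hashbrown's but the
// values and word type here are assumptions for illustration.
type BitMaskWord = u64;
const BITMASK_STRIDE: usize = 8;
const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080;

fn main() {
    // Suppose bytes 1 and 3 of a group matched: their high bits are set.
    let mask: BitMaskWord = (0x80 << 8) | (0x80 << 24);
    assert_eq!(mask & BITMASK_MASK, mask); // only per-byte high bits are used

    // Raw bit position 15 (high bit of byte 1) normalizes to index 1.
    assert_eq!(mask.trailing_zeros() as usize / BITMASK_STRIDE, 1);
}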
@@ -24,7 +30,7 @@ impl BitMask {
     pub fn remove_lowest_bit(self) -> BitMask {
         BitMask(self.0 & (self.0 - 1))
     }
-    /// Returns whether the `BitMask` has at least one set bits.
+    /// Returns whether the `BitMask` has at least one set bit.
     #[inline]
     pub fn any_bit_set(self) -> bool {
         self.0 != 0
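`remove_lowest_bit` is the classic `x & (x - 1)` trick: subtracting 1 flips the lowest set bit to 0 and the zeros below it to 1, so the AND clears exactly that bit. A standalone check (hypothetical free function, not the real method):

// Check of the x & (x - 1) identity used by remove_lowest_bit.
fn remove_lowest_bit(x: u64) -> u64 {
    // Callers must ensure x != 0, as hashbrown's iterator does before
    // calling this; x - 1 would underflow on zero.
    x & (x - 1)
}

fn main() {
    assert_eq!(remove_lowest_bit(0b1011_0100), 0b1011_0000); // bit 2 cleared
    assert_eq!(remove_lowest_bit(0b1000_0000), 0);
}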
@@ -36,7 +42,7 @@ impl BitMask {
         if self.0 == 0 {
             None
         } else {
-            Some(self.trailing_zeros())
+            Some(unsafe { self.lowest_set_bit_nonzero() })
         }
     }

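The change above routes the safe `lowest_set_bit` through the unsafe fast path: the `self.0 == 0` test is precisely the precondition `lowest_set_bit_nonzero` demands, so the `unsafe` block is sound. Combined with `remove_lowest_bit`, this yields the usual drain-lowest-bit iteration pattern; a sketch on a plain `u64` rather than the real `BitMask` type:

fn lowest_set_bit(x: u64) -> Option<usize> {
    // The zero check here is exactly what makes the unchecked path safe.
    if x == 0 {
        None
    } else {
        Some(x.trailing_zeros() as usize)
    }
}

fn main() {
    // Drain bits lowest-first, the pattern BitMask's iterator is built on.
    let mut mask: u64 = (1 << 1) | (1 << 4) | (1 << 9);
    let mut seen = Vec::new();
    while let Some(bit) = lowest_set_bit(mask) {
        seen.push(bit);
        mask &= mask - 1; // remove the bit just visited
    }
    assert_eq!(seen, [1, 4, 9]);
}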
@@ -45,7 +51,7 @@ impl BitMask {
     #[inline]
     #[cfg(feature = "nightly")]
     pub unsafe fn lowest_set_bit_nonzero(self) -> usize {
-        intrinsics::cttz_nonzero(self.0) as usize >> BITMASK_SHIFT
+        intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE
     }
     #[cfg(not(feature = "nightly"))]
     pub unsafe fn lowest_set_bit_nonzero(self) -> usize {
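Replacing `>> BITMASK_SHIFT` with `/ BITMASK_STRIDE` computes the same index whenever the stride is a power of two, and the compiler lowers the division back to a shift; the stride form just lets generic code express 1 or 8 directly. On stable Rust, a comparable zero-check-free count is available through `NonZeroU64`, whose "never zero" promise plays the role of `cttz_nonzero`'s contract. A hedged stand-in, with the stride assumed as in the earlier sketch:

use std::num::NonZeroU64;

const BITMASK_STRIDE: usize = 8; // assumed stride-8 layout, as above

// Stable-Rust analogue of the nightly path, not hashbrown's actual code.
unsafe fn lowest_set_bit_nonzero(word: u64) -> usize {
    // Safety: the caller guarantees at least one bit is set.
    unsafe { NonZeroU64::new_unchecked(word) }.trailing_zeros() as usize
        / BITMASK_STRIDE
}

fn main() {
    let word = 0x80u64 << 16; // high bit of byte 2
    assert_eq!(unsafe { lowest_set_bit_nonzero(word) }, 2);
}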
@@ -55,21 +61,22 @@ impl BitMask {
     /// Returns the number of trailing zeroes in the `BitMask`.
     #[inline]
     pub fn trailing_zeros(self) -> usize {
-        // ARM doesn't have a CTZ instruction, and instead uses RBIT + CLZ.
-        // However older ARM versions (pre-ARMv7) don't have RBIT and need to
-        // emulate it instead. Since we only have 1 bit set in each byte we can
-        // use REV + CLZ instead.
-        if cfg!(target_arch = "arm") && BITMASK_SHIFT >= 3 {
-            self.0.swap_bytes().leading_zeros() as usize >> BITMASK_SHIFT
+        // ARM doesn't have a trailing_zeroes instruction, and instead uses
+        // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM
+        // versions (pre-ARMv7) don't have RBIT and need to emulate it
+        // instead. Since we only have 1 bit set in each byte on ARM, we can
+        // use swap_bytes (REV) + leading_zeroes instead.
+        if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
+            self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE
         } else {
-            self.0.trailing_zeros() as usize >> BITMASK_SHIFT
+            self.0.trailing_zeros() as usize / BITMASK_STRIDE
         }
     }

     /// Returns the number of leading zeroes in the `BitMask`.
     #[inline]
     pub fn leading_zeros(self) -> usize {
-        self.0.leading_zeros() as usize / BITMASK_STRIDE
+        self.0.leading_zeros() as usize / BITMASK_STRIDE
     }
 }

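The ARM branch is correct because, with at most one live bit per byte, byte reversal (REV) sends the lowest set byte to the highest position, and the leftover within-byte offset of up to 7 bits vanishes under the divide-by-8. A portable check of that equivalence, with illustrative functions that are not part of the diff:

const BITMASK_STRIDE: usize = 8;

// For byte-sparse masks, swap_bytes + leading_zeros recovers the same byte
// index as trailing_zeros once divided by the stride.
fn via_rev(mask: u64) -> usize {
    mask.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE
}

fn via_ctz(mask: u64) -> usize {
    mask.trailing_zeros() as usize / BITMASK_STRIDE
}

fn main() {
    // High bits of bytes 2, 5 and 7 set, as in the stride-8 layout.
    let mask = (0x80u64 << 16) | (0x80 << 40) | (0x80 << 56);
    assert_eq!(via_rev(mask), 2);
    assert_eq!(via_ctz(mask), 2);
}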