@@ -548,6 +548,9 @@ impl InitMaskCompressed {
548548/// Transferring the initialization mask to other allocations.
549549impl < Tag , Extra > Allocation < Tag , Extra > {
550550 /// Creates a run-length encoding of the initialization mask.
551+ ///
552+ /// This is essentially a more space-efficient version of
553+ /// `InitMask::range_as_init_chunks(...).collect::<Vec<_>>()`.
551554 pub fn compress_uninit_range ( & self , range : AllocRange ) -> InitMaskCompressed {
552555 // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
553556 // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from
@@ -723,6 +726,12 @@ impl InitMask {
723726
724727 /// Returns an iterator, yielding a range of byte indexes for each contiguous region
725728 /// of initialized or uninitialized bytes inside the range `start..end` (end-exclusive).
729+ ///
730+ /// The iterator guarantees the following:
731+ /// - Chunks are nonempty.
732+ /// - Chunks are adjacent (each range's start is equal to the previous range's end).
733+ /// - Chunks span exactly `start..end` (the first starts at `start`, the last ends at `end`).
734+ /// - Chunks alternate between [`InitChunk::Init`] and [`InitChunk::Uninit`].
726735 #[ inline]
727736 pub fn range_as_init_chunks ( & self , start : Size , end : Size ) -> InitChunkIter < ' _ > {
728737 InitChunkIter :: new ( self , start, end)
@@ -839,7 +848,7 @@ impl InitChunk {
839848/// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`].
840849pub struct InitChunkIter < ' a > {
841850 init_mask : & ' a InitMask ,
842- /// Whether the last chunk was initialized.
851+ /// Whether the next chunk we will return is initialized.
843852 is_init : bool ,
844853 /// The current byte index into `init_mask`.
845854 start : Size ,
@@ -884,18 +893,45 @@ impl<'a> Iterator for InitChunkIter<'a> {
884893
885894/// Returns the index of the first bit in `start..end` (end-exclusive) that is equal to `is_init`.
886895fn find_bit ( init_mask : & InitMask , start : Size , end : Size , is_init : bool ) -> Option < Size > {
896+ /// A fast implementation of `find_bit`,
897+ /// which skips over an entire block at a time if it's all 0s (resp. 1s),
898+ /// and finds the first 1 (resp. 0) bit inside a block using `trailing_zeros` instead of a loop.
899+ ///
900+ /// Note that all examples below are written with 8 (instead of 64) bit blocks for simplicity,
901+ /// and with the least significant bit (and lowest block) first:
902+ ///
903+ /// 00000000|00000000
904+ /// ^ ^ ^ ^
905+ /// index: 0 7 8 15
906+ ///
907+ /// Also, if not stated, assume that `is_init = true`, that is, we are searching for the first 1 bit.
887908 fn find_bit_fast ( init_mask : & InitMask , start : Size , end : Size , is_init : bool ) -> Option < Size > {
909+ /// Search one block, returning the index of the first bit at or after `start_bit` that is equal to `is_init`, or `None` if the block contains no such bit.
888910 fn search_block (
889911 bits : Block ,
890912 block : usize ,
891913 start_bit : usize ,
892914 is_init : bool ,
893915 ) -> Option < Size > {
894- // invert bits so we're always looking for the first set bit
916+ // For the following examples, assume this function was called with:
917+ // bits = 11011100
918+ // start_bit = 3
919+ // is_init = false
920+ // Note again that the least significant bit is written first,
921+ // which is backwards compared to how we normally write numbers.
922+
923+ // Invert bits so we're always looking for the first set bit.
924+ // ! 11011100
925+ // bits = 00100011
895926 let bits = if is_init { bits } else { !bits } ;
896- // mask off unused start bits
927+ // Mask off unused start bits.
928+ // 00100011
929+ // & 00011111
930+ // bits = 00000011
897931 let bits = bits & ( !0 << start_bit) ;
898- // find set bit, if any
932+ // Find set bit, if any.
933+ // bit = trailing_zeros(00000011)
934+ // bit = 6
899935 if bits == 0 {
900936 None
901937 } else {
@@ -908,39 +944,103 @@ fn find_bit(init_mask: &InitMask, start: Size, end: Size, is_init: bool) -> Opti
908944 return None ;
909945 }
910946
947+ // Convert `start` and `end` to block indexes and bit indexes within each block.
948+ // We must convert `end` to an inclusive bound to handle block boundaries correctly.
949+ //
950+ // For example:
951+ //
952+ // (a) 00000000|00000000 (b) 00000000|
953+ // ^~~~~~~~~~~^ ^~~~~~~~~^
954+ // start end start end
955+ //
956+ // In both cases, the block index of `end` is 1.
957+ // But we do want to search block 1 in (a), and we don't in (b).
958+ //
959+ // If we subtract 1 from both end positions to make them inclusive:
960+ //
961+ // (a) 00000000|00000000 (b) 00000000|
962+ // ^~~~~~~~~~^ ^~~~~~~^
963+ // start end_inclusive start end_inclusive
964+ //
965+ // For (a), the block index of `end_inclusive` is 1, and for (b), it's 0.
966+ // This provides the desired behavior of searching blocks 0 and 1 for (a),
967+ // and searching only block 0 for (b).
911968 let ( start_block, start_bit) = bit_index ( start) ;
912- let ( end_block, end_bit) = bit_index ( end) ;
913-
914- // handle first block: need to skip `start_bit` bits
969+ let end_inclusive = Size :: from_bytes ( end. bytes ( ) - 1 ) ;
970+ let ( end_block_inclusive, _) = bit_index ( end_inclusive) ;
971+
972+ // Handle first block: need to skip `start_bit` bits.
973+ //
974+ // We need to handle the first block separately,
975+ // because there may be bits earlier in the block that should be ignored,
976+ // such as the bit marked (1) in this example:
977+ //
978+ // (1)
979+ // -|------
980+ // (c) 01000000|00000000|00000001
981+ // ^~~~~~~~~~~~~~~~~~^
982+ // start end
915983 if let Some ( i) =
916984 search_block ( init_mask. blocks [ start_block] , start_block, start_bit, is_init)
917985 {
918986 if i < end {
919987 return Some ( i) ;
920988 } else {
921- // if the range is less than a block, we may find a matching bit after `end`
989+ // If the range is less than a block, we may find a matching bit after `end`.
990+ //
991+ // For example, we shouldn't successfully find bit (2), because it's after `end`:
992+ //
993+ // (2)
994+ // -------|
995+ // (d) 00000001|00000000|00000001
996+ // ^~~~~^
997+ // start end
998+ //
999+ // An alternative would be to mask off end bits in the same way as we do for start bits,
1000+ // but performing this check afterwards is faster and simpler to implement.
9221001 return None ;
9231002 }
9241003 }
9251004
926- let one_block_past_the_end = if end_bit > 0 {
927- // if `end_bit` > 0, then the range overlaps `end_block`
928- end_block + 1
929- } else {
930- end_block
931- } ;
932-
933- // handle remaining blocks
934- if start_block < one_block_past_the_end {
935- for ( & bits, block) in init_mask. blocks [ start_block + 1 ..one_block_past_the_end]
1005+ // Handle remaining blocks.
1006+ //
1007+ // We can skip over an entire block at once if it's all 0s (resp. 1s).
1008+ // The block marked (3) in this example is the first block that will be handled by this loop,
1009+ // and it will be skipped for that reason:
1010+ //
1011+ // (3)
1012+ // --------
1013+ // (e) 01000000|00000000|00000001
1014+ // ^~~~~~~~~~~~~~~~~~^
1015+ // start end
1016+ if start_block < end_block_inclusive {
1017+ // This loop is written in a specific way for performance.
1018+ // Notably: `..end_block_inclusive + 1` is used for an inclusive range instead of `..=end_block_inclusive`,
1019+ // and `.zip(start_block + 1..)` is used to track the index instead of `.enumerate().skip().take()`,
1020+ // because both alternatives result in significantly worse codegen.
1021+ // `end_block_inclusive + 1` is guaranteed not to wrap, because `end_block_inclusive <= end / BLOCK_SIZE`,
1022+ // and `BLOCK_SIZE` (the number of bits per block) will always be at least 8 (1 byte).
1023+ for ( & bits, block) in init_mask. blocks [ start_block + 1 ..end_block_inclusive + 1 ]
9361024 . iter ( )
9371025 . zip ( start_block + 1 ..)
9381026 {
9391027 if let Some ( i) = search_block ( bits, block, 0 , is_init) {
9401028 if i < end {
9411029 return Some ( i) ;
9421030 } else {
943- // if this is the last block, we may find a matching bit after `end`
1031+ // If this is the last block, we may find a matching bit after `end`.
1032+ //
1033+ // For example, we shouldn't successfully find bit (4), because it's after `end`:
1034+ //
1035+ // (4)
1036+ // -------|
1037+ // (f) 00000001|00000000|00000001
1038+ // ^~~~~~~~~~~~~~~~~~^
1039+ // start end
1040+ //
1041+ // As above with example (d), we could handle the end block separately and mask off end bits,
1042+ // but unconditionally searching an entire block at once and performing this check afterwards
1043+ // is faster and much simpler to implement.
9441044 return None ;
9451045 }
9461046 }
0 commit comments