@@ -352,9 +352,9 @@ impl<T> Bucket<T> {
352352 /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
353353 /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
354354 /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
355- /// [`RawTableInner::data_end<T>`]: crate::raw:: RawTableInner::data_end<T>
355+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
356356 /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
357- /// [`RawTableInner::buckets`]: crate::raw:: RawTableInner::buckets
357+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
358358 #[inline]
359359 unsafe fn from_base_index(base: NonNull<T>, index: usize) -> Self {
360360 // If mem::size_of::<T>() != 0 then return a pointer to an `element` in
@@ -424,9 +424,9 @@ impl<T> Bucket<T> {
424424 /// [`Bucket`]: crate::raw::Bucket
425425 /// [`from_base_index`]: crate::raw::Bucket::from_base_index
426426 /// [`RawTable::data_end`]: crate::raw::RawTable::data_end
427- /// [`RawTableInner::data_end<T>`]: crate::raw:: RawTableInner::data_end<T>
427+ /// [`RawTableInner::data_end<T>`]: RawTableInner::data_end<T>
428428 /// [`RawTable`]: crate::raw::RawTable
429- /// [`RawTableInner`]: crate::raw:: RawTableInner
429+ /// [`RawTableInner`]: RawTableInner
430430 /// [`<*const T>::offset_from`]: https://doc.rust-lang.org/nightly/core/primitive.pointer.html#method.offset_from
431431 #[inline]
432432 unsafe fn to_base_index(&self, base: NonNull<T>) -> usize {
@@ -560,7 +560,7 @@ impl<T> Bucket<T> {
560560 /// [`<*mut T>::sub`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.sub-1
561561 /// [`NonNull::new_unchecked`]: https://doc.rust-lang.org/stable/std/ptr/struct.NonNull.html#method.new_unchecked
562562 /// [`RawTable::buckets`]: crate::raw::RawTable::buckets
563- /// [`RawTableInner::buckets`]: crate::raw:: RawTableInner::buckets
563+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
564564 #[inline]
565565 unsafe fn next_n(&self, offset: usize) -> Self {
566566 let ptr = if Self::IS_ZERO_SIZED_TYPE {
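The doc comments touched by the hunks above describe `Bucket`'s addressing scheme: for sized `T` the bucket is reached from a base pointer one past the end of the data (`data_end`), and the index is recovered with `<*const T>::offset_from`. Below is a minimal standalone sketch of that end-pointer arithmetic over a plain array; the names and the exact `sub(index + 1)` offset are illustrative assumptions, not the crate's `Bucket` internals.

```rust
fn main() {
    let data = [10u32, 20, 30, 40];
    // A pointer one past the last element, playing the role of `data_end`.
    let base = unsafe { data.as_ptr().add(data.len()) };

    for index in 0..data.len() {
        // SAFETY: `index < data.len()`, so `base.sub(index + 1)` stays inside
        // the original allocation (elements are addressed back from the end).
        let elem = unsafe { *base.sub(index + 1) };
        // Recover the index with `offset_from`, mirroring the idea behind
        // `to_base_index`.
        let recovered = unsafe { base.offset_from(base.sub(index + 1)) } as usize - 1;
        assert_eq!(recovered, index);
        println!("bucket {index} -> {elem}");
    }
}
```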
@@ -1642,7 +1642,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
16421642 // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
16431643 let result = (probe_seq.pos + bit) & self.bucket_mask;
16441644
1645- // In tables smaller than the group width, trailing control
1645+ // In tables smaller than the group width
1646+ // (self.buckets() < Group::WIDTH), trailing control
16461647 // bytes outside the range of the table are filled with
16471648 // EMPTY entries. These will unfortunately trigger a
16481649 // match, but once masked may point to a full bucket that
@@ -1663,8 +1664,9 @@ impl<A: Allocator + Clone> RawTableInner<A> {
16631664 // and properly aligned, because the table is already allocated
16641665 // (see `TableLayout::calculate_layout_for` and `ptr::read`);
16651666 //
1666- // * For tables larger than the group width, we will never end up in the given
1667- // branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
1667+ // * For tables larger than the group width (self.buckets() >= Group::WIDTH),
1668+ // we will never end up in the given branch, since
1669+ // `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
16681670 // full bucket index. For tables smaller than the group width, calling the
16691671 // `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
16701672 // safe, as the trailing control bytes outside the range of the table are filled
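Both safety comments lean on the same invariant: the number of buckets is a power of two and `self.bucket_mask = self.buckets() - 1`, so masking a probe position can never name an out-of-range bucket and is equivalent to reducing modulo the table size. A small standalone check of that identity (the loop bounds are arbitrary test values, not anything from the crate):

```rust
fn main() {
    for &buckets in &[1usize, 2, 4, 8, 16, 64] {
        let bucket_mask = buckets - 1;
        for pos in 0..200usize {
            for bit in 0..16usize {
                // For power-of-two sizes, masking with `bucket_mask` is exactly
                // the remainder modulo `buckets`, and never exceeds `bucket_mask`.
                assert_eq!((pos + bit) & bucket_mask, (pos + bit) % buckets);
            }
        }
    }
    println!("masking with `bucket_mask` matches `% buckets` for power-of-two sizes");
}
```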
@@ -1731,12 +1733,49 @@ impl<A: Allocator + Clone> RawTableInner<A> {
17311733 }
17321734 }
17331735
1736+ /// Prepares for rehashing data in place (that is, without allocating new memory).
1737+ /// Converts all `FULL` control bytes to `DELETED`, and all `DELETED` control
1738+ /// bytes to `EMPTY`, i.e. performs the following conversion:
1739+ ///
1740+ /// - `EMPTY` control bytes -> `EMPTY`;
1741+ /// - `DELETED` control bytes -> `EMPTY`;
1742+ /// - `FULL` control bytes -> `DELETED`.
1743+ ///
1744+ /// This function does not make any changes to the `data` parts of the table,
1745+ /// or any changes to the `items` or `growth_left` fields of the table.
1746+ ///
1747+ /// # Safety
1748+ ///
1749+ /// You must observe the following safety rules when calling this function:
1750+ ///
1751+ /// * The [`RawTableInner`] has already been allocated;
1752+ ///
1753+ /// * The caller of this function must convert the `DELETED` bytes back to `FULL`
1754+ /// bytes when re-inserting them into their ideal position (which was impossible
1755+ /// to do during the first insert due to tombstones). If the caller does not do
1756+ /// this, then calling this function may result in a memory leak.
1757+ ///
1758+ /// Calling this function on a table that has not been allocated results in
1759+ /// [`undefined behavior`].
1760+ ///
1761+ /// See also the [`Bucket::as_ptr`] method for more information about properly removing
1762+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
1763+ ///
1764+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
1765+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
17341766 #[allow(clippy::mut_mut)]
17351767 #[inline]
17361768 unsafe fn prepare_rehash_in_place(&mut self) {
1737- // Bulk convert all full control bytes to DELETED, and all DELETED
1738- // control bytes to EMPTY. This effectively frees up all buckets
1739- // containing a DELETED entry.
1769+ // Bulk convert all full control bytes to DELETED, and all DELETED control bytes to EMPTY.
1770+ // This effectively frees up all buckets containing a DELETED entry.
1771+ //
1772+ // SAFETY:
1773+ // 1. `i` is guaranteed to be within bounds since we are iterating from zero to `buckets - 1`;
1774+ // 2. Even if `i == self.bucket_mask`, it is safe to call `Group::load_aligned`
1775+ // due to the extended control bytes range, which is `self.bucket_mask + 1 + Group::WIDTH`;
1776+ // 3. The caller of this function guarantees that [`RawTableInner`] has already been allocated;
1777+ // 4. We can use `Group::load_aligned` and `Group::store_aligned` here since we start from 0
1778+ // and go to the end with a step equal to `Group::WIDTH` (see `TableLayout::calculate_layout_for`).
17401779 for i in (0..self.buckets()).step_by(Group::WIDTH) {
17411780 let group = Group::load_aligned(self.ctrl(i));
17421781 let group = group.convert_special_to_empty_and_full_to_deleted();
@@ -1745,10 +1784,19 @@ impl<A: Allocator + Clone> RawTableInner<A> {
17451784
17461785 // Fix up the trailing control bytes. See the comments in set_ctrl
17471786 // for the handling of tables smaller than the group width.
1748- if self.buckets() < Group::WIDTH {
1787+ //
1788+ // SAFETY: The caller of this function guarantees that [`RawTableInner`]
1789+ // has already been allocated
1790+ if unlikely(self.buckets() < Group::WIDTH) {
1791+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of control bytes,
1792+ // so copying `self.buckets() == self.bucket_mask + 1` bytes with offset equal to
1793+ // `Group::WIDTH` is safe
17491794 self.ctrl(0)
17501795 .copy_to(self.ctrl(Group::WIDTH), self.buckets());
17511796 } else {
1797+ // SAFETY: We have `self.bucket_mask + 1 + Group::WIDTH` number of
1798+ // control bytes, so copying `Group::WIDTH` bytes with offset equal
1799+ // to `self.buckets() == self.bucket_mask + 1` is safe
17521800 self.ctrl(0)
17531801 .copy_to(self.ctrl(self.buckets()), Group::WIDTH);
17541802 }
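The new doc comment above spells out the control-byte conversion performed before rehashing in place. A per-byte sketch of that mapping follows; the crate does the same thing a whole `Group` at a time via `convert_special_to_empty_and_full_to_deleted`, and the sample `FULL` value here is just an assumed placeholder for a byte whose top bit is clear.

```rust
const EMPTY: u8 = 0b1111_1111;
const DELETED: u8 = 0b1000_0000;

// Per-byte version of the conversion: special bytes (top bit set, i.e. EMPTY
// or DELETED) become EMPTY, full bytes (top bit clear) become DELETED.
fn convert(ctrl: u8) -> u8 {
    if ctrl & 0x80 != 0 { EMPTY } else { DELETED }
}

fn main() {
    assert_eq!(convert(EMPTY), EMPTY);
    assert_eq!(convert(DELETED), EMPTY);
    assert_eq!(convert(0b0101_1010), DELETED); // assumed FULL byte (top bit clear)
    println!("EMPTY -> EMPTY, DELETED -> EMPTY, FULL -> DELETED");
}
```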
@@ -2248,27 +2296,95 @@ impl<A: Allocator + Clone> RawTableInner<A> {
22482296 self.growth_left = bucket_mask_to_capacity(self.bucket_mask);
22492297 }
22502298
2299+ /// Erases the [`Bucket`]'s control byte at the given index so that it is no longer
2300+ /// recognized as full, decreases the `items` count of the table and, where possible,
2301+ /// increases `self.growth_left`.
2302+ ///
2303+ /// This function does not actually erase / drop the [`Bucket`] itself, i.e. it
2304+ /// does not make any changes to the `data` parts of the table. The caller of this
2305+ /// function must take care to properly drop the `data`, otherwise calling this
2306+ /// function may result in a memory leak.
2307+ ///
2308+ /// # Safety
2309+ ///
2310+ /// You must observe the following safety rules when calling this function:
2311+ ///
2312+ /// * The [`RawTableInner`] has already been allocated;
2313+ ///
2314+ /// * The control byte at the given `index` must be full;
2315+ ///
2316+ /// * The `index` must not be greater than the `RawTableInner.bucket_mask`, i.e.
2317+ /// `index <= RawTableInner.bucket_mask` or, in other words, `(index + 1)` must
2318+ /// be no greater than the number returned by the function [`RawTableInner::buckets`].
2319+ ///
2320+ /// Calling this function on a table that has not been allocated results in [`undefined behavior`].
2321+ ///
2322+ /// Calling this function on a table with no elements is unspecified, but calling subsequent
2323+ /// functions is likely to result in [`undefined behavior`] due to a subtraction overflow
2324+ /// (`self.items -= 1` overflows when `self.items == 0`).
2325+ ///
2326+ /// See also the [`Bucket::as_ptr`] method for more information about properly removing
2327+ /// or saving a `data element` from / into the [`RawTable`] / [`RawTableInner`].
2328+ ///
2329+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2330+ /// [`Bucket::as_ptr`]: Bucket::as_ptr
2331+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
22512332 #[inline]
22522333 unsafe fn erase(&mut self, index: usize) {
22532334 debug_assert!(self.is_bucket_full(index));
2335+
2336+ // This is the same as `index.wrapping_sub(Group::WIDTH) % self.buckets()` because
2337+ // the number of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
22542338 let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask;
2339+ // SAFETY:
2340+ // - The caller must uphold the safety contract for `erase` method;
2341+ // - `index_before` is guaranteed to be in range due to masking with `self.bucket_mask`
22552342 let empty_before = Group::load(self.ctrl(index_before)).match_empty();
22562343 let empty_after = Group::load(self.ctrl(index)).match_empty();
22572344
2258- // If we are inside a continuous block of Group::WIDTH full or deleted
2259- // cells then a probe window may have seen a full block when trying to
2260- // insert. We therefore need to keep that block non-empty so that
2261- // lookups will continue searching to the next probe window.
2345+ // Inserting and searching in the map is performed by two key functions:
2346+ //
2347+ // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED`
2348+ // slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED`
2349+ // slot immediately in the first group, it jumps to the next `Group` looking for it,
2350+ // and so on until it has gone through all the groups in the control bytes.
2351+ //
2352+ // - The `find_inner` function that looks for the index of the desired element by looking
2353+ // at all the `FULL` bytes in the group. If it did not find the element right away, and
2354+ // there is no `EMPTY` byte in the group, then this means that the `find_insert_slot`
2355+ // function may have found a suitable slot in the next group. Therefore, `find_inner`
2356+ // jumps further, and if it does not find the desired element and again there is no `EMPTY`
2357+ // byte, then it jumps further, and so on. The search stops only if `find_inner` function
2358+ // finds the desired element or hits an `EMPTY` slot/byte.
2359+ //
2360+ // Accordingly, this leads to two consequences:
2361+ //
2362+ // - The map must have `EMPTY` slots (bytes);
2363+ //
2364+ // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner`
2365+ // function may stumble upon an `EMPTY` byte before finding the desired element and stop
2366+ // searching.
2367+ //
2368+ // Thus it is necessary to check all bytes after and before the erased element. If we are in
2369+ // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes
2370+ // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as
2371+ // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there
2372+ // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble
2373+ // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well.
2374+ //
2375+ // Finally, for tables smaller than the group width (`self.buckets() < Group::WIDTH`),
2376+ // `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index`, so given
2377+ // all of the above, such tables cannot have `DELETED` bytes.
22622378 //
2263- // Note that in this context `leading_zeros` refers to the bytes at the
2264- // end of a group, while `trailing_zeros` refers to the bytes at the
2265- // beginning of a group.
2379+ // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while
2380+ // `trailing_zeros` refers to the bytes at the beginning of a group.
22662381 let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH {
22672382 DELETED
22682383 } else {
22692384 self.growth_left += 1;
22702385 EMPTY
22712386 };
2387+ // SAFETY: the caller must uphold the safety contract for `erase` method.
22722388 self.set_ctrl(index, ctrl);
22732389 self.items -= 1;
22742390 }
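To make the `DELETED` vs `EMPTY` decision in `erase` concrete, here is a simplified standalone model: control bytes are plain `u8`s, `empties` stands in for `Group::load(..).match_empty()`, and `GROUP_WIDTH`, `FULL` and the helper names are assumptions for illustration rather than the crate's SIMD code.

```rust
const GROUP_WIDTH: usize = 16;
const EMPTY: u8 = 0xff;

// Bit i of the returned mask is set iff slot i of the group holds EMPTY,
// mimicking what `match_empty()` reports for a loaded group.
fn empties(group: &[u8; GROUP_WIDTH]) -> u16 {
    let mut mask = 0u16;
    for (i, &byte) in group.iter().enumerate() {
        if byte == EMPTY {
            mask |= 1 << i;
        }
    }
    mask
}

// The rule from `erase`: if the erased index sits inside a run of at least
// GROUP_WIDTH non-EMPTY bytes, a probe window may have seen the block as
// full, so a tombstone (DELETED) must be left; otherwise EMPTY is safe.
fn erased_ctrl(before: &[u8; GROUP_WIDTH], after: &[u8; GROUP_WIDTH]) -> &'static str {
    let empty_before = empties(before);
    let empty_after = empties(after);
    if (empty_before.leading_zeros() + empty_after.trailing_zeros()) as usize >= GROUP_WIDTH {
        "DELETED"
    } else {
        "EMPTY"
    }
}

fn main() {
    const FULL: u8 = 0x5a; // assumed FULL control byte (top bit clear)
    let all_full = [FULL; GROUP_WIDTH];

    // Surrounded by at least GROUP_WIDTH non-EMPTY bytes: keep a tombstone.
    assert_eq!(erased_ctrl(&all_full, &all_full), "DELETED");

    // EMPTY bytes near the erased index on both sides: any probe window that
    // covers it also sees an EMPTY byte, so the slot can go straight to EMPTY.
    let mut before = all_full;
    before[GROUP_WIDTH - 2] = EMPTY;
    let mut after = all_full;
    after[2] = EMPTY;
    assert_eq!(erased_ctrl(&before, &after), "EMPTY");

    println!("tombstone decision matches the rule in `erase`");
}
```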