@@ -1601,51 +1601,50 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         }
     }

+    /// Fixes up an insertion slot due to false positives for groups smaller than the group width.
+    #[inline]
+    unsafe fn fix_insert_slot(&self, index: usize) -> usize {
+        // In tables smaller than the group width, trailing control
+        // bytes outside the range of the table are filled with
+        // EMPTY entries. These will unfortunately trigger a
+        // match, but once masked may point to a full bucket that
+        // is already occupied. We detect this situation here and
+        // perform a second scan starting at the beginning of the
+        // table. This second scan is guaranteed to find an empty
+        // slot (due to the load factor) before hitting the trailing
+        // control bytes (containing EMPTY).
+        if unlikely(self.is_bucket_full(index)) {
+            debug_assert!(self.bucket_mask < Group::WIDTH);
+            // SAFETY:
+            //
+            // * We are in range and `ptr = self.ctrl(0)` is valid for reads
+            //   and properly aligned, because the table is already allocated
+            //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
+            //
+            // * For tables larger than the group width, we will never end up in this
+            //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+            //   full bucket index. For tables smaller than the group width, calling the
+            //   `lowest_set_bit_nonzero` function (when the `nightly` feature is enabled) is
+            //   also safe, as the trailing control bytes outside the range of the table are
+            //   filled with EMPTY bytes, so this second scan either finds an empty slot (due
+            //   to the load factor) or hits the trailing control bytes (containing EMPTY).
+            //   See `intrinsics::cttz_nonzero` for more information.
+            Group::load_aligned(self.ctrl(0))
+                .match_empty_or_deleted()
+                .lowest_set_bit_nonzero()
+        } else {
+            index
+        }
+    }
+
     /// Finds the position to insert something in a group.
+    /// This may have false positives and must be fixed up with `fix_insert_slot` before it's used.
     #[inline]
     fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
         let bit = group.match_empty_or_deleted().lowest_set_bit();

         if likely(bit.is_some()) {
-            let mut index = (probe_seq.pos + bit.unwrap()) & self.bucket_mask;
-
-            // In tables smaller than the group width, trailing control
-            // bytes outside the range of the table are filled with
-            // EMPTY entries. These will unfortunately trigger a
-            // match, but once masked may point to a full bucket that
-            // is already occupied. We detect this situation here and
-            // perform a second scan starting at the beginning of the
-            // table. This second scan is guaranteed to find an empty
-            // slot (due to the load factor) before hitting the trailing
-            // control bytes (containing EMPTY).
-            //
-            // SAFETY: The `index` is guaranteed to be in range `0..self.bucket_mask`
-            // due to masking with `self.bucket_mask`
-            unsafe {
-                if unlikely(self.is_bucket_full(index)) {
-                    debug_assert!(self.bucket_mask < Group::WIDTH);
-                    debug_assert_ne!(probe_seq.pos, 0);
-                    // SAFETY:
-                    //
-                    // * We are in range and `ptr = self.ctrl(0)` is valid for reads
-                    //   and properly aligned, because the table is already allocated
-                    //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
-                    //
-                    // * For tables larger than the group width, we will never end up in this
-                    //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
-                    //   full bucket index. For tables smaller than the group width, calling the
-                    //   `lowest_set_bit_nonzero` function (when the `nightly` feature is enabled) is
-                    //   also safe, as the trailing control bytes outside the range of the table are
-                    //   filled with EMPTY bytes, so this second scan either finds an empty slot (due
-                    //   to the load factor) or hits the trailing control bytes (containing EMPTY).
-                    //   See `intrinsics::cttz_nonzero` for more information.
-                    index = Group::load_aligned(self.ctrl(0))
-                        .match_empty_or_deleted()
-                        .lowest_set_bit_nonzero();
-                }
-            }
-
-            Some(index)
+            Some((probe_seq.pos + bit.unwrap()) & self.bucket_mask)
         } else {
             None
         }
@@ -1690,10 +1689,8 @@ impl<A: Allocator + Clone> RawTableInner<A> {
             // We must have found an insert slot by now, since the current group contains at
             // least one. For tables smaller than the group width, there will still be an
             // empty element in the current (and only) group due to the load factor.
-            debug_assert!(insert_slot.is_some());
-            match insert_slot {
-                Some(insert_slot) => return (insert_slot, false),
-                None => unsafe { hint::unreachable_unchecked() },
+            unsafe {
+                return (self.fix_insert_slot(insert_slot.unwrap_unchecked()), false);
             }
         }

@@ -1756,7 +1753,7 @@ impl<A: Allocator + Clone> RawTableInner<A> {
                 let index = self.find_insert_slot_in_group(&group, &probe_seq);

                 if likely(index.is_some()) {
-                    return index.unwrap();
+                    return self.fix_insert_slot(index.unwrap_unchecked());
                 }
             }
             probe_seq.move_next(self.bucket_mask);
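To make the false positive concrete, here is a minimal, self-contained sketch of the scenario that `fix_insert_slot` repairs. The constants `GROUP_WIDTH`, `EMPTY`, and `FULL` and the scalar scan are simplified stand-ins for hashbrown's SIMD `Group` type, assumed for illustration only; they are not the crate's real API.

```rust
const GROUP_WIDTH: usize = 16; // stand-in for Group::WIDTH (e.g. SSE2)
const EMPTY: u8 = 0xFF; // control byte for an empty bucket
const FULL: u8 = 0x00; // control byte for an occupied bucket (sketch only)

/// Scalar stand-in for `match_empty_or_deleted().lowest_set_bit()`:
/// offset of the first EMPTY control byte within one group, if any.
fn lowest_empty_bit(group: &[u8]) -> Option<usize> {
    group.iter().position(|&b| b == EMPTY)
}

fn main() {
    // Four buckets (bucket_mask = 3), i.e. a table smaller than the group
    // width. The control array carries GROUP_WIDTH trailing EMPTY bytes so
    // a whole group can be loaded from any in-range position.
    let bucket_mask = 3usize;
    let mut ctrl = vec![EMPTY; (bucket_mask + 1) + GROUP_WIDTH];
    ctrl[0] = FULL; // bucket 0: occupied
    ctrl[2] = FULL; // bucket 2: occupied
    ctrl[3] = FULL; // bucket 3: occupied; only bucket 1 is empty,
                    // which the load factor guarantees.

    // A hash-derived probe start inside the table.
    let probe_pos = 2usize;

    // What `find_insert_slot_in_group` now returns: the lowest matching
    // bit, masked into table range. Here the match lands on a *trailing*
    // EMPTY byte (ctrl[4]), and masking wraps it onto occupied bucket 0.
    let group = &ctrl[probe_pos..probe_pos + GROUP_WIDTH];
    let bit = lowest_empty_bit(group).unwrap();
    let index = (probe_pos + bit) & bucket_mask;
    assert_eq!(index, 0);
    assert_eq!(ctrl[index], FULL); // the false positive

    // What `fix_insert_slot` does: rescan one group from the start of the
    // table; the load factor guarantees a real empty slot is found first.
    let fixed = lowest_empty_bit(&ctrl[..GROUP_WIDTH]).unwrap();
    assert_eq!(fixed, 1); // bucket 1, genuinely empty
    println!("false positive at {index}, fixed up to {fixed}");
}
```

The refactor's payoff is visible in the callers: `find_insert_slot_in_group` stays branch-light on the hot path and returns the masked index directly, while the rescan is paid only in the unlikely full-bucket case, now shared by both call sites through `fix_insert_slot`.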