@@ -443,7 +443,7 @@ impl<T> Bucket<T> {
 //                        |               (to the end of T5)
 //                        |   |           `base: NonNull<T>` must point here
 //                        v   |           (to the end of T0 or to the start of C0)
-//                       /‾‾‾\ v          v
+//                       /‾‾‾\ v          v
 // [Padding], Tlast, ..., |T10|, ..., T5|, T4, T3, T2, T1, T0, |C0, C1, C2, C3, C4, C5, ..., C10, ..., Clast
 //                        \__________  __________/
 //                                   \/
@@ -1083,7 +1083,7 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
     /// without reallocation.
     #[cfg_attr(feature = "inline-more", inline)]
     pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
-        if additional > self.table.growth_left {
+        if unlikely(additional > self.table.growth_left) {
             // Avoid `Result::unwrap_or_else` because it bloats LLVM IR.
             if self
                 .reserve_rehash(additional, hasher, Fallibility::Infallible)
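
The only code change in this hunk wraps the growth check in `unlikely`, a branch-prediction hint marking the slow path (having to reserve more capacity) as the rare case. As a hedged illustration, one common way to approximate such a hint on stable Rust is to route the unlikely branch through a `#[cold]` function; hashbrown's own definition of `unlikely` is not shown in this diff and may differ.

// Illustrative stable-Rust approximation of a branch hint; not hashbrown's code.
#[cold]
#[inline(never)]
fn cold_path() {}

#[inline]
fn unlikely(b: bool) -> bool {
    if b {
        // Calling a #[cold] function on this side nudges the optimizer to
        // lay the taken branch out off the hot path.
        cold_path();
    }
    b
}
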
@@ -1252,6 +1252,22 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
         }
     }
 
+    /// Searches for an element in the table, or a potential slot where
+    /// that element could be inserted.
+    #[inline]
+    pub fn find_potential(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> (usize, bool) {
+        self.table.find_potential_inner(hash, &mut |index| unsafe {
+            eq(self.bucket(index).as_ref())
+        })
+    }
+
+    /// Marks an element in the table as inserted.
+    #[inline]
+    pub unsafe fn mark_inserted(&mut self, index: usize, hash: u64) {
+        let old_ctrl = *self.table.ctrl(index);
+        self.table.record_item_insert_at(index, old_ctrl, hash);
+    }
+
     /// Searches for an element in the table.
     #[inline]
     pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
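
The two new public methods are designed to be used as a pair: `find_potential` returns a bucket index together with a flag saying whether an equal element already occupies it, and `mark_inserted` claims that index by updating its control byte and the item count. The sketch below shows how a hypothetical caller might combine them into an insert-or-update operation over a `RawTable<(K, V)>`; the function name, the key and value types, and the assumption that spare capacity has already been reserved are illustrative, not part of this change.

// Hypothetical caller sketch. Assumes hashbrown built with its "raw" feature,
// and that `table.reserve(1, hasher)` was called beforehand so that claiming a
// slot cannot violate the load factor.
use hashbrown::raw::RawTable;

fn insert_or_update<K: Eq, V>(table: &mut RawTable<(K, V)>, hash: u64, key: K, value: V) {
    // Find the key, or a slot where an equal key could be inserted.
    let (index, found) = table.find_potential(hash, |probe: &(K, V)| probe.0 == key);
    unsafe {
        if found {
            // The key is already present: overwrite its value in place.
            table.bucket(index).as_mut().1 = value;
        } else {
            // Claim the empty/deleted slot, then initialize the bucket.
            // `mark_inserted` only updates the control byte and the item
            // count, so the bucket must be written immediately afterwards.
            table.mark_inserted(index, hash);
            table.bucket(index).write((key, value));
        }
    }
}
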
@@ -1585,6 +1601,106 @@ impl<A: Allocator + Clone> RawTableInner<A> {
         }
     }
 
+    /// Finds the position to insert something in a group.
+    #[inline]
+    fn find_insert_slot_in_group(&self, group: &Group, probe_seq: &ProbeSeq) -> Option<usize> {
+        let bit = group.match_empty_or_deleted().lowest_set_bit();
+
+        if likely(bit.is_some()) {
+            let mut index = (probe_seq.pos + bit.unwrap()) & self.bucket_mask;
+
+            // In tables smaller than the group width, trailing control
+            // bytes outside the range of the table are filled with
+            // EMPTY entries. These will unfortunately trigger a
+            // match, but once masked may point to a full bucket that
+            // is already occupied. We detect this situation here and
+            // perform a second scan starting at the beginning of the
+            // table. This second scan is guaranteed to find an empty
+            // slot (due to the load factor) before hitting the trailing
+            // control bytes (containing EMPTY).
+            //
+            // SAFETY: The `index` is guaranteed to be in range `0..=self.bucket_mask`
+            // due to masking with `self.bucket_mask`.
+            unsafe {
+                if unlikely(self.is_bucket_full(index)) {
+                    debug_assert!(self.bucket_mask < Group::WIDTH);
+                    debug_assert_ne!(probe_seq.pos, 0);
+                    // SAFETY:
+                    //
+                    // * We are in range and `ptr = self.ctrl(0)` is valid for reads
+                    //   and properly aligned, because the table is already allocated
+                    //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
+                    //
+                    // * For tables larger than the group width, we will never end up in this
+                    //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
+                    //   full bucket index. For tables smaller than the group width, calling the
+                    //   `lowest_set_bit_nonzero` function (when the `nightly` feature is enabled) is
+                    //   also safe, as the trailing control bytes outside the range of the table are
+                    //   filled with EMPTY bytes, so this second scan either finds an empty slot (due
+                    //   to the load factor) or hits the trailing control bytes (containing EMPTY).
+                    //   See `intrinsics::cttz_nonzero` for more information.
+                    index = Group::load_aligned(self.ctrl(0))
+                        .match_empty_or_deleted()
+                        .lowest_set_bit_nonzero();
+                }
+            }
+
+            Some(index)
+        } else {
+            None
+        }
+    }
+
+    /// Searches for an element in the table, or a potential slot where that element could be
+    /// inserted.
+    ///
+    /// This uses dynamic dispatch to reduce the amount of code generated, but that is
+    /// eliminated by LLVM optimizations.
+    #[inline]
+    pub fn find_potential_inner(
+        &self,
+        hash: u64,
+        eq: &mut dyn FnMut(usize) -> bool,
+    ) -> (usize, bool) {
+        let mut insert_slot = None;
+
+        let h2_hash = h2(hash);
+        let mut probe_seq = self.probe_seq(hash);
+
+        loop {
+            let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) };
+
+            for bit in group.match_byte(h2_hash) {
+                let index = (probe_seq.pos + bit) & self.bucket_mask;
+
+                if likely(eq(index)) {
+                    return (index, true);
+                }
+            }
+
+            // We didn't find the element we were looking for in the group, so try to get an
+            // insertion slot from the group if we don't have one yet.
+            if likely(insert_slot.is_none()) {
+                insert_slot = self.find_insert_slot_in_group(&group, &probe_seq);
+            }
+
+            // Only stop the search if the group contains at least one empty element.
+            // Otherwise, the element that we are looking for might be in a following group.
+            if likely(group.match_empty().any_bit_set()) {
+                // We must have found an insert slot by now, since the current group contains at
+                // least one empty element. For tables smaller than the group width, there will
+                // still be an empty element in the current (and only) group due to the load factor.
+                debug_assert!(insert_slot.is_some());
+                match insert_slot {
+                    Some(insert_slot) => return (insert_slot, false),
+                    None => unsafe { hint::unreachable_unchecked() },
+                }
+            }
+
+            probe_seq.move_next(self.bucket_mask);
+        }
+    }
+
     /// Searches for an empty or deleted bucket which is suitable for inserting
     /// a new element and sets the hash for that slot.
     ///
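
The new helper relies on two properties that the surrounding comments describe: the bucket count is always a power of two, so masking with `bucket_mask` is the same as reducing modulo the number of buckets, and in tables smaller than the group width a match in the trailing control bytes can wrap onto an already full bucket, which is what the second scan handles. A minimal standalone sketch of the masking step, using illustrative names rather than hashbrown's:

// Standalone illustration of the wrap-around masking; not part of hashbrown.
fn wrap_index(pos: usize, bit: usize, buckets: usize) -> usize {
    debug_assert!(buckets.is_power_of_two());
    let bucket_mask = buckets - 1;
    // Equivalent to `(pos + bit) % buckets` because `buckets` is a power of two.
    (pos + bit) & bucket_mask
}

fn main() {
    // A 4-bucket table is smaller than a 16-wide group, so a match found in
    // the trailing (out-of-range) control bytes wraps back into 0..4 and can
    // land on a bucket that is already full, triggering the second scan.
    assert_eq!(wrap_index(0, 5, 4), 1);
    assert_eq!(wrap_index(0, 5, 4), (0 + 5) % 4);
}
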
@@ -1637,46 +1753,10 @@ impl<A: Allocator + Clone> RawTableInner<A> {
             // bytes, which is safe (see RawTableInner::new_in).
             unsafe {
                 let group = Group::load(self.ctrl(probe_seq.pos));
-                if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() {
-                    // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number
-                    // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
-                    let result = (probe_seq.pos + bit) & self.bucket_mask;
-
-                    // In tables smaller than the group width, trailing control
-                    // bytes outside the range of the table are filled with
-                    // EMPTY entries. These will unfortunately trigger a
-                    // match, but once masked may point to a full bucket that
-                    // is already occupied. We detect this situation here and
-                    // perform a second scan starting at the beginning of the
-                    // table. This second scan is guaranteed to find an empty
-                    // slot (due to the load factor) before hitting the trailing
-                    // control bytes (containing EMPTY).
-                    //
-                    // SAFETY: The `result` is guaranteed to be in range `0..self.bucket_mask`
-                    // due to masking with `self.bucket_mask`
-                    if unlikely(self.is_bucket_full(result)) {
-                        debug_assert!(self.bucket_mask < Group::WIDTH);
-                        debug_assert_ne!(probe_seq.pos, 0);
-                        // SAFETY:
-                        //
-                        // * We are in range and `ptr = self.ctrl(0)` are valid for reads
-                        //   and properly aligned, because the table is already allocated
-                        //   (see `TableLayout::calculate_layout_for` and `ptr::read`);
-                        //
-                        // * For tables larger than the group width, we will never end up in the given
-                        //   branch, since `(probe_seq.pos + bit) & self.bucket_mask` cannot return a
-                        //   full bucket index. For tables smaller than the group width, calling the
-                        //   `lowest_set_bit_nonzero` function (when `nightly` feature enabled) is also
-                        //   safe, as the trailing control bytes outside the range of the table are filled
-                        //   with EMPTY bytes, so this second scan either finds an empty slot (due to the
-                        //   load factor) or hits the trailing control bytes (containing EMPTY). See
-                        //   `intrinsics::cttz_nonzero` for more information.
-                        return Group::load_aligned(self.ctrl(0))
-                            .match_empty_or_deleted()
-                            .lowest_set_bit_nonzero();
-                    }
+                let index = self.find_insert_slot_in_group(&group, &probe_seq);
 
-                    return result;
+                if likely(index.is_some()) {
+                    return index.unwrap();
                 }
             }
             probe_seq.move_next(self.bucket_mask);
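
Both the old and the refactored `find_insert_slot`, like `find_potential_inner` above, only stop probing once they reach a group containing an EMPTY byte, so they rely on the probe sequence eventually visiting every group. A minimal sketch of that property, assuming (this is not shown in the diff) that `ProbeSeq::move_next` implements triangular probing whose stride grows by one group width per step:

// Standalone sketch of the assumed probing scheme; the constants and names
// are illustrative, not hashbrown's.
use std::collections::HashSet;

const GROUP_WIDTH: usize = 16; // e.g. the SSE2 group width

fn main() {
    let buckets = 64; // bucket counts are always powers of two
    let bucket_mask = buckets - 1;
    let (mut pos, mut stride) = (0usize, 0usize);
    let mut visited = HashSet::new();

    for _ in 0..(buckets / GROUP_WIDTH) {
        visited.insert(pos);
        stride += GROUP_WIDTH;
        pos = (pos + stride) & bucket_mask;
    }

    // Each group-aligned start position is visited exactly once before the
    // sequence repeats, so the probe loop always reaches a group with an
    // EMPTY byte as long as the load factor leaves one somewhere.
    assert_eq!(visited.len(), buckets / GROUP_WIDTH);
}
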