@@ -100,16 +100,51 @@ impl ProbeSeq {
100100// Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
101101#[ cfg_attr( target_os = "emscripten" , inline( never) ) ]
102102#[ cfg_attr( not( target_os = "emscripten" ) , inline) ]
103- fn capacity_to_buckets ( cap : usize ) -> Option < usize > {
103+ fn capacity_to_buckets ( cap : usize , table_layout : TableLayout ) -> Option < usize > {
104104 debug_assert_ne ! ( cap, 0 ) ;
105105
106106 // For small tables we require at least 1 empty bucket so that lookups are
107107 // guaranteed to terminate if an element doesn't exist in the table.
108- if cap < 8 {
108+ if cap < 15 {
109+ // Consider a small TableLayout like { size: 1, ctrl_align: 16 } on a
110+ // platform with Group::WIDTH of 16 (like x86_64 with SSE2). For small
111+ // bucket sizes, this ends up wasting quite a few bytes just to pad to
112+ // the relatively larger ctrl_align:
113+ //
114+ // | capacity | buckets | bytes allocated | bytes per item |
115+ // | -------- | ------- | --------------- | -------------- |
116+ // | 3 | 4 | 36 | (Yikes!) 12.0 |
117+ // | 7 | 8 | 40 | (Poor) 5.7 |
118+ // | 14 | 16 | 48 | 3.4 |
119+ // | 28 | 32 | 80 | 2.9 |
120+ //
121+ // In general, buckets * table_layout.size >= table_layout.ctrl_align
122+ // must be true to avoid these edge cases. This is implemented by adjusting
123+ // the minimum capacity upwards for small items. This code only needs
124+ // to handle ctrl_align values that are less than or equal to Group::WIDTH,
125+ // because valid layout sizes are always a multiple of the alignment,
126+ // so anything with alignment over the Group::WIDTH won't hit this edge
127+ // case.
128+
129+ // This is brittle, e.g. if we ever add 32 byte groups, it will select
130+ // 3 regardless of the table_layout.size.
131+ let min_cap = match ( Group :: WIDTH , table_layout. size ) {
132+ ( 16 , 0 ..=1 ) => 14 ,
133+ ( 16 , 2 ..=3 ) => 7 ,
134+ ( 8 , 0 ..=1 ) => 7 ,
135+ _ => 3 ,
136+ } ;
137+ let cap = min_cap. max ( cap) ;
109138 // We don't bother with a table size of 2 buckets since that can only
110- // hold a single element. Instead we skip directly to a 4 bucket table
139+ // hold a single element. Instead, we skip directly to a 4 bucket table
111140 // which can hold 3 elements.
112- return Some ( if cap < 4 { 4 } else { 8 } ) ;
141+ return Some ( if cap < 4 {
142+ 4
143+ } else if cap < 8 {
144+ 8
145+ } else {
146+ 16
147+ } ) ;
113148 }
114149
115150 // Otherwise require 1/8 buckets to be empty (87.5% load)
@@ -851,7 +886,7 @@ impl<T, A: Allocator> RawTable<T, A> {
851886 // elements. If the calculation overflows then the requested bucket
852887 // count must be larger than what we have right now and nothing needs to be
853888 // done.
854- let min_buckets = match capacity_to_buckets ( min_size) {
889+ let min_buckets = match capacity_to_buckets ( min_size, Self :: TABLE_LAYOUT ) {
855890 Some ( buckets) => buckets,
856891 None => return ,
857892 } ;
@@ -982,14 +1017,8 @@ impl<T, A: Allocator> RawTable<T, A> {
9821017 /// * If `self.table.items != 0`, calling of this function with `capacity`
9831018 /// equal to 0 (`capacity == 0`) results in [`undefined behavior`].
9841019 ///
985- /// * If `capacity_to_buckets(capacity) < Group::WIDTH` and
986- /// `self.table.items > capacity_to_buckets(capacity)`
987- /// calling this function results in [`undefined behavior`].
988- ///
989- /// * If `capacity_to_buckets(capacity) >= Group::WIDTH` and
990- /// `self.table.items > capacity_to_buckets(capacity)`
991- /// calling this function are never return (will go into an
992- /// infinite loop).
1020+ /// * If `self.table.items > capacity_to_buckets(capacity, Self::TABLE_LAYOUT)`
1021+ /// calling this function will never return (it will loop infinitely).
9931022 ///
9941023 /// See [`RawTableInner::find_insert_slot`] for more information.
9951024 ///
@@ -1479,8 +1508,8 @@ impl RawTableInner {
14791508 // SAFETY: We checked that we could successfully allocate the new table, and then
14801509 // initialized all control bytes with the constant `Tag::EMPTY` byte.
14811510 unsafe {
1482- let buckets =
1483- capacity_to_buckets ( capacity ) . ok_or_else ( || fallibility. capacity_overflow ( ) ) ?;
1511+ let buckets = capacity_to_buckets ( capacity , table_layout )
1512+ . ok_or_else ( || fallibility. capacity_overflow ( ) ) ?;
14841513
14851514 let mut result =
14861515 Self :: new_uninitialized ( alloc, table_layout, buckets, fallibility) ?;
@@ -4137,6 +4166,26 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
41374166mod test_map {
41384167 use super :: * ;
41394168
4169+ #[ test]
4170+ fn test_minimum_capacity_for_small_types ( ) {
4171+ #[ track_caller]
4172+ fn test_t < T > ( ) {
4173+ let raw_table: RawTable < T > = RawTable :: with_capacity ( 1 ) ;
4174+ let actual_buckets = raw_table. buckets ( ) ;
4175+ let min_buckets = Group :: WIDTH / core:: mem:: size_of :: < T > ( ) ;
4176+ assert ! (
4177+ actual_buckets >= min_buckets,
4178+ "expected at least {min_buckets} buckets, got {actual_buckets} buckets"
4179+ ) ;
4180+ }
4181+
4182+ test_t :: < u8 > ( ) ;
4183+
4184+ // This is only "small" for some platforms, like x86_64 with SSE2, but
4185+ // there's no harm in running it on other platforms.
4186+ test_t :: < u16 > ( ) ;
4187+ }
4188+
41404189 fn rehash_in_place < T > ( table : & mut RawTable < T > , hasher : impl Fn ( & T ) -> u64 ) {
41414190 unsafe {
41424191 table. table . rehash_in_place (
0 commit comments