@@ -905,10 +905,35 @@ impl<T, A: Allocator> RawTable<T, A> {
905905 & self . alloc
906906 }
907907
908- /// Returns pointer to one past last element of data table.
908+ /// Returns pointer to one past last `data` element in the table as viewed from
909+ /// the start point of the allocation.
910+ ///
911+ /// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
912+ /// otherwise using it may result in [`undefined behavior`].
913+ ///
914+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
909915 #[ inline]
910- pub unsafe fn data_end ( & self ) -> NonNull < T > {
911- NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) )
916+ pub fn data_end ( & self ) -> NonNull < T > {
917+ // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
918+ //
919+ // `self.table.ctrl.as_ptr().cast()` returns pointer that
920+ // points here (to the end of `T0`)
921+ // ∨
922+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
923+ // \________ ________/
924+ // \/
925+ // `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
926+ //
927+ // where: T0...T_n - our stored data;
928+ // CT0...CT_n - control bytes or metadata for `data`;
929+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
930+ // with loading `Group` bytes from the heap works properly, even if the result
931+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
932+ // `RawTableInner::set_ctrl` function.
933+ //
934+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
935+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
936+ unsafe { NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) ) }
912937 }
913938
914939 /// Returns pointer to start of data table.
@@ -940,8 +965,55 @@ impl<T, A: Allocator> RawTable<T, A> {
940965 }
941966
942967 /// Returns a pointer to an element in the table.
968+ ///
969+ /// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
970+ /// otherwise using it may result in [`undefined behavior`].
971+ ///
972+ /// # Safety
973+ ///
974+ /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
975+ /// following safety rules:
976+ ///
977+ /// * The table must already be allocated;
978+ ///
979+ /// * The `index` must be less than the number returned by the [`RawTable::buckets`]
980+ /// function, i.e. `(index + 1) <= self.buckets()`.
981+ ///
982+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
983+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
984+ ///
985+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
986+ /// be less than the number returned by the [`RawTable::buckets`] function, i.e.
987+ /// `(index + 1) <= self.buckets()`.
988+ ///
989+ /// [`RawTable::buckets`]: RawTable::buckets
990+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
943991 #[ inline]
944992 pub unsafe fn bucket ( & self , index : usize ) -> Bucket < T > {
993+ // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
994+ // (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
995+ // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
996+ //
997+ // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
998+ // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
999+ // |
1000+ // | `base = self.data_end()` points here
1001+ // | (to the start of CT0 or to the end of T0)
1002+ // v v
1003+ // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
1004+ // ^ \__________ __________/
1005+ // `table.bucket(3)` returns a pointer that points \/
1006+ // here in the `data` part of the `RawTable` (to additional control bytes
1007+ // the end of T3) `m = Group::WIDTH - 1`
1008+ //
1009+ // where: T0...T_n - our stored data;
1010+ // CT0...CT_n - control bytes or metadata for `data`;
1011+ // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
1012+ // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
1013+ // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
1014+ //
1015+ // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
1016+ // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
9451017 debug_assert_ne ! ( self . table. bucket_mask, 0 ) ;
9461018 debug_assert ! ( index < self . buckets( ) ) ;
9471019 Bucket :: from_base_index ( self . data_end ( ) , index)
@@ -2212,6 +2284,9 @@ impl RawTableInner {
22122284 ///
22132285 /// * The [`RawTableInner`] must have properly initialized control bytes.
22142286 ///
2287+ /// The type `T` must be the actual type of the elements stored in the table,
2288+ /// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
2289+ ///
22152290 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
22162291 #[ inline]
22172292 unsafe fn iter < T > ( & self ) -> RawIter < T > {
@@ -2228,13 +2303,20 @@ impl RawTableInner {
22282303 // `ctrl` points here (to the start
22292304 // of the first control byte `CT0`)
22302305 // ∨
2231- // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2306+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
22322307 // \________ ________/
22332308 // \/
2234- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2309+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
22352310 //
22362311 // where: T0...T_n - our stored data;
22372312 // CT0...CT_n - control bytes or metadata for `data`;
2313+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2314+ // with loading `Group` bytes from the heap works properly, even if the result
2315+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2316+ // `RawTableInner::set_ctrl` function.
2317+ //
2318+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2319+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
22382320 let data = Bucket :: from_base_index ( self . data_end ( ) , 0 ) ;
22392321 RawIter {
22402322 // SAFETY: See explanation above
@@ -2258,6 +2340,9 @@ impl RawTableInner {
22582340 ///
22592341 /// # Safety
22602342 ///
2343+ /// The type `T` must be the actual type of the elements stored in the table,
2344+ /// otherwise calling this function may result in [`undefined behavior`].
2345+ ///
22612346 /// If `T` is a type that should be dropped and **the table is not empty**,
22622347 /// calling this function more than once results in [`undefined behavior`].
22632348 ///
@@ -2309,6 +2394,8 @@ impl RawTableInner {
23092394 ///
23102395 /// * Calling this function more than once;
23112396 ///
2397+ /// * The type `T` must be the actual type of the elements stored in the table.
2398+ ///
23122399 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
23132400 /// to allocate this table.
23142401 ///
@@ -2348,13 +2435,116 @@ impl RawTableInner {
23482435 }
23492436 }
23502437
2438+ /// Returns a pointer to an element in the table (convenience for
2439+ /// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2440+ ///
2441+ /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2442+ /// otherwise using it may result in [`undefined behavior`].
2443+ ///
2444+ /// # Safety
2445+ ///
2446+ /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2447+ /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2448+ /// this function, the following safety rules must be observed:
2449+ ///
2450+ /// * The table must already be allocated;
2451+ ///
2452+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2453+ /// function, i.e. `(index + 1) <= self.buckets()`.
2454+ ///
2455+ /// * The type `T` must be the actual type of the elements stored in the table, otherwise
2456+ /// using the returned [`Bucket`] may result in [`undefined behavior`].
2457+ ///
2458+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
2459+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2460+ ///
2461+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2462+ /// be less than the number returned by the [`RawTableInner::buckets`] function, i.e.
2463+ /// `(index + 1) <= self.buckets()`.
2464+ ///
2465+ /// ```none
2466+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2467+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2468+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2469+ ///
2470+ /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2471+ /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2472+ /// |
2473+ /// | `base = table.data_end::<T>()` points here
2474+ /// | (to the start of CT0 or to the end of T0)
2475+ /// v v
2476+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2477+ /// ^ \__________ __________/
2478+ /// `table.bucket(3)` returns a pointer that points \/
2479+ /// here in the `data` part of the `RawTableInner` additional control bytes
2480+ /// (to the end of T3) `m = Group::WIDTH - 1`
2481+ ///
2482+ /// where: T0...T_n - our stored data;
2483+ /// CT0...CT_n - control bytes or metadata for `data`;
2484+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2485+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2486+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2487+ ///
2488+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2489+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2490+ /// ```
2491+ ///
2492+ /// [`Bucket::from_base_index`]: Bucket::from_base_index
2493+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2494+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23512495 #[ inline]
23522496 unsafe fn bucket < T > ( & self , index : usize ) -> Bucket < T > {
23532497 debug_assert_ne ! ( self . bucket_mask, 0 ) ;
23542498 debug_assert ! ( index < self . buckets( ) ) ;
23552499 Bucket :: from_base_index ( self . data_end ( ) , index)
23562500 }
23572501
2502+ /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2503+ /// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2504+ ///
2505+ /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2506+ /// otherwise using it may result in [`undefined behavior`].
2507+ ///
2508+ /// # Safety
2509+ ///
2510+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2511+ ///
2512+ /// * The table must already be allocated;
2513+ ///
2514+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2515+ /// function, i.e. `(index + 1) <= self.buckets()`;
2516+ ///
2517+ /// * The `size_of` must be equal to the size of the elements stored in the table.
2518+ ///
2519+ /// ```none
2520+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2521+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2522+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2523+ ///
2524+ /// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2525+ /// `data` part of the `RawTableInner`, i.e. to the start of T3
2526+ /// |
2527+ /// | `base = table.data_end::<u8>()` points here
2528+ /// | (to the start of CT0 or to the end of T0)
2529+ /// v v
2530+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2531+ /// \__________ __________/
2532+ /// \/
2533+ /// additional control bytes
2534+ /// `m = Group::WIDTH - 1`
2535+ ///
2536+ /// where: T0...T_n - our stored data;
2537+ /// CT0...CT_n - control bytes or metadata for `data`;
2538+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2539+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2540+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2541+ ///
2542+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2543+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2544+ /// ```
2545+ ///
2546+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2547+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23582548 #[ inline]
23592549 unsafe fn bucket_ptr ( & self , index : usize , size_of : usize ) -> * mut u8 {
23602550 debug_assert_ne ! ( self . bucket_mask, 0 ) ;
@@ -2363,9 +2553,47 @@ impl RawTableInner {
23632553 base. sub ( ( index + 1 ) * size_of)
23642554 }
23652555
2556+ /// Returns pointer to one past last `data` element in the table as viewed from
2557+ /// the start point of the allocation (convenience for `self.ctrl.cast()`).
2558+ ///
2559+ /// This function actually returns a pointer to the end of the `data element` at
2560+ /// index "0" (zero).
2561+ ///
2562+ /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2563+ /// otherwise using it may result in [`undefined behavior`].
2564+ ///
2565+ /// # Note
2566+ ///
2567+ /// The type `T` must be the actual type of the elements stored in the table, otherwise
2568+ /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2569+ ///
2570+ /// ```none
2571+ /// `table.data_end::<T>()` returns pointer that points here
2572+ /// (to the end of `T0`)
2573+ /// ∨
2574+ /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2575+ /// \________ ________/
2576+ /// \/
2577+ /// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
2578+ ///
2579+ /// where: T0...T_n - our stored data;
2580+ /// CT0...CT_n - control bytes or metadata for `data`;
2581+ /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2582+ /// with loading `Group` bytes from the heap works properly, even if the result
2583+ /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2584+ /// `RawTableInner::set_ctrl` function.
2585+ ///
2586+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2587+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2588+ /// ```
2589+ ///
2590+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23662591 #[ inline]
2367- unsafe fn data_end < T > ( & self ) -> NonNull < T > {
2368- NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2592+ fn data_end < T > ( & self ) -> NonNull < T > {
2593+ unsafe {
2594+ // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
2595+ NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2596+ }
23692597 }
23702598
23712599 /// Returns an iterator-like object for a probe sequence on the table.
@@ -2758,7 +2986,7 @@ impl RawTableInner {
27582986 // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
27592987 // \________ ________/
27602988 // \/
2761- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2989+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
27622990 //
27632991 // where: T0...T_n - our stored data;
27642992 // CT0...CT_n - control bytes or metadata for `data`.
@@ -3000,7 +3228,7 @@ impl RawTableInner {
30003228 ///
30013229 /// # Note
30023230 ///
3003- /// This function must be called only after [`drop_elements`](RawTable ::drop_elements),
3231+ /// This function must be called only after [`drop_elements`](RawTableInner ::drop_elements),
30043232 /// else it can lead to leaking of memory. Also calling this function automatically
30053233 /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
30063234 /// (dangling) the `ctrl` field of the table.
@@ -3521,6 +3749,7 @@ impl<T> RawIterRange<T> {
35213749 ///
35223750 /// * The `len` must be a power of two.
35233751 ///
3752+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
35243753 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
35253754 #[ cfg_attr( feature = "inline-more" , inline) ]
35263755 unsafe fn new ( ctrl : * const u8 , data : Bucket < T > , len : usize ) -> Self {
0 commit comments