@@ -905,10 +905,33 @@ impl<T, A: Allocator> RawTable<T, A> {
905905 & self . alloc
906906 }
907907
908- /// Returns pointer to one past last element of data table.
908+ /// Returns pointer to one past last `data` element in the table as viewed from
909+ /// the start point of the allocation.
910+ ///
911+ /// The caller must ensure that the `RawTable` outlives the returned [`NonNull<T>`],
912+ /// otherwise using it may result in [`undefined behavior`].
909913 #[ inline]
910- pub unsafe fn data_end ( & self ) -> NonNull < T > {
911- NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) )
914+ pub fn data_end ( & self ) -> NonNull < T > {
915+ // SAFETY: `self.table.ctrl` is `NonNull`, so casting it is safe
916+ //
917+ // `self.table.ctrl.as_ptr().cast()` returns pointer that
918+ // points here (to the end of `T0`)
919+ // ∨
920+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
921+ // \________ ________/
922+ // \/
923+ // `n = buckets - 1`, i.e. `RawTable::buckets() - 1`
924+ //
925+ // where: T0...T_n - our stored data;
926+ // CT0...CT_n - control bytes or metadata for `data`.
927+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
928+ // with loading `Group` bytes from the heap works properly, even if the result
929+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
930+ // `RawTableInner::set_ctrl` function.
931+ //
932+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
933+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
934+ unsafe { NonNull :: new_unchecked ( self . table . ctrl . as_ptr ( ) . cast ( ) ) }
912935 }
913936
914937 /// Returns pointer to start of data table.
@@ -940,8 +963,55 @@ impl<T, A: Allocator> RawTable<T, A> {
940963 }
941964
942965 /// Returns a pointer to an element in the table.
966+ ///
967+ /// The caller must ensure that the `RawTable` outlives the returned [`Bucket<T>`],
968+ /// otherwise using it may result in [`undefined behavior`].
969+ ///
970+ /// # Safety
971+ ///
972+ /// If `mem::size_of::<T>() != 0`, then the caller of this function must observe the
973+ /// following safety rules:
974+ ///
975+ /// * The table must already be allocated;
976+ ///
977+ /// * The `index` must be less than the number returned by the [`RawTable::buckets`]
978+ /// function, i.e. `(index + 1) <= self.buckets()`.
979+ ///
980+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
981+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
982+ ///
983+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
984+ /// be less than the number returned by the [`RawTable::buckets`] function, i.e.
985+ /// `(index + 1) <= self.buckets()`.
986+ ///
987+ /// [`RawTable::buckets`]: RawTable::buckets
988+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
943989 #[ inline]
944990 pub unsafe fn bucket ( & self , index : usize ) -> Bucket < T > {
991+ // If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
992+ // (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
993+ // the "buckets" number of our `RawTable`, i.e. "n = RawTable::buckets() - 1"):
994+ //
995+ // `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
996+ // part of the `RawTable`, i.e. to the start of T3 (see `Bucket::as_ptr`)
997+ // |
998+ // | `base = self.data_end()` points here
999+ // | (to the start of CT0 or to the end of T0)
1000+ // v v
1001+ // [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
1002+ // ^ \__________ __________/
1003+ // `table.bucket(3)` returns a pointer that points \/
1004+ // here in the `data` part of the `RawTable` (to additional control bytes
1005+ // the end of T3) `m = Group::WIDTH - 1`
1006+ //
1007+ // where: T0...T_n - our stored data;
1008+ // CT0...CT_n - control bytes or metadata for `data`;
1009+ // CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
1010+ // the heap works properly, even if the result of `h1(hash) & self.table.bucket_mask`
1011+ // is equal to `self.table.bucket_mask`). See also `RawTableInner::set_ctrl` function.
1012+ //
1013+ // P.S. `h1(hash) & self.table.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
1014+ // of buckets is a power of two, and `self.table.bucket_mask = self.buckets() - 1`.
9451015 debug_assert_ne ! ( self . table. bucket_mask, 0 ) ;
9461016 debug_assert ! ( index < self . buckets( ) ) ;
9471017 Bucket :: from_base_index ( self . data_end ( ) , index)
@@ -2212,6 +2282,9 @@ impl RawTableInner {
22122282 ///
22132283 /// * The [`RawTableInner`] must have properly initialized control bytes.
22142284 ///
2285+ /// The type `T` must be the actual type of the elements stored in the table,
2286+ /// otherwise using the returned [`RawIter`] results in [`undefined behavior`].
2287+ ///
22152288 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
22162289 #[ inline]
22172290 unsafe fn iter < T > ( & self ) -> RawIter < T > {
@@ -2228,13 +2301,20 @@ impl RawTableInner {
22282301 // `ctrl` points here (to the start
22292302 // of the first control byte `CT0`)
22302303 // ∨
2231- // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
2304+ // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
22322305 // \________ ________/
22332306 // \/
2234- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2307+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
22352308 //
22362309 // where: T0...T_n - our stored data;
22372310 // CT0...CT_n - control bytes or metadata for `data`.
2311+ // CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2312+ // with loading `Group` bytes from the heap works properly, even if the result
2313+ // of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2314+ // `RawTableInner::set_ctrl` function.
2315+ //
2316+ // P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2317+ // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
22382318 let data = Bucket :: from_base_index ( self . data_end ( ) , 0 ) ;
22392319 RawIter {
22402320 // SAFETY: See explanation above
@@ -2258,6 +2338,9 @@ impl RawTableInner {
22582338 ///
22592339 /// # Safety
22602340 ///
2341+ /// The type `T` must be the actual type of the elements stored in the table,
2342+ /// otherwise calling this function may result in [`undefined behavior`].
2343+ ///
22612344 /// If `T` is a type that should be dropped and **the table is not empty**,
22622345 /// calling this function more than once results in [`undefined behavior`].
22632346 ///
@@ -2309,6 +2392,8 @@ impl RawTableInner {
23092392 ///
23102393 /// * Calling this function more than once;
23112394 ///
2395+ /// * The type `T` must be the actual type of the elements stored in the table.
2396+ ///
23122397 /// * The `alloc` must be the same [`Allocator`] as the `Allocator` that was used
23132398 /// to allocate this table.
23142399 ///
@@ -2348,13 +2433,116 @@ impl RawTableInner {
23482433 }
23492434 }
23502435
2436+ /// Returns a pointer to an element in the table (convenience for
2437+ /// `Bucket::from_base_index(self.data_end::<T>(), index)`).
2438+ ///
2439+ /// The caller must ensure that the `RawTableInner` outlives the returned [`Bucket<T>`],
2440+ /// otherwise using it may result in [`undefined behavior`].
2441+ ///
2442+ /// # Safety
2443+ ///
2444+ /// If `mem::size_of::<T>() != 0`, then the safety rules are directly derived from the
2445+ /// safety rules of the [`Bucket::from_base_index`] function. Therefore, when calling
2446+ /// this function, the following safety rules must be observed:
2447+ ///
2448+ /// * The table must already be allocated;
2449+ ///
2450+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2451+ /// function, i.e. `(index + 1) <= self.buckets()`.
2452+ ///
2453+ /// * The type `T` must be the actual type of the elements stored in the table, otherwise
2454+ /// using the returned [`Bucket`] may result in [`undefined behavior`].
2455+ ///
2456+ /// It is safe to call this function with index of zero (`index == 0`) on a table that has
2457+ /// not been allocated, but using the returned [`Bucket`] results in [`undefined behavior`].
2458+ ///
2459+ /// If `mem::size_of::<T>() == 0`, then the only requirement is that the `index` must
2460+ /// be less than the number returned by the [`RawTableInner::buckets`] function, i.e.
2461+ /// `(index + 1) <= self.buckets()`.
2462+ ///
2463+ /// ```none
2464+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2465+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2466+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2467+ ///
2468+ /// `table.bucket(3).as_ptr()` returns a pointer that points here in the `data`
2469+ /// part of the `RawTableInner`, i.e. to the start of T3 (see [`Bucket::as_ptr`])
2470+ /// |
2471+ /// | `base = table.data_end::<T>()` points here
2472+ /// | (to the start of CT0 or to the end of T0)
2473+ /// v v
2474+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2475+ /// ^ \__________ __________/
2476+ /// `table.bucket(3)` returns a pointer that points \/
2477+ /// here in the `data` part of the `RawTableInner` additional control bytes
2478+ /// (to the end of T3) `m = Group::WIDTH - 1`
2479+ ///
2480+ /// where: T0...T_n - our stored data;
2481+ /// CT0...CT_n - control bytes or metadata for `data`;
2482+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2483+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2484+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2485+ ///
2486+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2487+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2488+ /// ```
2489+ ///
2490+ /// [`Bucket::from_base_index`]: Bucket::from_base_index
2491+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2492+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23512493 #[ inline]
23522494 unsafe fn bucket < T > ( & self , index : usize ) -> Bucket < T > {
23532495 debug_assert_ne ! ( self . bucket_mask, 0 ) ;
23542496 debug_assert ! ( index < self . buckets( ) ) ;
23552497 Bucket :: from_base_index ( self . data_end ( ) , index)
23562498 }
23572499
2500+ /// Returns a raw `*mut u8` pointer to the start of the `data` element in the table
2501+ /// (convenience for `self.data_end::<u8>().as_ptr().sub((index + 1) * size_of)`).
2502+ ///
2503+ /// The caller must ensure that the `RawTableInner` outlives the returned `*mut u8`,
2504+ /// otherwise using it may result in [`undefined behavior`].
2505+ ///
2506+ /// # Safety
2507+ ///
2508+ /// If any of the following conditions are violated, the result is [`undefined behavior`]:
2509+ ///
2510+ /// * The table must already be allocated;
2511+ ///
2512+ /// * The `index` must be less than the number returned by the [`RawTableInner::buckets`]
2513+ /// function, i.e. `(index + 1) <= self.buckets()`;
2514+ ///
2515+ /// * The `size_of` must be equal to the size of the elements stored in the table;
2516+ ///
2517+ /// ```none
2518+ /// If mem::size_of::<T>() != 0 then return a pointer to the `element` in the `data part` of the table
2519+ /// (we start counting from "0", so that in the expression T[n], the "n" index is actually one less than
2520+ /// the "buckets" number of our `RawTableInner`, i.e. "n = RawTableInner::buckets() - 1"):
2521+ ///
2522+ /// `table.bucket_ptr(3, mem::size_of::<T>())` returns a pointer that points here in the
2523+ /// `data` part of the `RawTableInner`, i.e. to the start of T3
2524+ /// |
2525+ /// | `base = table.data_end::<u8>()` points here
2526+ /// | (to the start of CT0 or to the end of T0)
2527+ /// v v
2528+ /// [Pad], T_n, ..., |T3|, T2, T1, T0, |CT0, CT1, CT2, CT3, ..., CT_n, CTa_0, CTa_1, ..., CTa_m
2529+ /// \__________ __________/
2530+ /// \/
2531+ /// additional control bytes
2532+ /// `m = Group::WIDTH - 1`
2533+ ///
2534+ /// where: T0...T_n - our stored data;
2535+ /// CT0...CT_n - control bytes or metadata for `data`;
2536+ /// CTa_0...CTa_m - additional control bytes (so that the search with loading `Group` bytes from
2537+ /// the heap works properly, even if the result of `h1(hash) & self.bucket_mask`
2538+ /// is equal to `self.bucket_mask`). See also `RawTableInner::set_ctrl` function.
2539+ ///
2540+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2541+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2542+ /// ```
2543+ ///
2544+ /// [`RawTableInner::buckets`]: RawTableInner::buckets
2545+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23582546 #[ inline]
23592547 unsafe fn bucket_ptr ( & self , index : usize , size_of : usize ) -> * mut u8 {
23602548 debug_assert_ne ! ( self . bucket_mask, 0 ) ;
@@ -2363,9 +2551,47 @@ impl RawTableInner {
23632551 base. sub ( ( index + 1 ) * size_of)
23642552 }
23652553
2554+ /// Returns pointer to one past last `data` element in the table as viewed from
2555+ /// the start point of the allocation (convenience for `self.ctrl.cast()`).
2556+ ///
2557+ /// This function actually returns a pointer to the end of the `data element` at
2558+ /// index "0" (zero).
2559+ ///
2560+ /// The caller must ensure that the `RawTableInner` outlives the returned [`NonNull<T>`],
2561+ /// otherwise using it may result in [`undefined behavior`].
2562+ ///
2563+ /// # Note
2564+ ///
2565+ /// The type `T` must be the actual type of the elements stored in the table, otherwise
2566+ /// using the returned [`NonNull<T>`] may result in [`undefined behavior`].
2567+ ///
2568+ /// ```none
2569+ /// `table.data_end::<T>()` returns pointer that points here
2570+ /// (to the end of `T0`)
2571+ /// ∨
2572+ /// [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, CTa_0, CTa_1, ..., CTa_m
2573+ /// \________ ________/
2574+ /// \/
2575+ /// `n = buckets - 1`, i.e. `RawTableInner::buckets() - 1`
2576+ ///
2577+ /// where: T0...T_n - our stored data;
2578+ /// CT0...CT_n - control bytes or metadata for `data`.
2579+ /// CTa_0...CTa_m - additional control bytes, where `m = Group::WIDTH - 1` (so that the search
2580+ /// with loading `Group` bytes from the heap works properly, even if the result
2581+ /// of `h1(hash) & self.bucket_mask` is equal to `self.bucket_mask`). See also
2582+ /// `RawTableInner::set_ctrl` function.
2583+ ///
2584+ /// P.S. `h1(hash) & self.bucket_mask` is the same as `hash as usize % self.buckets()` because the number
2585+ /// of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`.
2586+ /// ```
2587+ ///
2588+ /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
23662589 #[ inline]
2367- unsafe fn data_end < T > ( & self ) -> NonNull < T > {
2368- NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2590+ fn data_end < T > ( & self ) -> NonNull < T > {
2591+ unsafe {
2592+ // SAFETY: `self.ctrl` is `NonNull`, so casting it is safe
2593+ NonNull :: new_unchecked ( self . ctrl . as_ptr ( ) . cast ( ) )
2594+ }
23692595 }
23702596
23712597 /// Returns an iterator-like object for a probe sequence on the table.
@@ -2758,7 +2984,7 @@ impl RawTableInner {
27582984 // [Pad], T_n, ..., T1, T0, |CT0, CT1, ..., CT_n|, Group::WIDTH
27592985 // \________ ________/
27602986 // \/
2761- // `n = buckets - 1`, i.e. `RawIndexTableInner ::buckets() - 1`
2987+ // `n = buckets - 1`, i.e. `RawTableInner ::buckets() - 1`
27622988 //
27632989 // where: T0...T_n - our stored data;
27642990 // CT0...CT_n - control bytes or metadata for `data`.
@@ -3000,7 +3226,7 @@ impl RawTableInner {
30003226 ///
30013227 /// # Note
30023228 ///
3003- /// This function must be called only after [`drop_elements`](RawTable ::drop_elements),
3229+ /// This function must be called only after [`drop_elements`](RawTableInner ::drop_elements),
30043230 /// else it can lead to leaking of memory. Also calling this function automatically
30053231 /// makes invalid (dangling) all instances of buckets ([`Bucket`]) and makes invalid
30063232 /// (dangling) the `ctrl` field of the table.
@@ -3521,6 +3747,7 @@ impl<T> RawIterRange<T> {
35213747 ///
35223748 /// * The `len` must be a power of two.
35233749 ///
3750+ /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
35243751 /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
35253752 #[ cfg_attr( feature = "inline-more" , inline) ]
35263753 unsafe fn new ( ctrl : * const u8 , data : Bucket < T > , len : usize ) -> Self {
0 commit comments