From d552103676f3583b5d1a26f309a911301704b9da Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 22 Oct 2025 18:24:41 -0700 Subject: [PATCH 1/7] Add `HashTable` methods related to the raw bucket index --- src/raw/mod.rs | 37 ++++++++ src/table.rs | 249 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index c3a56ff16..4449843a8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1233,6 +1233,43 @@ impl RawTable { } } + /// Gets a reference to an element in the table at the given bucket index. + #[inline] + pub fn get_bucket(&self, index: usize) -> Option<&T> { + unsafe { + if index < self.buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_ref()) + } else { + None + } + } + } + + /// Gets a mutable reference to an element in the table at the given bucket index. + #[inline] + pub fn get_bucket_mut(&mut self, index: usize) -> Option<&mut T> { + unsafe { + if index < self.buckets() && self.is_bucket_full(index) { + Some(self.bucket(index).as_mut()) + } else { + None + } + } + } + + /// Returns a pointer to an element in the table, but only after verifying that + /// the index is in-bounds and that its control byte matches the given hash. + #[inline] + pub fn checked_bucket(&self, hash: u64, index: usize) -> Option> { + unsafe { + if index < self.buckets() && *self.table.ctrl(index) == Tag::full(hash) { + Some(self.bucket(index)) + } else { + None + } + } + } + /// Attempts to get mutable references to `N` entries in the table at once. /// /// Returns an array of length `N` with the results of each query. diff --git a/src/table.rs b/src/table.rs index baa96a993..6eb41b2b8 100644 --- a/src/table.rs +++ b/src/table.rs @@ -311,6 +311,55 @@ where } } + /// Returns the bucket index in the table for an entry with the given hash + /// and which satisfies the equality function passed. 
+ /// + /// This can be used to store a borrow-free "reference" to the entry, later using + /// [`get_bucket`][Self::get_bucket], [`get_bucket_mut`][Self::get_bucket_mut], or + /// [`get_bucket_entry`][Self::get_bucket_entry] to access it again without hash probing. + /// + /// The index is only meaningful as long as the table is not resized and no entries are added + /// or removed. After such changes, it may end up pointing to a different entry or none at all. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 1), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 2), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 3), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert_eq!(table.get_bucket(index), Some(&(2, 2))); + /// + /// // Mutation would invalidate any normal reference + /// for (_key, value) in &mut table { + /// *value *= 11; + /// } + /// + /// // The index still reaches the same key with the updated value + /// assert_eq!(table.get_bucket(index), Some(&(2, 22))); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn find_bucket_index(&self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option { + match self.raw.find(hash, eq) { + Some(bucket) => Some(unsafe { self.raw.bucket_index(&bucket) }), + None => None, + } + } + /// Returns an `Entry` for an entry in the table with the given hash /// and which satisfies the equality function passed. 
/// @@ -376,6 +425,121 @@ where } } + /// Returns an `OccupiedEntry` for a bucket index in the table with the given hash, + /// or `None` if the index is out of bounds or if its hash doesn't match. + /// + /// However, note that the hash is only compared for the few bits that are directly stored in + /// the table, and even in full this could not guarantee equality. Use [`OccupiedEntry::get`] + /// if you need to further validate a match. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let hash = hasher(&2); + /// let index = table.find_bucket_index(hash, |val| val.0 == 2).unwrap(); + /// + /// let bad_hash = !hash; + /// assert!(table.get_bucket_entry(bad_hash, index).is_none()); + /// assert!(table.get_bucket_entry(hash, usize::MAX).is_none()); + /// + /// let occupied_entry = table.get_bucket_entry(hash, index).unwrap(); + /// assert_eq!(occupied_entry.get(), &(2, 'b')); + /// assert_eq!(occupied_entry.remove().0, (2, 'b')); + /// + /// assert!(table.find(hash, |val| val.0 == 2).is_none()); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub fn get_bucket_entry(&mut self, hash: u64, index: usize) -> Option> { + Some(OccupiedEntry { + hash, + bucket: self.raw.checked_bucket(hash, index)?, + table: self, + }) + } + + /// Gets a reference to an entry in the table at the given bucket index, + /// or `None` if it is unoccupied or out of bounds. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert_eq!(table.get_bucket(index), Some(&(2, 'b'))); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub fn get_bucket(&self, index: usize) -> Option<&T> { + self.raw.get_bucket(index) + } + + /// Gets a mutable reference to an entry in the table at the given bucket index, + /// or `None` if it is unoccupied or out of bounds. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert_eq!(table.get_bucket(index), Some(&(2, 'b'))); + /// if let Some((_key, value)) = table.get_bucket_mut(index) { + /// *value = 'B'; + /// } + /// assert_eq!(table.get_bucket(index), Some(&(2, 'B'))); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub fn get_bucket_mut(&mut self, index: usize) -> Option<&mut T> { + self.raw.get_bucket_mut(index) + } + /// Inserts an element into the `HashTable` with the given hash value, but /// without checking whether an equivalent element already exists within the /// table. @@ -591,6 +755,44 @@ where self.raw.try_reserve(additional, hasher) } + /// Returns the raw number of buckets allocated in the table. + /// + /// This is an upper bound on any methods that take or return a bucket index, + /// as opposed to the usable [`capacity`](Self::capacity) for entries which is + /// reduced by an unspecified load factor. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// // Each entry is available at some index in the bucket range. + /// let count = (0..table.buckets()) + /// .filter_map(|i| table.get_bucket(i)) + /// .count(); + /// assert_eq!(count, 3); + /// + /// assert_eq!(table.get_bucket(table.buckets()), None); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn buckets(&self) -> usize { + self.raw.buckets() + } + /// Returns the number of elements the table can hold without reallocating. /// /// # Examples @@ -1789,6 +1991,53 @@ where pub fn into_table(self) -> &'a mut HashTable { self.table } + + /// Returns the bucket index in the table for this entry. + /// + /// This can be used to store a borrow-free "reference" to the entry, later using + /// [`HashTable::get_bucket`], [`HashTable::get_bucket_mut`], or + /// [`HashTable::get_bucket_entry`] to access it again without hash probing. + /// + /// The index is only meaningful as long as the table is not resized and no entries are added + /// or removed. After such changes, it may end up pointing to a different entry or none at all. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 1), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 2), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 3), |val| hasher(&val.0)); + /// + /// let index = table + /// .entry(hasher(&2), |val| val.0 == 2, |val| hasher(&val.0)) + /// .or_insert((2, -2)) + /// .bucket_index(); + /// assert_eq!(table.get_bucket(index), Some(&(2, 2))); + /// + /// // Full mutation would invalidate any normal reference + /// for (_key, value) in &mut table { + /// *value *= 11; + /// } + /// + /// // The index still reaches the same key with the updated value + /// assert_eq!(table.get_bucket(index), Some(&(2, 22))); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn bucket_index(&self) -> usize { + unsafe { self.table.raw.bucket_index(&self.bucket) } + } } /// A view into a vacant entry in a `HashTable`. From c0419b40d31a8255a2b1a5ec35194c738996d303 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 23 Oct 2025 10:28:19 -0700 Subject: [PATCH 2/7] Rename `HashTable::buckets` to `num_buckets` --- src/table.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/table.rs b/src/table.rs index 6eb41b2b8..56383add3 100644 --- a/src/table.rs +++ b/src/table.rs @@ -777,19 +777,19 @@ where /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); /// /// // Each entry is available at some index in the bucket range. 
- /// let count = (0..table.buckets()) + /// let count = (0..table.num_buckets()) /// .filter_map(|i| table.get_bucket(i)) /// .count(); /// assert_eq!(count, 3); /// - /// assert_eq!(table.get_bucket(table.buckets()), None); + /// assert_eq!(table.get_bucket(table.num_buckets()), None); /// # } /// # fn main() { /// # #[cfg(feature = "nightly")] /// # test() /// # } /// ``` - pub fn buckets(&self) -> usize { + pub fn num_buckets(&self) -> usize { self.raw.buckets() } From a465633026a5341d884f341c182a5b57ce53294c Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 23 Oct 2025 11:05:37 -0700 Subject: [PATCH 3/7] Make `HashTable` entries use `Tag` instead of a full hash `VacantEntry` now stores a `Tag` instead of a full `hash: u64`. This means that `OccupiedEntry` no longer needs to store a hash at all (only its bucket and table reference), because it can read that tag back from the bucket's control byte for `remove -> VacantEntry`. The `get_bucket_entry` method doesn't need a hash argument either. Also, since `OccupiedEntry` is now smaller, `enum Entry` will be the same size as `VacantEntry` by using a niche for the discriminant. (Although this is not _guaranteed_ by the compiler.) --- src/raw/mod.rs | 47 +++++++++++++++++++++++++++++++++++++++-------- src/table.rs | 44 ++++++++++++++++++-------------------------- 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 4449843a8..567cd8ad2 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -838,6 +838,19 @@ impl RawTable { (item.read(), self.bucket_index(&item)) } + /// Removes an element from the table, returning it. + /// + /// This also returns an index to the newly free bucket + /// and the former `Tag` for that bucket.
+ #[cfg_attr(feature = "inline-more", inline)] + #[allow(clippy::needless_pass_by_value)] + pub(crate) unsafe fn remove_tagged(&mut self, item: Bucket) -> (T, usize, Tag) { + let index = self.bucket_index(&item); + let tag = *self.table.ctrl(index); + self.table.erase(index); + (item.read(), index, tag) + } + /// Finds and removes an element from the table, returning it. #[cfg_attr(feature = "inline-more", inline)] pub fn remove_entry(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> Option { @@ -1172,8 +1185,8 @@ impl RawTable { } } - /// Inserts a new element into the table at the given index, and returns its - /// raw bucket. + /// Inserts a new element into the table at the given index with the given hash, + /// and returns its raw bucket. /// /// # Safety /// @@ -1182,8 +1195,26 @@ impl RawTable { /// occurred since that call. #[inline] pub unsafe fn insert_at_index(&mut self, hash: u64, index: usize, value: T) -> Bucket { + self.insert_tagged_at_index(Tag::full(hash), index, value) + } + + /// Inserts a new element into the table at the given index with the given tag, + /// and returns its raw bucket. + /// + /// # Safety + /// + /// `index` must point to a slot previously returned by + /// `find_or_find_insert_index`, and no mutation of the table must have + /// occurred since that call. + #[inline] + pub(crate) unsafe fn insert_tagged_at_index( + &mut self, + tag: Tag, + index: usize, + value: T, + ) -> Bucket { let old_ctrl = *self.table.ctrl(index); - self.table.record_item_insert_at(index, old_ctrl, hash); + self.table.record_item_insert_at(index, old_ctrl, tag); let bucket = self.bucket(index); bucket.write(value); @@ -1258,11 +1289,11 @@ impl RawTable { } /// Returns a pointer to an element in the table, but only after verifying that - /// the index is in-bounds and that its control byte matches the given hash. + /// the index is in-bounds and the bucket is occupied. 
#[inline] - pub fn checked_bucket(&self, hash: u64, index: usize) -> Option> { + pub fn checked_bucket(&self, index: usize) -> Option> { unsafe { - if index < self.buckets() && *self.table.ctrl(index) == Tag::full(hash) { + if index < self.buckets() && self.is_bucket_full(index) { Some(self.bucket(index)) } else { None @@ -2442,9 +2473,9 @@ impl RawTableInner { } #[inline] - unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, hash: u64) { + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, new_ctrl: Tag) { self.growth_left -= usize::from(old_ctrl.special_is_empty()); - self.set_ctrl_hash(index, hash); + self.set_ctrl(index, new_ctrl); self.items += 1; } diff --git a/src/table.rs b/src/table.rs index 56383add3..35a7b11bb 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1,6 +1,7 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData}; use crate::{ + control::Tag, raw::{ Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawIterHash, RawTable, @@ -303,7 +304,6 @@ where ) -> Result, AbsentEntry<'_, T, A>> { match self.raw.find(hash, eq) { Some(bucket) => Ok(OccupiedEntry { - hash, bucket, table: self, }), @@ -413,24 +413,19 @@ where ) -> Entry<'_, T, A> { match self.raw.find_or_find_insert_index(hash, eq, hasher) { Ok(bucket) => Entry::Occupied(OccupiedEntry { - hash, bucket, table: self, }), Err(insert_index) => Entry::Vacant(VacantEntry { - hash, + tag: Tag::full(hash), index: insert_index, table: self, }), } } - /// Returns an `OccupiedEntry` for a bucket index in the table with the given hash, - /// or `None` if the index is out of bounds or if its hash doesn't match. - /// - /// However, note that the hash is only compared for the few bits that are directly stored in - /// the table, and even in full this could not guarantee equality. Use [`OccupiedEntry::get`] - /// if you need to further validate a match. 
+ /// Returns an `OccupiedEntry` for the given bucket index in the table, + /// or `None` if it is unoccupied or out of bounds. /// /// # Examples /// @@ -447,18 +442,15 @@ where /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); /// - /// let hash = hasher(&2); - /// let index = table.find_bucket_index(hash, |val| val.0 == 2).unwrap(); + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); /// - /// let bad_hash = !hash; - /// assert!(table.get_bucket_entry(bad_hash, index).is_none()); - /// assert!(table.get_bucket_entry(hash, usize::MAX).is_none()); + /// assert!(table.get_bucket_entry(usize::MAX).is_none()); /// - /// let occupied_entry = table.get_bucket_entry(hash, index).unwrap(); + /// let occupied_entry = table.get_bucket_entry(index).unwrap(); /// assert_eq!(occupied_entry.get(), &(2, 'b')); /// assert_eq!(occupied_entry.remove().0, (2, 'b')); /// - /// assert!(table.find(hash, |val| val.0 == 2).is_none()); + /// assert!(table.find(hasher(&2), |val| val.0 == 2).is_none()); /// # } /// # fn main() { /// # #[cfg(feature = "nightly")] @@ -466,10 +458,9 @@ where /// # } /// ``` #[inline] - pub fn get_bucket_entry(&mut self, hash: u64, index: usize) -> Option> { + pub fn get_bucket_entry(&mut self, index: usize) -> Option> { Some(OccupiedEntry { - hash, - bucket: self.raw.checked_bucket(hash, index)?, + bucket: self.raw.checked_bucket(index)?, table: self, }) } @@ -573,7 +564,6 @@ where ) -> OccupiedEntry<'_, T, A> { let bucket = self.raw.insert(hash, value, hasher); OccupiedEntry { - hash, bucket, table: self, } @@ -1771,7 +1761,6 @@ pub struct OccupiedEntry<'a, T, A = Global> where A: Allocator, { - hash: u64, bucket: Bucket, table: &'a mut HashTable, } @@ -1840,11 +1829,11 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn remove(self) -> (T, VacantEntry<'a, T, A>) { - let (val, index) = unsafe { 
self.table.raw.remove(self.bucket) }; + let (val, index, tag) = unsafe { self.table.raw.remove_tagged(self.bucket) }; ( val, VacantEntry { - hash: self.hash, + tag, index, table: self.table, }, @@ -2083,7 +2072,7 @@ pub struct VacantEntry<'a, T, A = Global> where A: Allocator, { - hash: u64, + tag: Tag, index: usize, table: &'a mut HashTable, } @@ -2131,9 +2120,12 @@ where /// ``` #[inline] pub fn insert(self, value: T) -> OccupiedEntry<'a, T, A> { - let bucket = unsafe { self.table.raw.insert_at_index(self.hash, self.index, value) }; + let bucket = unsafe { + self.table + .raw + .insert_tagged_at_index(self.tag, self.index, value) + }; OccupiedEntry { - hash: self.hash, bucket, table: self.table, } From 901742d97d34e8bd94ad6afd4cfd5d6b4e07f603 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 23 Oct 2025 16:02:58 -0700 Subject: [PATCH 4/7] Add `get_bucket_unchecked` and `get_bucket_unchecked_mut` --- src/table.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/src/table.rs b/src/table.rs index 35a7b11bb..e296ce6e0 100644 --- a/src/table.rs +++ b/src/table.rs @@ -496,6 +496,49 @@ where self.raw.get_bucket(index) } + /// Gets a reference to an entry in the table at the given bucket index, + /// without checking whether the index is in-bounds or occupied. + /// + /// For a safe alternative, see [`get_bucket`](Self::get_bucket). + /// + /// # Safety + /// + /// It is *[undefined behavior]* to call this method with an index that is + /// out-of-bounds or unoccupied, even if the resulting reference is not used. 
+ /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert!(std::ptr::eq( + /// table.get_bucket(index).unwrap(), + /// unsafe { table.get_bucket_unchecked(index) }, + /// )); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub unsafe fn get_bucket_unchecked(&self, index: usize) -> &T { + self.raw.bucket(index).as_ref() + } + /// Gets a mutable reference to an entry in the table at the given bucket index, /// or `None` if it is unoccupied or out of bounds. /// @@ -531,6 +574,49 @@ where self.raw.get_bucket_mut(index) } + /// Gets a mutable reference to an entry in the table at the given bucket index, + /// without checking whether the index is in-bounds or occupied. + /// + /// For a safe alternative, see [`get_bucket_mut`](Self::get_bucket_mut). + /// + /// # Safety + /// + /// It is *[undefined behavior]* to call this method with an index that is + /// out-of-bounds or unoccupied, even if the resulting reference is not used. 
+ /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert!(std::ptr::eq( + /// table.get_bucket_mut(index).unwrap(), + /// unsafe { table.get_bucket_unchecked_mut(index) }, + /// )); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub unsafe fn get_bucket_unchecked_mut(&mut self, index: usize) -> &mut T { + self.raw.bucket(index).as_mut() + } + /// Inserts an element into the `HashTable` with the given hash value, but /// without checking whether an equivalent element already exists within the /// table. From a6592a8a4056c595e8561b35190b1bfc97b73ab4 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 1 Nov 2025 10:18:15 -0700 Subject: [PATCH 5/7] `get_bucket_entry -> Result` --- src/table.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/table.rs b/src/table.rs index e296ce6e0..f4e10408a 100644 --- a/src/table.rs +++ b/src/table.rs @@ -425,7 +425,7 @@ where } /// Returns an `OccupiedEntry` for the given bucket index in the table, - /// or `None` if it is unoccupied or out of bounds. + /// or `AbsentEntry` if it is unoccupied or out of bounds. 
/// /// # Examples /// @@ -444,7 +444,7 @@ where /// /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); /// - /// assert!(table.get_bucket_entry(usize::MAX).is_none()); + /// assert!(table.get_bucket_entry(usize::MAX).is_err()); /// /// let occupied_entry = table.get_bucket_entry(index).unwrap(); /// assert_eq!(occupied_entry.get(), &(2, 'b')); @@ -458,11 +458,17 @@ where /// # } /// ``` #[inline] - pub fn get_bucket_entry(&mut self, index: usize) -> Option> { - Some(OccupiedEntry { - bucket: self.raw.checked_bucket(index)?, - table: self, - }) + pub fn get_bucket_entry( + &mut self, + index: usize, + ) -> Result, AbsentEntry<'_, T, A>> { + match self.raw.checked_bucket(index) { + Some(bucket) => Ok(OccupiedEntry { + bucket, + table: self, + }), + None => Err(AbsentEntry { table: self }), + } } /// Gets a reference to an entry in the table at the given bucket index, @@ -2224,10 +2230,11 @@ where } } -/// Type representing the absence of an entry, as returned by [`HashTable::find_entry`]. +/// Type representing the absence of an entry, as returned by [`HashTable::find_entry`] +/// and [`HashTable::get_bucket_entry`]. /// /// This type only exists due to [limitations] in Rust's NLL borrow checker. In -/// the future, `find_entry` will return an `Option<OccupiedEntry>` and this +/// the future, those methods will return an `Option<OccupiedEntry>` and this /// type will be removed.
/// /// [limitations]: https://smallcultfollowing.com/babysteps/blog/2018/06/15/mir-based-borrow-check-nll-status-update/#polonius From 6b3687efdc08e34c437f58f645b527e88207bc08 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 1 Nov 2025 10:25:32 -0700 Subject: [PATCH 6/7] Add `HashTable::get_bucket_entry_unchecked` --- src/table.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/table.rs b/src/table.rs index f4e10408a..22b032ba1 100644 --- a/src/table.rs +++ b/src/table.rs @@ -471,6 +471,52 @@ where } } + /// Returns an `OccupiedEntry` for the given bucket index in the table, + /// without checking whether the index is in-bounds or occupied. + /// + /// For a safe alternative, see [`get_bucket_entry`](Self::get_bucket_entry). + /// + /// # Safety + /// + /// It is *[undefined behavior]* to call this method with an index that is + /// out-of-bounds or unoccupied, even if the resulting entry is not used. + /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), (1, 'a'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&2), (2, 'b'), |val| hasher(&val.0)); + /// table.insert_unique(hasher(&3), (3, 'c'), |val| hasher(&val.0)); + /// + /// let index = table.find_bucket_index(hasher(&2), |val| val.0 == 2).unwrap(); + /// assert!(std::ptr::eq( + /// table.get_bucket_entry(index).unwrap().into_mut(), + /// unsafe { table.get_bucket_entry_unchecked(index).into_mut() }, + /// )); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + #[inline] + pub unsafe 
fn get_bucket_entry_unchecked(&mut self, index: usize) -> OccupiedEntry<'_, T, A> { + OccupiedEntry { + bucket: self.raw.bucket(index), + table: self, + } + } + /// Gets a reference to an entry in the table at the given bucket index, /// or `None` if it is unoccupied or out of bounds. /// From 12ad6f4f9165840ce711f4dccd4d0ebf424836c2 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 1 Nov 2025 11:35:33 -0700 Subject: [PATCH 7/7] Add `HashTable::iter_buckets` and `iter_hash_buckets` --- src/raw/mod.rs | 54 +++++++++++++++--- src/table.rs | 149 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 11 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 567cd8ad2..aa7b97d24 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1414,6 +1414,28 @@ impl RawTable { RawIterHash::new(self, hash) } + /// Returns an iterator over occupied bucket indices that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHashIndices`. Because we cannot make the `next` method unsafe on the + /// `RawIterHashIndices` struct, we have to make the `iter_hash_buckets` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub(crate) unsafe fn iter_hash_buckets(&self, hash: u64) -> RawIterHashIndices { + RawIterHashIndices::new(&self.table, hash) + } + + /// Returns an iterator over full buckets indices in the table. + /// + /// See [`RawTableInner::full_buckets_indices`] for safety conditions. + #[inline(always)] + pub(crate) unsafe fn full_buckets_indices(&self) -> FullBucketsIndices { + self.table.full_buckets_indices() + } + /// Returns an iterator which removes all elements from the table without /// freeing the memory. 
#[cfg_attr(feature = "inline-more", inline)] @@ -3871,6 +3893,7 @@ impl FusedIterator for RawIter {} /// created will be yielded by that iterator. /// - The order in which the iterator yields indices of the buckets is unspecified /// and may change in the future. +#[derive(Clone)] pub(crate) struct FullBucketsIndices { // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. @@ -3888,6 +3911,14 @@ pub(crate) struct FullBucketsIndices { items: usize, } +impl Default for FullBucketsIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. + unsafe { RawTableInner::NEW.full_buckets_indices() } + } +} + impl FullBucketsIndices { /// Advances the iterator and returns the next value. /// @@ -4153,12 +4184,12 @@ impl FusedIterator for RawDrain<'_, T, A> {} /// - The order in which the iterator yields buckets is unspecified and may /// change in the future. pub struct RawIterHash { - inner: RawIterHashInner, + inner: RawIterHashIndices, _marker: PhantomData, } #[derive(Clone)] -struct RawIterHashInner { +pub(crate) struct RawIterHashIndices { // See `RawTableInner`'s corresponding fields for details. // We can't store a `*const RawTableInner` as it would get // invalidated by the user calling `&mut` methods on `RawTable`. @@ -4181,7 +4212,7 @@ impl RawIterHash { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(table: &RawTable, hash: u64) -> Self { RawIterHash { - inner: RawIterHashInner::new(&table.table, hash), + inner: RawIterHashIndices::new(&table.table, hash), _marker: PhantomData, } } @@ -4201,14 +4232,21 @@ impl Default for RawIterHash { #[cfg_attr(feature = "inline-more", inline)] fn default() -> Self { Self { - // SAFETY: Because the table is static, it always outlives the iter. 
- inner: unsafe { RawIterHashInner::new(&RawTableInner::NEW, 0) }, + inner: RawIterHashIndices::default(), _marker: PhantomData, } } } -impl RawIterHashInner { +impl Default for RawIterHashIndices { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + // SAFETY: Because the table is static, it always outlives the iter. + unsafe { RawIterHashIndices::new(&RawTableInner::NEW, 0) } + } +} + +impl RawIterHashIndices { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(table: &RawTableInner, hash: u64) -> Self { let tag_hash = Tag::full(hash); @@ -4216,7 +4254,7 @@ impl RawIterHashInner { let group = Group::load(table.ctrl(probe_seq.pos)); let bitmask = group.match_tag(tag_hash).into_iter(); - RawIterHashInner { + RawIterHashIndices { bucket_mask: table.bucket_mask, ctrl: table.ctrl, tag_hash, @@ -4246,7 +4284,7 @@ impl Iterator for RawIterHash { } } -impl Iterator for RawIterHashInner { +impl Iterator for RawIterHashIndices { type Item = usize; fn next(&mut self) -> Option { diff --git a/src/table.rs b/src/table.rs index 22b032ba1..75ad681cb 100644 --- a/src/table.rs +++ b/src/table.rs @@ -3,8 +3,8 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData}; use crate::{ control::Tag, raw::{ - Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawIterHash, - RawTable, + Allocator, Bucket, FullBucketsIndices, Global, RawDrain, RawExtractIf, RawIntoIter, + RawIter, RawIterHash, RawIterHashIndices, RawTable, }, TryReserveError, }; @@ -1000,7 +1000,7 @@ where /// let mut table = HashTable::new(); /// let hasher = DefaultHashBuilder::default(); /// let hasher = |val: &_| hasher.hash_one(val); - /// table.insert_unique(hasher(&"a"), "b", hasher); + /// table.insert_unique(hasher(&"a"), "a", hasher); /// table.insert_unique(hasher(&"b"), "b", hasher); /// /// // Will print in an arbitrary order. @@ -1071,6 +1071,42 @@ where } } + /// An iterator producing the `usize` indices of all occupied buckets. 
+ /// + /// The order in which the iterator yields indices is unspecified + /// and may change in the future. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&"a"), "a", hasher); + /// table.insert_unique(hasher(&"b"), "b", hasher); + /// + /// // Will print in an arbitrary order. + /// for index in table.iter_buckets() { + /// println!("{index}: {}", table.get_bucket(index).unwrap()); + /// } + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter_buckets(&self) -> IterBuckets<'_> { + IterBuckets { + inner: unsafe { self.raw.full_buckets_indices() }, + marker: PhantomData, + } + } + /// An iterator visiting all elements which may match a hash. /// The iterator element type is `&'a T`. /// @@ -1163,6 +1199,47 @@ where } } + /// An iterator producing the `usize` indices of all buckets which may match a hash. + /// + /// This iterator may return indices from the table that have a hash value + /// different than the one provided. You should always validate the returned + /// values before using them. + /// + /// The order in which the iterator yields indices is unspecified + /// and may change in the future. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&"a"), "a", hasher); + /// table.insert_unique(hasher(&"a"), "b", hasher); + /// table.insert_unique(hasher(&"b"), "c", hasher); + /// + /// // Will print the indices with "a" and "b" (and possibly "c") in an arbitrary order. + /// for index in table.iter_hash_buckets(hasher(&"a")) { + /// println!("{index}: {}", table.get_bucket(index).unwrap()); + /// } + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter_hash_buckets(&self, hash: u64) -> IterHashBuckets<'_> { + IterHashBuckets { + inner: unsafe { self.raw.iter_hash_buckets(hash) }, + marker: PhantomData, + } + } + /// Retains only the elements specified by the predicate. /// /// In other words, remove all elements `e` such that `f(&e)` returns `false`. @@ -2484,6 +2561,46 @@ where } } +/// An iterator producing the `usize` indices of all occupied buckets, +/// within the range `0..table.num_buckets()`. +/// +/// The order in which the iterator yields indices is unspecified +/// and may change in the future. +/// +/// This `struct` is created by the [`HashTable::iter_buckets`] method. See its +/// documentation for more. 
+#[derive(Clone, Default)] +pub struct IterBuckets<'a> { + inner: FullBucketsIndices, + marker: PhantomData<&'a ()>, +} + +impl Iterator for IterBuckets<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl ExactSizeIterator for IterBuckets<'_> { + fn len(&self) -> usize { + self.inner.len() + } +} + +impl FusedIterator for IterBuckets<'_> {} + +impl fmt::Debug for IterBuckets<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + /// An iterator over the entries of a `HashTable` that could match a given hash. /// The iterator element type is `&'a T`. /// @@ -2610,6 +2727,32 @@ where } } +/// An iterator producing the `usize` indices of all buckets which may match a hash. +/// +/// This `struct` is created by the [`HashTable::iter_hash_buckets`] method. See its +/// documentation for more. +#[derive(Clone, Default)] +pub struct IterHashBuckets<'a> { + inner: RawIterHashIndices, + marker: PhantomData<&'a ()>, +} + +impl Iterator for IterHashBuckets<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + self.inner.next() + } +} + +impl FusedIterator for IterHashBuckets<'_> {} + +impl fmt::Debug for IterHashBuckets<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + /// An owning iterator over the entries of a `HashTable` in arbitrary order. /// The iterator element type is `T`. ///