Trigger custom __hash__/__eq__ exceptions before entering khash (GH 57052)

matiaslindgren · matiaslindgren · commit f7df8af47886 · 2025-10-27T22:11:08.000-04:00
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -1356,6 +1356,14 @@ cdef class PyObjectHashTable(HashTable):
         cdef:
             khiter_t k
 
+        # GH 57052
+        # khash calls kh_python_hash_func and kh_python_hash_equal (khash_python.h)
+        # repeatedly in its lookup loop. If the object's custom __hash__ or __eq__
+        # raises, those helpers suppress the exception without any warning.
+        # Workaround: trigger such exceptions here, before the khash loop starts.
+        hash(val)
+        val == val
+
         k = kh_get_pymap(self.table, <PyObject*>val)
         if k != self.table.n_buckets:
             return self.table.vals[k]
@@ -1369,6 +1377,8 @@ cdef class PyObjectHashTable(HashTable):
             char* buf
 
         hash(key)
+        # GH 57052: also trigger exceptions raised by a custom __eq__ up front
+        key == key
 
         k = kh_put_pymap(self.table, <PyObject*>key, &ret)
         if kh_exist_pymap(self.table, k):
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -1,6 +1,7 @@
 from collections import namedtuple
 from collections.abc import Generator
 from contextlib import contextmanager
+from itertools import product
 import re
 import struct
 import tracemalloc
@@ -780,3 +781,80 @@ def test_float_complex_int_are_equal_as_objects():
     result = isin(np.array(values, dtype=object), np.asarray(comps))
     expected = np.array([False, True, True, True], dtype=np.bool_)
     tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "throw1hash, throw2hash, throw1eq, throw2eq",
+    product([True, False], repeat=4),
+)
+def test_exceptions_thrown_from_custom_hash_and_eq_methods(
+    throw1hash, throw2hash, throw1eq, throw2eq
+):
+    # GH 57052: get_item must propagate exceptions from a key's __hash__/__eq__
+    class testkey:
+        def __init__(self, value, throw_hash=False, throw_eq=False):
+            self.value = value
+            self.throw_hash = throw_hash
+            self.throw_eq = throw_eq
+
+        def __hash__(self):
+            if self.throw_hash:
+                raise RuntimeError(f"exception in {self!r}.__hash__")
+            return hash(self.value)
+
+        def __eq__(self, other):
+            if self.throw_eq:
+                raise RuntimeError(f"exception in {self!r}.__eq__")
+            return self.value == other.value
+
+        def __repr__(self):
+            return f"{self.__class__.__name__}({self.value}, {self.throw_hash}, {self.throw_eq})"
+
+    table = ht.PyObjectHashTable()
+
+    key1 = testkey(value="hello1")
+    key2 = testkey(value="hello2")
+
+    table.set_item(key1, 123)
+    table.set_item(key2, 456)
+
+    key1.throw_hash = throw1hash
+    key2.throw_hash = throw2hash
+    key1.throw_eq = throw1eq
+    key2.throw_eq = throw2eq
+
+    if throw1hash and throw1eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.") + "__(hash|eq)__"
+        ):
+            table.get_item(key1)
+    elif throw1hash:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__hash__")
+        ):
+            table.get_item(key1)
+    elif throw1eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key1!r}.__eq__")
+        ):
+            table.get_item(key1)
+    else:
+        assert table.get_item(key1) == 123
+
+    if throw2hash and throw2eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.") + "__(hash|eq)__"
+        ):
+            table.get_item(key2)
+    elif throw2hash:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__hash__")
+        ):
+            table.get_item(key2)
+    elif throw2eq:
+        with pytest.raises(
+            RuntimeError, match=re.escape(f"exception in {key2!r}.__eq__")
+        ):
+            table.get_item(key2)
+    else:
+        assert table.get_item(key2) == 456