Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,7 @@ Other
^^^^^
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
- Bug in :class:`PyObjectHashTable` that would silently suppress exceptions raised from custom ``__hash__`` and ``__eq__`` methods during hashing (:issue:`57052`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you able to add a test that uses a public API that would be fixed by your changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

- Bug in :func:`eval` where expressions on :class:`ExtensionArray` that include division ``/`` failed with a ``TypeError``. (:issue:`58748`)
- Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`)
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
Expand Down
10 changes: 10 additions & 0 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,14 @@ cdef class PyObjectHashTable(HashTable):
cdef:
khiter_t k

# GH 57052
# in khash_python.h, kh_python_hash_equal and kh_python_hash_func will be called repeatedly by khash in a loop.
# if object implements custom __hash__ and __eq__ methods that can raise exceptions,
# kh_python_hash_{equal,func} will suppress the exceptions without warnings.
# as a workaround: try triggering exceptions here, before starting the khash loop
hash(val)
val == val

k = kh_get_pymap(self.table, <PyObject*>val)
if k != self.table.n_buckets:
return self.table.vals[k]
Expand All @@ -1369,6 +1377,8 @@ cdef class PyObjectHashTable(HashTable):
char* buf

hash(key)
# GH 57052
key == key

k = kh_put_pymap(self.table, <PyObject*>key, &ret)
if kh_exist_pymap(self.table, k):
Expand Down
78 changes: 78 additions & 0 deletions pandas/tests/libs/test_hashtable.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import namedtuple
from collections.abc import Generator
from contextlib import contextmanager
from itertools import product
import re
import struct
import tracemalloc
Expand Down Expand Up @@ -780,3 +781,80 @@ def test_float_complex_int_are_equal_as_objects():
result = isin(np.array(values, dtype=object), np.asarray(comps))
expected = np.array([False, True, True, True], dtype=np.bool_)
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize(
    "throw1hash, throw2hash, throw1eq, throw2eq",
    product([True, False], repeat=4),
)
def test_exceptions_thrown_from_custom_hash_and_eq_methods(
    throw1hash, throw2hash, throw1eq, throw2eq
):
    """Check that errors raised by a key's ``__hash__``/``__eq__`` reach the caller.

    Two keys are stored while they behave normally; afterwards each key is
    switched to raise from ``__hash__`` and/or ``__eq__`` according to the
    parametrized flags, and ``get_item`` is expected either to raise the
    matching ``RuntimeError`` or to return the stored value.
    """
    # GH 57052

    class testkey:
        def __init__(self, value, throw_hash=False, throw_eq=False):
            self.value = value
            self.throw_hash = throw_hash
            self.throw_eq = throw_eq

        def __hash__(self):
            if self.throw_hash:
                raise RuntimeError(f"exception in {self!r}.__hash__")
            return hash(self.value)

        def __eq__(self, other):
            if self.throw_eq:
                raise RuntimeError(f"exception in {self!r}.__eq__")
            return self.value == other.value

        def __repr__(self):
            return f"testkey({self.value}, {self.throw_hash}, {self.throw_eq})"

    table = ht.PyObjectHashTable()

    key1 = testkey(value="hello1")
    key2 = testkey(value="hello2")

    # Insert while both keys are still well-behaved.
    table.set_item(key1, 123)
    table.set_item(key2, 456)

    # Only now make the keys misbehave, so that just the lookups are affected.
    key1.throw_hash, key1.throw_eq = throw1hash, throw1eq
    key2.throw_hash, key2.throw_eq = throw2hash, throw2eq

    for key, stored in ((key1, 123), (key2, 456)):
        if not (key.throw_hash or key.throw_eq):
            assert table.get_item(key) == stored
            continue

        if key.throw_hash and key.throw_eq:
            # Either method may be the one that raises; accept both messages.
            pattern = re.escape(f"exception in {key!r}.") + "__(hash|eq)__"
        elif key.throw_hash:
            pattern = re.escape(f"exception in {key!r}.__hash__")
        else:
            pattern = re.escape(f"exception in {key!r}.__eq__")

        with pytest.raises(RuntimeError, match=pattern):
            table.get_item(key)
Loading