Skip to content

Commit ac99ea6

Browse files
hash dict/list as 0, do not compare NA
1 parent 0a4cba8 commit ac99ea6

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

pandas/_libs/include/pandas/vendored/klib/khash_python.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
191191
return 1;
192192
}
193193

194+
/*
 * Report whether the type of `o` is named "NAType".
 *
 * Returns 1 when the name of Py_TYPE(o) compares equal to the ASCII string
 * "NAType", and 0 otherwise. The check is by name only, so any type that
 * happens to be called "NAType" matches (see the TODO below).
 *
 * This helper has no way to report an error to its caller: if the type name
 * cannot be obtained, the pending exception is cleared and 0 is returned.
 */
static inline int _is_pandas_NA_type(PyObject *o) {
  // TODO compare PyTypeObject* C_NA, not strings!
  PyObject *type_name = PyType_GetName(Py_TYPE(o));
  if (type_name == NULL) {
    // Never hand NULL to PyUnicode_CompareWithASCIIString, and do not leave
    // an exception pending for the comparison code that runs after us.
    PyErr_Clear();
    return 0;
  }
  int is_na = PyUnicode_CompareWithASCIIString(type_name, "NAType") == 0;
  // PyType_GetName returns a new reference; release it to avoid leaking one
  // string reference per call.
  Py_DECREF(type_name);
  return is_na;
}
199+
194200
static inline int pyobject_cmp(PyObject *a, PyObject *b) {
195201
if (PyErr_Occurred() != NULL) {
196202
return 0;
@@ -213,6 +219,8 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
213219
return tupleobject_cmp((PyTupleObject *)a, (PyTupleObject *)b);
214220
}
215221
// frozenset isn't yet supported
222+
} else if (_is_pandas_NA_type(a) || _is_pandas_NA_type(b)) {
223+
return 0;
216224
}
217225

218226
int result = PyObject_RichCompareBool(a, b, Py_EQ);
@@ -314,6 +322,13 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
314322
} else if (PyTuple_Check(key)) {
315323
// hash tuple subclasses as builtin tuples
316324
hash = tupleobject_hash((PyTupleObject *)key);
325+
} else if (PyDict_Check(key) || PyList_Check(key)) {
326+
// before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
327+
// some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via DataFrame.describe,
328+
// which counts generic objects using PyObjectHashTable.
329+
// using hash = 0 for dict and list objects puts all of them in the same bucket,
330+
// which is not optimal for performance but that is what the behaviour was before.
331+
hash = 0;
317332
} else {
318333
hash = PyObject_Hash(key);
319334
}

0 commit comments

Comments
 (0)