@@ -191,6 +191,12 @@ static inline int tupleobject_cmp(PyTupleObject *a, PyTupleObject *b) {
191191 return 1 ;
192192}
193193
194+ static inline int _is_pandas_NA_type (PyObject * o ) {
195+ // TODO compare PyTypeObject* C_NA, not strings!
196+ PyObject * type_name = PyType_GetName (Py_TYPE (o ));
197+ return PyUnicode_CompareWithASCIIString (type_name , "NAType" ) == 0 ;
198+ }
199+
194200static inline int pyobject_cmp (PyObject * a , PyObject * b ) {
195201 if (PyErr_Occurred () != NULL ) {
196202 return 0 ;
@@ -213,6 +219,8 @@ static inline int pyobject_cmp(PyObject *a, PyObject *b) {
213219 return tupleobject_cmp ((PyTupleObject * )a , (PyTupleObject * )b );
214220 }
215221 // frozenset isn't yet supported
222+ } else if (_is_pandas_NA_type (a ) || _is_pandas_NA_type (b )) {
223+ return 0 ;
216224 }
217225
218226 int result = PyObject_RichCompareBool (a , b , Py_EQ );
@@ -314,6 +322,13 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
314322 } else if (PyTuple_Check (key )) {
315323 // hash tuple subclasses as builtin tuples
316324 hash = tupleobject_hash ((PyTupleObject * )key );
325+ } else if (PyDict_Check (key ) || PyList_Check (key )) {
326+ // before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
327+ // some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via DataFrame.describe,
328+ // which counts generic objects using PyObjectHashTable.
329+ // using hash = 0 for dict and list objects puts all of them in the same bucket,
330+ // which is not optimal for performance but that is what the behaviour was before.
331+ hash = 0 ;
317332 } else {
318333 hash = PyObject_Hash (key );
319334 }
0 commit comments