Skip to content

Commit 2462e30

Browse files
hash list as 0 again, Series.isin hashes lists
1 parent c94b480 commit 2462e30

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

pandas/_libs/include/pandas/vendored/klib/khash_python.h

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -322,11 +322,13 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
322322
} else if (PyTuple_Check(key)) {
323323
// hash tuple subclasses as builtin tuples
324324
hash = tupleobject_hash((PyTupleObject *)key);
325-
} else if (PyDict_Check(key)) {
325+
} else if (PyDict_Check(key) || PyList_Check(key)) {
326326
// Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
327-
// some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via
328-
// DataFrame.describe, which counts generic objects using PyObjectHashTable.
329-
// Using hash = 0 puts all objects in the same bucket, which is bad for performance but that is how it worked before.
327+
// Existing code that relies on this behaviour is for example:
328+
// * _libs.hashtable.value_count_object via DataFrame.describe
329+
// * _libs.hashtable.ismember_object via Series.isin
330+
// Using hash = 0 puts all dict and list objects in the same bucket,
331+
// which is bad for performance but that is how it worked before.
330332
hash = 0;
331333
} else {
332334
hash = PyObject_Hash(key);

0 commit comments

Comments
 (0)