Skip to content

Commit 2b0fa82

Browse files
Stop using hash=0 for lists; clean up comment
1 parent eb120e9 commit 2b0fa82

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

pandas/_libs/include/pandas/vendored/klib/khash_python.h

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -322,12 +322,11 @@ static inline khuint32_t kh_python_hash_func(PyObject *key) {
 } else if (PyTuple_Check(key)) {
 // hash tuple subclasses as builtin tuples
 hash = tupleobject_hash((PyTupleObject *)key);
-} else if (PyDict_Check(key) || PyList_Check(key)) {
-// before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
-// some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via DataFrame.describe,
-// which counts generic objects using PyObjectHashTable.
-// using hash = 0 for dict and list objects puts all of them in the same bucket,
-// which is not optimal for performance but that is what the behaviour was before.
+} else if (PyDict_Check(key)) {
+// Before GH 57052 was fixed, all exceptions raised from PyObject_Hash were suppressed.
+// some features rely on this behaviour, e.g. _libs.hashtable.value_count_object via
+// DataFrame.describe, which counts generic objects using PyObjectHashTable.
+// Using hash = 0 puts all objects in the same bucket, which is bad for performance but that is how it worked before.
 hash = 0;
 } else {
 hash = PyObject_Hash(key);

0 commit comments

Comments (0)