Skip to content

Commit c94b480

Browse files
fix unhashable UserDict in JSONArray.duplicated
1 parent 2b0fa82 commit c94b480

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

pandas/tests/extension/json/array.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from typing import (
2626
TYPE_CHECKING,
2727
Any,
28+
Literal,
2829
)
2930

3031
import numpy as np
@@ -41,12 +42,16 @@
4142
ExtensionArray,
4243
ExtensionDtype,
4344
)
45+
from pandas.core.algorithms import duplicated
4446
from pandas.core.indexers import unpack_tuple_and_ellipses
4547

4648
if TYPE_CHECKING:
4749
from collections.abc import Mapping
4850

49-
from pandas._typing import type_t
51+
from pandas._typing import (
52+
npt,
53+
type_t,
54+
)
5055

5156

5257
class JSONDtype(ExtensionDtype):
@@ -254,6 +259,17 @@ def _pad_or_backfill(self, *, method, limit=None, copy=True):
254259
# GH#56616 - test EA method without limit_area argument
255260
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
256261

262+
def duplicated(
    self, keep: Literal["first", "last", False] = "first"
) -> npt.NDArray[np.bool_]:
    """
    Compute the duplicated-mask by delegating to the shared algorithm.

    Parameters
    ----------
    keep : {"first", "last", False}, default "first"
        Forwarded unchanged to ``pandas.core.algorithms.duplicated``.

    Returns
    -------
    npt.NDArray[np.bool_]
        Whatever ``pandas.core.algorithms.duplicated`` returns for the
        converted values.
    """
    # GH 57052: the hash table behind pd.core.algorithms.duplicated does not
    # support UserDict values, whereas plain dict values are always hashed
    # as 0 for backwards compatibility. Each element is therefore turned
    # into a plain dict before it is handed over.
    na_mask = self.isna().astype(np.bool_, copy=False)
    plain_dicts = list(map(dict, self))
    as_objects = np.array(plain_dicts, dtype="object")
    return duplicated(values=as_objects, keep=keep, mask=na_mask)
272+
257273

258274
def make_data(n: int):
259275
# TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer

0 commit comments

Comments
 (0)