Skip to content

Commit d6c267d

Browse files
BUG: Preserve extension dtypes in MultiIndex.factorize() #62337
1 parent ac9cd15 commit d6c267d

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

pandas/core/base.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1302,7 +1302,22 @@ def factorize(
13021302
# GH#57517
13031303
uniques = self[:0]
13041304
else:
1305-
uniques = self._constructor(uniques)
1305+
# GH#62337: preserve extension dtypes by reconstructing from original
1306+
if len(uniques) > 0:
1307+
# Map back to original positions to preserve dtypes
1308+
unique_positions = np.empty(len(uniques), dtype=np.intp)
1309+
seen = {}
1310+
pos = 0
1311+
for i, code in enumerate(codes):
1312+
if code not in seen and code != -1:
1313+
unique_positions[pos] = i
1314+
seen[code] = pos
1315+
pos += 1
1316+
1317+
# Reconstruct uniques from original MultiIndex to preserve dtypes
1318+
uniques = self[unique_positions]
1319+
else:
1320+
uniques = self[:0]
13061321
else:
13071322
from pandas import Index
13081323

pandas/tests/indexes/multi/test_factorize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ def test_factorize_preserves_names(self):
7474

7575
codes, uniques = mi.factorize()
7676

77-
tm.assert_index_equal(uniques.names, mi.names)
77+
tm.assert_index_equal(pd.Index(uniques.names), pd.Index(mi.names))
7878

7979
def test_factorize_extension_dtype_with_sort(self):
8080
# GH#62337: factorize with sort=True should preserve extension dtypes

0 commit comments

Comments
 (0)