Skip to content

Commit 33f13c4

Browse files
BUG: Preserve extension dtypes in MultiIndex reconstruction
1 parent d6c267d commit 33f13c4

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

pandas/core/base.py

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1303,21 +1303,25 @@ def factorize(
1303 1303
uniques = self[:0]
1304 1304
else:
1305 1305
# GH#62337: preserve extension dtypes by reconstructing from original
1306+
# First create the MultiIndex using the standard constructor
1307+
uniques = self._constructor(uniques)
1308+
1309+
# Then replace levels to preserve extension dtypes and set names
1306 1310
if len(uniques) > 0:
1307-
# Map back to original positions to preserve dtypes
1308-
unique_positions = np.empty(len(uniques), dtype=np.intp)
1309-
seen = {}
1310-
pos = 0
1311-
for i, code in enumerate(codes):
1312-
if code not in seen and code != -1:
1313-
unique_positions[pos] = i
1314-
seen[code] = pos
1315-
pos += 1
1316-
1317-
# Reconstruct uniques from original MultiIndex to preserve dtypes
1318-
uniques = self[unique_positions]
1319-
else:
1320-
uniques = self[:0]
1311+
new_levels = []
1312+
for i, (level, orig_level) in enumerate(
1313+
zip(uniques.levels, self.levels, strict=False)
1314+
):
1315+
try:
1316+
# Try to cast to original extension dtype
1317+
new_level = level.astype(orig_level.dtype)
1318+
new_levels.append(new_level)
1319+
except (TypeError, ValueError):
1320+
# If casting fails, keep the inferred level
1321+
new_levels.append(level)
1322+
1323+
# Reconstruct MultiIndex with preserved dtypes and names
1324+
uniques = uniques.set_levels(new_levels).set_names(self.names)
1321 1325
else:
1322 1326
from pandas import Index
1323 1327

0 commit comments

Comments
 (0)