Skip to content

Commit ef662a0

Browse files
small refactor
1 parent 4fdc459 commit ef662a0

File tree

3 files changed

+17
-15
lines changed

3 files changed

+17
-15
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ Reshaping
11451145
^^^^^^^^^
11461146
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
11471147
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
1148-
- Bug in :meth:`DataFrame.combine_first` where large ``int64``/``uint64`` values could lose precision when an outer alignment introduced missing values. (:issue:`60128`)
1148+
- Bug in :meth:`DataFrame.combine_first` where large ``int64``/``uint64`` values could lose precision because alignment round-tripped them through ``float64`` (:issue:`60128`)
11491149
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
11501150
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
11511151
- Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)

pandas/core/frame.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9026,7 +9026,7 @@ def combine(
90269026
2 NaN 3.0 1.0
90279027
"""
90289028

9029-
# GH#62691 Prevent lossy conversion of wide integers
9029+
# GH#60128 Prevent lossy conversion of wide integers
90309030
# by proactively promoting them to their nullable versions
90319031
# because an outer align will force a round trip through float64.
90329032
def _promote_wide_ints(df: DataFrame) -> DataFrame:
@@ -9042,17 +9042,13 @@ def _promote_wide_ints(df: DataFrame) -> DataFrame:
90429042
df = df.astype(cast_map)
90439043
return df
90449044

9045-
# store originals before promotion
9046-
self_original = self
9047-
other_original = other
9048-
self = _promote_wide_ints(self)
9049-
other = _promote_wide_ints(other)
9050-
9051-
def _restore_wide_ints(df: DataFrame) -> DataFrame:
9045+
def _restore_wide_ints(
9046+
self_original: DataFrame, other_original: DataFrame, combined_df: DataFrame
9047+
) -> DataFrame:
90529048
"""Restores previously int64/uint64 columns if they don't have NAs."""
90539049
cast_map: dict[str, str] = {}
9054-
for col in df.columns:
9055-
ser = df[col]
9050+
for col in combined_df.columns:
9051+
ser = combined_df[col]
90569052
orig_dt_self = self_original.dtypes.get(col)
90579053
orig_dt_other = other_original.dtypes.get(col)
90589054

@@ -9068,8 +9064,14 @@ def _restore_wide_ints(df: DataFrame) -> DataFrame:
90689064
cast_map[col] = find_common_type(dtypes_to_resolve)
90699065

90709066
if cast_map:
9071-
df = df.astype(cast_map)
9072-
return df
9067+
combined_df = combined_df.astype(cast_map)
9068+
return combined_df
9069+
9070+
# store originals and promote wide ints before align
9071+
self_original = self
9072+
other_original = other
9073+
self = _promote_wide_ints(self)
9074+
other = _promote_wide_ints(other)
90739075

90749076
other_idxlen = len(other.index) # save for compare
90759077
other_columns = other.columns
@@ -9138,7 +9140,7 @@ def _restore_wide_ints(df: DataFrame) -> DataFrame:
91389140

91399141
# convert_objects just in case
91409142
frame_result = self._constructor(result, index=new_index, columns=new_columns)
9141-
frame_result = _restore_wide_ints(frame_result)
9143+
frame_result = _restore_wide_ints(self_original, other_original, frame_result)
91429144
return frame_result.__finalize__(self, method="combine")
91439145

91449146
def combine_first(self, other: DataFrame) -> DataFrame:

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_combine_first_with_nan_multiindex():
472472
expected = DataFrame(
473473
{
474474
"c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
475-
"d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
475+
"d": [1, 4, np.nan, 2, 5, np.nan, np.nan, 3, np.nan, 6, np.nan],
476476
},
477477
index=mi_expected,
478478
dtype="Int64",

0 commit comments

Comments
 (0)