From 505c6f53b9ee32fe45b451914ca6de7fd56714bb Mon Sep 17 00:00:00 2001 From: Vignesh Iyer Date: Sun, 21 Sep 2025 16:08:57 -0700 Subject: [PATCH 1/3] BUG: Fix groupby.apply() dropping metadata from subclassed DataFrames (#62134) --- pandas/core/groupby/generic.py | 6 +++--- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_groupby_subclass.py | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 39607d74c0dc8..1ffe9a05764cd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -621,7 +621,7 @@ def _wrap_applied_output( if not self.as_index and not_indexed_same: result = self._insert_inaxis_grouper(result) result.index = default_index(len(result)) - return result + return result.__finalize__(self.obj, method="groupby") else: # GH #6265 #24880 result = self.obj._constructor( @@ -630,7 +630,7 @@ def _wrap_applied_output( if not self.as_index: result = self._insert_inaxis_grouper(result) result.index = default_index(len(result)) - return result + return result.__finalize__(self.obj, method="groupby") __examples_series_doc = dedent( """ @@ -2169,7 +2169,7 @@ def _wrap_applied_output_series( if not self.as_index: result = self._insert_inaxis_grouper(result) - return result + return result.__finalize__(self.obj, method="groupby") def _cython_transform( self, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f9789c82a1536..1fc2d5535ad9e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1188,7 +1188,7 @@ def _concat_objects( if isinstance(result, Series) and name is not None: result.name = name - return result + return result.__finalize__(self.obj, method="groupby") @final def _set_result_index_ordered( diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 5ffb3bc147fdf..ee4eefffd41d0 100644 --- 
a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -97,6 +97,27 @@ def func2(group): result = custom_series.groupby(custom_df["c"]).agg(func2) tm.assert_series_equal(result, expected) + # GH#62134 - Test that apply() preserves metadata when returning DataFrames/Series + def sum_func(group): + assert isinstance(group, tm.SubclassedDataFrame) + assert hasattr(group, "testattr") + assert group.testattr == "hello" + return group.sum() + + result = custom_df.groupby("c").apply(sum_func) + assert hasattr(result, "testattr"), "DataFrame apply() should preserve metadata" + assert result.testattr == "hello" + + def sum_series_func(group): + assert isinstance(group, tm.SubclassedSeries) + assert hasattr(group, "testattr") + assert group.testattr == "hello" + return group.sum() + + result = custom_series.groupby(custom_df["c"]).apply(sum_series_func) + assert hasattr(result, "testattr"), "Series apply() should preserve metadata" + assert result.testattr == "hello" + @pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) def test_groupby_resample_preserves_subclass(obj): From e7f8e876e93981c130dd1b12f2a3ffaa1c6b6cb2 Mon Sep 17 00:00:00 2001 From: Vignesh Iyer Date: Sun, 21 Sep 2025 16:11:29 -0700 Subject: [PATCH 2/3] DOC: Add whatsnew entry for groupby.apply() metadata preservation fix --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9210f1e0082f0..6e547f8077348 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1068,6 +1068,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. 
(:issue:`55041`) - Bug in :meth:`DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`) - Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`) +- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` not preserving ``_metadata`` attributes from subclassed DataFrames and Series. (:issue:`62134`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. 
(:issue:`58811`) From 05308f30cf1370f5b017c885478df89f7c931d6e Mon Sep 17 00:00:00 2001 From: Vignesh Iyer Date: Sun, 21 Sep 2025 19:34:29 -0700 Subject: [PATCH 3/3] TST: Add test for groupby.apply() to ensure metadata preservation in subclassed DataFrames and Series --- pandas/tests/groupby/test_groupby_subclass.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index ee4eefffd41d0..e1dfb3aabdaf0 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -97,7 +97,12 @@ def func2(group): result = custom_series.groupby(custom_df["c"]).agg(func2) tm.assert_series_equal(result, expected) + +def test_groupby_apply_preserves_metadata(): # GH#62134 - Test that apply() preserves metadata when returning DataFrames/Series + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + custom_df.testattr = "hello" + def sum_func(group): assert isinstance(group, tm.SubclassedDataFrame) assert hasattr(group, "testattr") @@ -108,6 +113,9 @@ def sum_func(group): assert hasattr(result, "testattr"), "DataFrame apply() should preserve metadata" assert result.testattr == "hello" + custom_series = tm.SubclassedSeries([1, 2, 3]) + custom_series.testattr = "hello" + def sum_series_func(group): assert isinstance(group, tm.SubclassedSeries) assert hasattr(group, "testattr")