diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 222790cfbef6c..f2e646978b3c9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1279,6 +1279,7 @@ Groupby/resample/rolling - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`) - Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`) - Bug in :meth:`DataFrame.groupby` methods when operating on NumPy-nullable data failing when the NA mask was not C-contiguous (:issue:`61031`) +- Bug in :meth:`DataFrame.groupby` where modifying a Series used as the grouping key after calling :meth:`DataFrame.groupby`, but prior to the groupby operation, would affect the result (:issue:`63219`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 98d9fbe1f87c5..eacde2e9661a8 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -460,6 +460,8 @@ def __init__( dropna: bool = True, uniques: ArrayLike | None = None, ) -> None: + if isinstance(grouper, Series): + grouper = grouper.copy(deep=False) self.level = level self._orig_grouper = grouper grouping_vector = _convert_grouper(index, grouper) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 49ce689e5f517..dc0bc7e476a02 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -226,6 +226,19 @@ def test_groupby_column_index_in_references(): tm.assert_frame_equal(result, expected) +def test_groupby_modify_series(): + # https://github.com/pandas-dev/pandas/issues/63219 + # Modifying a Series after using it to groupby should not impact + # the groupby operation. 
+ ser = Series([1, 2, 1]) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + gb = df.groupby(ser) + ser.iloc[0] = 100 + result = gb.sum() + expected = DataFrame({"a": [4, 2], "b": [10, 5]}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + + def test_rename_columns(): # Case: renaming columns returns a new dataframe # + afterwards modifying the result