From 49b0300eff2e90d8a8405baab695f70eac2b5558 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 2 Nov 2025 13:49:21 -0800 Subject: [PATCH 1/4] REF: Make deep keyword only in Block(Manager).copy --- pandas/core/generic.py | 2 +- pandas/core/internals/blocks.py | 8 ++++---- pandas/core/internals/concat.py | 4 ++-- pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 14 +++----------- pandas/core/series.py | 4 +--- pandas/tests/internals/test_internals.py | 10 +++++----- 7 files changed, 17 insertions(+), 27 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b542ca1f431c3..fa478ee82c14d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -280,7 +280,7 @@ def _init_mgr( # make a copy if explicitly requested if copy: - mgr = mgr.copy() + mgr = mgr.copy(deep=True) if dtype is not None: # avoid further copies if we can if ( diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a9ad561cbc393..10e96e1888109 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -638,7 +638,7 @@ def get_values_for_csv( return self.make_block(result) @final - def copy(self, deep: bool = True) -> Self: + def copy(self, *, deep: bool) -> Self: """copy constructor""" values = self.values refs: BlockValuesRefs | None @@ -656,7 +656,7 @@ def _maybe_copy(self, inplace: bool) -> Self: if inplace: deep = self.refs.has_reference() return self.copy(deep=deep) - return self.copy() + return self.copy(deep=True) @final def _get_refs_and_copy(self, inplace: bool): @@ -923,10 +923,10 @@ def _replace_coerce( has_ref = self.refs.has_reference() nb = self.astype(np.dtype(object)) if not inplace: - nb = nb.copy() + nb = nb.copy(deep=True) elif inplace and has_ref and nb.refs.has_reference(): # no copy in astype and we had refs before - nb = nb.copy() + nb = nb.copy(deep=True) putmask_inplace(nb.values, mask, value) return [nb] return [self.copy(deep=False)] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 2ee7d3948a70f..6062cb1e3ea13 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -175,14 +175,14 @@ def _maybe_reindex_columns_na_proxy( for i, indexer in indexers.items(): mgr = mgr.reindex_indexer( axes[i], - indexers[i], + indexer, axis=i, only_slice=True, # only relevant for i==0 allow_dups=True, use_na_proxy=True, # only relevant for i==0 ) if needs_copy and not indexers: - mgr = mgr.copy() + mgr = mgr.copy(deep=True) new_mgrs.append(mgr) return new_mgrs diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 8db37f24cdbb9..135ddf58076ea 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -181,7 +181,7 @@ def rec_array_to_mgr( mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype) if copy: - mgr = mgr.copy() + mgr = mgr.copy(deep=True) return mgr diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 40ba74fed49d1..dfd609b401593 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -598,7 +598,7 @@ def setitem(self, indexer, value) -> Self: return self # No need to split if we either set all columns or on a single block # manager - self = self.copy() + self = self.copy(deep=True) return self.apply("setitem", indexer=indexer, value=value) @@ -712,7 +712,7 @@ def _combine(self, blocks: list[Block], index: Index | None = None) -> Self: def nblocks(self) -> int: return len(self.blocks) - def copy(self, deep: bool | Literal["all"] = True) -> Self: + def copy(self, *, deep: bool) -> Self: """ Make deep or shallow copy of BlockManager @@ -727,15 +727,7 @@ def copy(self, deep: bool | Literal["all"] = True) -> Self: BlockManager """ # this preserves the notion of view copying of axes - if deep: - # hit in e.g. tests.io.json.test_pandas - - def copy_func(ax): - return ax.copy(deep=True) if deep == "all" else ax.view() - - new_axes = [copy_func(ax) for ax in self.axes] - else: - new_axes = [ax.view() for ax in self.axes] + new_axes = [ax.view() for ax in self.axes] res = self.apply("copy", deep=deep) res.axes = new_axes diff --git a/pandas/core/series.py b/pandas/core/series.py index 1a8645cf1815d..c304d977b89b4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1833,9 +1833,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: df = self._constructor_expanddim_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(self, method="to_frame") - def _set_name( - self, name, inplace: bool = False, deep: bool | None = None - ) -> Series: + def _set_name(self, name, inplace: bool = False) -> Series: """ Set the Series name. diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0add072a63053..2342f24547730 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -277,12 +277,12 @@ def test_attrs(self, fblock): assert len(fblock) == len(fblock.values) def test_copy(self, fblock): - cop = fblock.copy() + cop = fblock.copy(deep=True) assert cop is not fblock assert_block_equal(fblock, cop) def test_delete(self, fblock): - newb = fblock.copy() + newb = fblock.copy(deep=True) locs = newb.mgr_locs nb = newb.delete(0)[0] assert newb.mgr_locs is locs @@ -295,7 +295,7 @@ def test_delete(self, fblock): assert not (newb.values[0] == 1).all() assert (nb.values[0] == 1).all() - newb = fblock.copy() + newb = fblock.copy(deep=True) locs = newb.mgr_locs nb = newb.delete(1) assert len(nb) == 2 @@ -310,7 +310,7 @@ def test_delete(self, fblock): assert not (newb.values[1] == 2).all() assert (nb[1].values[0] == 2).all() - newb = fblock.copy() + newb = fblock.copy(deep=True) nb = newb.delete(2) assert len(nb) == 1 tm.assert_numpy_array_equal( @@ -318,7 +318,7 @@ def test_delete(self, fblock): ) assert (nb[0].values[1] == 1).all() - newb = fblock.copy() + newb = fblock.copy(deep=True) with pytest.raises(IndexError, match=None): newb.delete(3) From 2fdca2082b2b97cfcd1965dfe04675a4f6c2745f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:26:03 -0800 Subject: [PATCH 2/4] Fix other usages of copy --- pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dfd609b401593..0fe22455cc98c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2184,7 +2184,7 @@ def setitem_inplace(self, indexer, value) -> None: the dtype. """ if not self._has_no_reference(0): - self.blocks = (self._block.copy(),) + self.blocks = (self._block.copy(deep=True),) self._reset_cache() arr = self.array diff --git a/pandas/core/series.py b/pandas/core/series.py index c304d977b89b4..9bd31680c1c2f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -500,7 +500,7 @@ def __init__( if dtype is not None: data = data.astype(dtype=dtype) elif copy: - data = data.copy() + data = data.copy(deep=True) else: data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) From 050b5b803f8bda4d75ca93846ac0acb45bffed5f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Nov 2025 10:12:10 -0800 Subject: [PATCH 3/4] Use copy and pass deep --- pandas/core/internals/managers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 05d8e7cc4cc0c..63428c51e5175 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -719,15 +719,14 @@ def copy(self, *, deep: bool) -> Self: Parameters ---------- deep : bool, string or None, default True - If False or None, return a shallow copy (do not copy data) - If 'all', copy data and a deep copy of the index + If False, return a shallow copy (do not copy data) Returns ------- BlockManager """ # this preserves the notion of view copying of axes - new_axes = [ax.view() for ax in self.axes] + new_axes = [ax.copy(deep=deep) for ax in self.axes] res = self.apply("copy", deep=deep) res.axes = new_axes From e66635efd98b770dd749e2a539a6924b507da13c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 4 Nov 2025 14:03:33 -0800 Subject: [PATCH 4/4] Revert back and add comment --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 63428c51e5175..b407342b85f9c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -725,8 +725,8 @@ def copy(self, *, deep: bool) -> Self: ------- BlockManager """ - # this preserves the notion of view copying of axes - new_axes = [ax.copy(deep=deep) for ax in self.axes] + # TODO: Should deep=True be respected for axes? + new_axes = [ax.view() for ax in self.axes] res = self.apply("copy", deep=deep) res.axes = new_axes