Skip to content

Commit 4c1a545

Browse files
committed
Warn on setitems that do not change dtype #52593
1 parent 9be7f06 commit 4c1a545

File tree

7 files changed

+79
-16
lines changed

7 files changed

+79
-16
lines changed

pandas/core/indexing.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from pandas.util._decorators import (
3232
doc,
3333
)
34+
from pandas.util._exceptions import find_stack_level
3435

3536
from pandas.core.dtypes.cast import (
3637
can_hold_element,
@@ -63,6 +64,7 @@
6364
)
6465

6566
from pandas.core import algorithms as algos
67+
from pandas.core.arrays import ExtensionArray
6668
import pandas.core.common as com
6769
from pandas.core.construction import (
6870
array as pd_array,
@@ -926,6 +928,7 @@ def __setitem__(self, key, value) -> None:
926928
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
927929
)
928930

931+
self._maybe_warn_non_casting_setitem(key, value)
929932
check_dict_or_set_indexers(key)
930933
if isinstance(key, tuple):
931934
key = (list(x) if is_iterator(x) else x for x in key)
@@ -941,6 +944,39 @@ def __setitem__(self, key, value) -> None:
941944
)
942945
iloc._setitem_with_indexer(indexer, value, self.name)
943946

947+
@final
def _maybe_warn_non_casting_setitem(self, key, value) -> None:
    """
    Warn when a full-column ``.loc``-style assignment keeps the old dtype.

    GH#52593: many users were confused that ``df.loc[:, col] = values``
    writes into the existing column instead of replacing it, so a
    ``UserWarning`` is issued when the dtype of ``values`` differs from the
    dtype of the column currently stored under ``col``.

    Parameters
    ----------
    key : object
        The indexer passed to ``__setitem__``. Only tuples of length > 1
        whose first element is ``slice(None)`` are inspected.
    value : object
        The value being assigned. Only Series, Index, ExtensionArray and
        ``np.ndarray`` values are inspected.
    """
    if self.ndim != 2:
        return
    if not (isinstance(key, tuple) and len(key) > 1):
        return

    row_indexer = key[0]
    if not (isinstance(row_indexer, slice) and row_indexer == slice(None)):
        return

    # From here on we know this is a `df.loc[:, foo] = bar` call
    col_indexer = key[1]
    if not is_hashable(col_indexer) or col_indexer not in self.obj.columns:
        return

    existing = self.obj[col_indexer]

    # type checks necessary in case of non-unique columns
    if not isinstance(existing, ABCSeries):
        return
    if not isinstance(value, (ABCSeries, Index, ExtensionArray, np.ndarray)):
        return

    if existing.dtype != value.dtype:
        warnings.warn(
            "Setting `df.loc[:, col] = values` does *not* change "
            "the dtype of `df[col]`. It writes the entries from "
            "`values` into the existing array behind `df[col]`. "
            "To swap out the old array for the new one, use "
            "`df[col] = values` instead.",
            UserWarning,
            stacklevel=find_stack_level(),
        )

    # TODO: the checks above handle the most common cases, but miss
    #  a) obj.columns is MultiIndex
    #  b) non-unique columns
    #  c) df.loc[:, [col]] = ...
979+
944980
def _validate_key(self, key, axis: AxisInt) -> None:
945981
"""
946982
Ensure that key is valid for current indexer.

pandas/tests/copy_view/test_indexing.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,10 @@ def test_subset_set_column_with_loc(backend, dtype):
308308
df_orig = df.copy()
309309
subset = df[1:3]
310310

311-
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
311+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
312+
err = UserWarning if backend[0] != "numpy" else None
313+
with tm.assert_produces_warning(err, match=msg):
314+
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
312315

313316
subset._mgr._verify_integrity()
314317
expected = DataFrame(

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,9 @@ def test_setitem_frame_midx_columns(self):
792792
def test_loc_setitem_ea_dtype(self):
793793
# GH#55604
794794
df = DataFrame({"a": np.array([10], dtype="i8")})
795-
df.loc[:, "a"] = Series([11], dtype="Int64")
795+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
796+
with tm.assert_produces_warning(UserWarning, match=msg):
797+
df.loc[:, "a"] = Series([11], dtype="Int64")
796798
expected = DataFrame({"a": np.array([11], dtype="i8")})
797799
tm.assert_frame_equal(df, expected)
798800

pandas/tests/indexing/test_iloc.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,12 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
8787
df = frame.copy()
8888
orig_vals = df.values
8989

90-
indexer_li(df)[key, 0] = cat
90+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
91+
err = None
92+
if isinstance(key, slice) and key == slice(None):
93+
err = UserWarning
94+
with tm.assert_produces_warning(err, match=msg):
95+
indexer_li(df)[key, 0] = cat
9196

9297
expected = DataFrame({0: cat}).astype(object)
9398
assert np.shares_memory(df[0].values, orig_vals)
@@ -103,7 +108,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
103108
# we retain the object dtype.
104109
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
105110
df = frame.copy()
106-
indexer_li(df)[key, 0] = cat
111+
with tm.assert_produces_warning(err, match=msg):
112+
indexer_li(df)[key, 0] = cat
107113
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
108114
tm.assert_frame_equal(df, expected)
109115

@@ -1521,10 +1527,12 @@ def test_iloc_setitem_pure_position_based(self):
15211527
def test_iloc_nullable_int64_size_1_nan(self):
15221528
# GH 31861
15231529
result = DataFrame({"a": ["test"], "b": [np.nan]})
1530+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
15241531

15251532
ser = Series([NA], name="b", dtype="Int64")
15261533
with pytest.raises(TypeError, match="Invalid value"):
1527-
result.loc[:, "b"] = ser
1534+
with tm.assert_produces_warning(UserWarning, match=msg):
1535+
result.loc[:, "b"] = ser
15281536

15291537
def test_iloc_arrow_extension_array(self):
15301538
# GH#61311

pandas/tests/indexing/test_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,9 @@ def test_astype_assignment(self, using_infer_string):
548548

549549
# GH5702 (loc)
550550
df = df_orig.copy()
551-
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
551+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
552+
with tm.assert_produces_warning(UserWarning, match=msg):
553+
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
552554
expected = DataFrame(
553555
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
554556
)
@@ -570,12 +572,14 @@ def test_astype_assignment_full_replacements(self):
570572

571573
# With the enforcement of GH#45333 in 2.0, this assignment occurs inplace,
572574
# so float64 is retained
575+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
573576
df.iloc[:, 0] = df["A"].astype(np.int64)
574577
expected = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
575578
tm.assert_frame_equal(df, expected)
576579

577580
df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
578-
df.loc[:, "A"] = df["A"].astype(np.int64)
581+
with tm.assert_produces_warning(UserWarning, match=msg):
582+
df.loc[:, "A"] = df["A"].astype(np.int64)
579583
tm.assert_frame_equal(df, expected)
580584

581585
@pytest.mark.parametrize("indexer", [tm.getitem, tm.loc])

pandas/tests/indexing/test_loc.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,11 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
581581
# GH 6149
582582
# coerce similarly for setitem and loc when rows have a null-slice
583583
df = frame_for_consistency.copy()
584+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
585+
err = UserWarning if isinstance(val, np.ndarray) else None
584586
with pytest.raises(TypeError, match="Invalid value"):
585-
df.loc[:, "date"] = val
587+
with tm.assert_produces_warning(err, match=msg):
588+
df.loc[:, "date"] = val
586589

587590
def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
588591
# GH 6149
@@ -646,18 +649,21 @@ def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
646649
]
647650
df = DataFrame(values, index=mi, columns=cols)
648651

652+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
649653
ctx = contextlib.nullcontext()
650654
if using_infer_string:
651655
ctx = pytest.raises(TypeError, match="Invalid value")
652656

653657
with ctx:
654-
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
655-
df.loc[:, ("Respondent", "StartDate")]
656-
)
658+
with tm.assert_produces_warning(UserWarning, match=msg):
659+
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
660+
df.loc[:, ("Respondent", "StartDate")]
661+
)
657662
with ctx:
658-
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
659-
df.loc[:, ("Respondent", "EndDate")]
660-
)
663+
with tm.assert_produces_warning(UserWarning, match=msg):
664+
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
665+
df.loc[:, ("Respondent", "EndDate")]
666+
)
661667

662668
if using_infer_string:
663669
# infer-objects won't infer stuff anymore
@@ -1426,7 +1432,9 @@ def test_loc_setitem_single_row_categorical(self, using_infer_string):
14261432

14271433
# pre-2.0 this swapped in a new array, in 2.0 it operates inplace,
14281434
# consistent with non-split-path
1429-
df.loc[:, "Alpha"] = categories
1435+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
1436+
with tm.assert_produces_warning(UserWarning, match=msg):
1437+
df.loc[:, "Alpha"] = categories
14301438

14311439
result = df["Alpha"]
14321440
expected = Series(categories, index=df.index, name="Alpha").astype(

pandas/tests/indexing/test_partial.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ def test_partial_setting_frame(self):
318318
df["B"] = df["B"].astype(np.float64)
319319
# as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
320320
# setting inplace
321-
df.loc[:, "B"] = df.loc[:, "A"]
321+
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
322+
with tm.assert_produces_warning(UserWarning, match=msg):
323+
df.loc[:, "B"] = df.loc[:, "A"]
322324
tm.assert_frame_equal(df, expected)
323325

324326
# single dtype frame, partial setting

0 commit comments

Comments
 (0)