Skip to content

Commit 4802a0a

Browse files
author
Mayank
committed
Fix CoW replace with dict containing np.nan (GH#62787): handle invalid weakrefs when clearing referenced_blocks; add regression tests
1 parent e9e1b32 commit 4802a0a

File tree

2 files changed

+298
-1
lines changed

2 files changed

+298
-1
lines changed

pandas/core/internals/blocks.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,8 +862,11 @@ def replace_list(
862862
# This is ugly, but we have to get rid of intermediate refs. We
863863
# can simply clear the referenced_blocks if we already copied,
864864
# otherwise we have to remove ourselves
865+
# GH#62787: skip dead weak references (b() returns None) when building the id map
865866
self_blk_ids = {
866-
id(b()): i for i, b in enumerate(self.refs.referenced_blocks)
867+
id(ref_block): i
868+
for i, b in enumerate(self.refs.referenced_blocks)
869+
if (ref_block := b()) is not None
867870
}
868871
for b in result:
869872
if b.refs is self.refs:
Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
"""
2+
Tests for the CoW DataFrame.replace fix for np.nan dictionary replacement bug.
3+
4+
Regression tests for GH#62787: Enabling Copy on Write with DataFrame.replace
5+
Raises Exception with np.nan as replacement value.
6+
"""
7+
import numpy as np
8+
import pytest
9+
10+
import pandas as pd
11+
from pandas import DataFrame, Series
12+
import pandas._testing as tm
13+
14+
15+
class TestReplaceCoWFix:
16+
"""Tests for the CoW replace fix for GH#62787."""
17+
18+
def test_replace_dict_with_nan_cow_enabled(self):
19+
"""Test that dictionary replacement with np.nan works with CoW enabled."""
20+
# GH#62787
21+
with pd.option_context("mode.copy_on_write", True):
22+
df = DataFrame({
23+
"A": [1, 2],
24+
"B": ["b", "i like pandas"],
25+
})
26+
df["Name"] = "I Have a Name"
27+
df["Name2"] = "i like pandas"
28+
29+
# This should not raise an error
30+
replace_mappings = {
31+
pd.NA: None,
32+
pd.NaT: None,
33+
np.nan: None # This was causing the bug
34+
}
35+
result = df.replace(replace_mappings)
36+
37+
# Should return a DataFrame without errors
38+
assert isinstance(result, DataFrame)
39+
            # The result should equal df, since none of the keys being replaced occur in it
40+
tm.assert_frame_equal(result, df)
41+
42+
def test_replace_dict_with_various_na_values_cow(self):
43+
"""Test dictionary replacement with various NA values under CoW."""
44+
with pd.option_context("mode.copy_on_write", True):
45+
# Create DataFrame with actual NA values to replace
46+
df = DataFrame({
47+
"A": [1, np.nan, 3],
48+
"B": [pd.NA, "test", pd.NaT],
49+
"C": ["x", "y", "z"]
50+
})
51+
52+
replace_mappings = {
53+
pd.NA: "replaced_NA",
54+
pd.NaT: "replaced_NaT",
55+
np.nan: "replaced_nan"
56+
}
57+
58+
result = df.replace(replace_mappings)
59+
60+
expected = DataFrame({
61+
"A": [1, "replaced_nan", 3],
62+
"B": ["replaced_NA", "test", "replaced_NaT"],
63+
"C": ["x", "y", "z"]
64+
})
65+
66+
tm.assert_frame_equal(result, expected)
67+
68+
def test_replace_dict_nan_series_cow(self):
69+
"""Test Series replace with np.nan in dictionary under CoW."""
70+
with pd.option_context("mode.copy_on_write", True):
71+
s = Series([1, np.nan, 3, np.nan])
72+
73+
replace_mappings = {
74+
np.nan: "missing",
75+
1: "one"
76+
}
77+
78+
result = s.replace(replace_mappings)
79+
expected = Series(["one", "missing", 3, "missing"])
80+
81+
tm.assert_series_equal(result, expected)
82+
83+
def test_replace_dict_empty_cow(self):
84+
"""Test empty dictionary replacement under CoW."""
85+
with pd.option_context("mode.copy_on_write", True):
86+
df = DataFrame({"A": [1, 2], "B": ["a", "b"]})
87+
88+
# Empty replacement dict should work
89+
result = df.replace({})
90+
tm.assert_frame_equal(result, df)
91+
92+
def test_replace_dict_with_nan_inplace_cow(self):
93+
"""Test inplace dictionary replacement with np.nan under CoW."""
94+
with pd.option_context("mode.copy_on_write", True):
95+
df = DataFrame({
96+
"A": [1, np.nan, 3],
97+
"B": ["x", "y", "z"]
98+
})
99+
df_copy = df.copy()
100+
101+
replace_mappings = {np.nan: -999}
102+
result = df.replace(replace_mappings, inplace=True)
103+
104+
# inplace=True should return None
105+
assert result is None
106+
107+
expected = DataFrame({
108+
"A": [1, -999, 3],
109+
"B": ["x", "y", "z"]
110+
})
111+
112+
tm.assert_frame_equal(df, expected)
113+
114+
def test_replace_mixed_types_with_nan_cow(self):
115+
"""Test mixed type replacement including np.nan under CoW."""
116+
with pd.option_context("mode.copy_on_write", True):
117+
df = DataFrame({
118+
"int_col": [1, 2, 3],
119+
"float_col": [1.1, np.nan, 3.3],
120+
"str_col": ["a", "b", "c"],
121+
"mixed_col": [1, "text", np.nan]
122+
})
123+
124+
replace_mappings = {
125+
np.nan: "MISSING",
126+
1: "ONE",
127+
"a": "LETTER_A"
128+
}
129+
130+
result = df.replace(replace_mappings)
131+
132+
expected = DataFrame({
133+
"int_col": ["ONE", 2, 3],
134+
"float_col": [1.1, "MISSING", 3.3],
135+
"str_col": ["LETTER_A", "b", "c"],
136+
"mixed_col": ["ONE", "text", "MISSING"]
137+
})
138+
139+
tm.assert_frame_equal(result, expected)
140+
141+
def test_replace_cow_vs_no_cow_consistency(self):
142+
"""Test that CoW and non-CoW modes give same results."""
143+
df_data = {
144+
"A": [1, np.nan, 3],
145+
"B": ["x", "y", "z"]
146+
}
147+
replace_mappings = {np.nan: "REPLACED"}
148+
149+
# Test with CoW enabled
150+
with pd.option_context("mode.copy_on_write", True):
151+
df_cow = DataFrame(df_data)
152+
result_cow = df_cow.replace(replace_mappings)
153+
154+
# Test with CoW disabled
155+
with pd.option_context("mode.copy_on_write", False):
156+
df_no_cow = DataFrame(df_data)
157+
result_no_cow = df_no_cow.replace(replace_mappings)
158+
159+
# Results should be identical
160+
tm.assert_frame_equal(result_cow, result_no_cow)
161+
162+
def test_replace_complex_nested_dict_with_nan_cow(self):
163+
        """Test column-specific (nested dict) replacements with np.nan under CoW."""
164+
with pd.option_context("mode.copy_on_write", True):
165+
df = DataFrame({
166+
"A": [1, np.nan, 3],
167+
"B": [4, 5, np.nan],
168+
"C": ["x", "y", "z"]
169+
})
170+
171+
# Column-specific replacements
172+
replace_mappings = {
173+
"A": {np.nan: -1, 1: 100},
174+
"B": {np.nan: -2, 4: 400}
175+
}
176+
177+
result = df.replace(replace_mappings)
178+
179+
expected = DataFrame({
180+
"A": [100, -1, 3],
181+
"B": [400, 5, -2],
182+
"C": ["x", "y", "z"]
183+
})
184+
185+
tm.assert_frame_equal(result, expected)
186+
187+
def test_replace_regex_with_nan_cow(self):
188+
        """Test a dict replacement of np.nan followed by a regex replacement under CoW."""
189+
with pd.option_context("mode.copy_on_write", True):
190+
df = DataFrame({
191+
"text": ["hello world", "foo bar", "test"],
192+
"nums": [1, np.nan, 3]
193+
})
194+
195+
# First do dictionary replacement, then regex
196+
replace_mappings = {np.nan: "MISSING"}
197+
result = df.replace(replace_mappings)
198+
199+
# Then regex replacement
200+
result = result.replace(r"hello.*", "GREETING", regex=True)
201+
202+
expected = DataFrame({
203+
"text": ["GREETING", "foo bar", "test"],
204+
"nums": [1, "MISSING", 3]
205+
})
206+
207+
tm.assert_frame_equal(result, expected)
208+
209+
def test_replace_multiple_nan_types_cow(self):
210+
        """Test replacing different missing-value types (np.nan, pd.NA, pd.NaT) in one operation."""
211+
with pd.option_context("mode.copy_on_write", True):
212+
# Create DataFrame with different types of missing values
213+
df = DataFrame({
214+
"float_nan": [1.0, np.nan, 3.0],
215+
"pd_na": ["a", pd.NA, "c"],
216+
"pd_nat": [pd.Timestamp("2020-01-01"), pd.NaT, pd.Timestamp("2020-01-03")]
217+
})
218+
219+
replace_mappings = {
220+
np.nan: "float_missing",
221+
pd.NA: "string_missing",
222+
pd.NaT: pd.Timestamp("1900-01-01")
223+
}
224+
225+
result = df.replace(replace_mappings)
226+
227+
expected = DataFrame({
228+
"float_nan": [1.0, "float_missing", 3.0],
229+
"pd_na": ["a", "string_missing", "c"],
230+
"pd_nat": [pd.Timestamp("2020-01-01"), pd.Timestamp("1900-01-01"), pd.Timestamp("2020-01-03")]
231+
})
232+
233+
tm.assert_frame_equal(result, expected)
234+
235+
236+
class TestReplaceCoWEdgeCases:
237+
"""Edge case tests for the CoW replace fix."""
238+
239+
def test_replace_nan_with_none_cow(self):
240+
"""Test specific case from bug report: np.nan -> None."""
241+
with pd.option_context("mode.copy_on_write", True):
242+
df = DataFrame({
243+
"A": [1, 2],
244+
"B": ["b", "i like pandas"],
245+
})
246+
df["Name"] = "I Have a Name"
247+
df["Name2"] = "i like pandas"
248+
249+
# This exact case from the bug report
250+
replace_mappings = {
251+
pd.NA: None,
252+
pd.NaT: None,
253+
np.nan: None
254+
}
255+
256+
# Should not raise ValueError about weakref
257+
result = df.replace(replace_mappings)
258+
assert isinstance(result, DataFrame)
259+
260+
def test_replace_large_dict_with_nan_cow(self):
261+
"""Test large replacement dictionary including np.nan."""
262+
with pd.option_context("mode.copy_on_write", True):
263+
df = DataFrame({"A": range(100), "B": [np.nan] * 100})
264+
265+
# Large replacement dict to stress test weak reference handling
266+
replace_dict = {i: f"num_{i}" for i in range(0, 50)}
267+
replace_dict[np.nan] = "missing"
268+
269+
result = df.replace(replace_dict)
270+
271+
# Verify it works without error
272+
assert len(result) == 100
273+
assert all(result["B"] == "missing")
274+
275+
def test_replace_chained_operations_cow(self):
276+
"""Test chained replace operations with np.nan under CoW."""
277+
with pd.option_context("mode.copy_on_write", True):
278+
df = DataFrame({
279+
"A": [1, np.nan, 3, np.nan],
280+
"B": ["a", "b", "c", "d"]
281+
})
282+
283+
# Chain multiple replace operations
284+
result = (df
285+
.replace({np.nan: -1})
286+
.replace({1: "ONE"})
287+
.replace({"a": "LETTER_A"}))
288+
289+
expected = DataFrame({
290+
"A": ["ONE", -1, 3, -1],
291+
"B": ["LETTER_A", "b", "c", "d"]
292+
})
293+
294+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)