"""
Tests for the Copy-on-Write (CoW) DataFrame.replace fix for the np.nan
dictionary replacement bug.

Regression tests for GH#62787: enabling Copy-on-Write with DataFrame.replace
raises an exception with np.nan as replacement value.
"""
7+ import numpy as np
8+ import pytest
9+
10+ import pandas as pd
11+ from pandas import DataFrame , Series
12+ import pandas ._testing as tm
13+
14+
15+ class TestReplaceCoWFix :
16+ """Tests for the CoW replace fix for GH#62787."""
17+
18+ def test_replace_dict_with_nan_cow_enabled (self ):
19+ """Test that dictionary replacement with np.nan works with CoW enabled."""
20+ # GH#62787
21+ with pd .option_context ("mode.copy_on_write" , True ):
22+ df = DataFrame ({
23+ "A" : [1 , 2 ],
24+ "B" : ["b" , "i like pandas" ],
25+ })
26+ df ["Name" ] = "I Have a Name"
27+ df ["Name2" ] = "i like pandas"
28+
29+ # This should not raise an error
30+ replace_mappings = {
31+ pd .NA : None ,
32+ pd .NaT : None ,
33+ np .nan : None # This was causing the bug
34+ }
35+ result = df .replace (replace_mappings )
36+
37+ # Should return a DataFrame without errors
38+ assert isinstance (result , DataFrame )
39+ # The original data should remain unchanged since we're replacing values that don't exist
40+ tm .assert_frame_equal (result , df )
41+
42+ def test_replace_dict_with_various_na_values_cow (self ):
43+ """Test dictionary replacement with various NA values under CoW."""
44+ with pd .option_context ("mode.copy_on_write" , True ):
45+ # Create DataFrame with actual NA values to replace
46+ df = DataFrame ({
47+ "A" : [1 , np .nan , 3 ],
48+ "B" : [pd .NA , "test" , pd .NaT ],
49+ "C" : ["x" , "y" , "z" ]
50+ })
51+
52+ replace_mappings = {
53+ pd .NA : "replaced_NA" ,
54+ pd .NaT : "replaced_NaT" ,
55+ np .nan : "replaced_nan"
56+ }
57+
58+ result = df .replace (replace_mappings )
59+
60+ expected = DataFrame ({
61+ "A" : [1 , "replaced_nan" , 3 ],
62+ "B" : ["replaced_NA" , "test" , "replaced_NaT" ],
63+ "C" : ["x" , "y" , "z" ]
64+ })
65+
66+ tm .assert_frame_equal (result , expected )
67+
68+ def test_replace_dict_nan_series_cow (self ):
69+ """Test Series replace with np.nan in dictionary under CoW."""
70+ with pd .option_context ("mode.copy_on_write" , True ):
71+ s = Series ([1 , np .nan , 3 , np .nan ])
72+
73+ replace_mappings = {
74+ np .nan : "missing" ,
75+ 1 : "one"
76+ }
77+
78+ result = s .replace (replace_mappings )
79+ expected = Series (["one" , "missing" , 3 , "missing" ])
80+
81+ tm .assert_series_equal (result , expected )
82+
83+ def test_replace_dict_empty_cow (self ):
84+ """Test empty dictionary replacement under CoW."""
85+ with pd .option_context ("mode.copy_on_write" , True ):
86+ df = DataFrame ({"A" : [1 , 2 ], "B" : ["a" , "b" ]})
87+
88+ # Empty replacement dict should work
89+ result = df .replace ({})
90+ tm .assert_frame_equal (result , df )
91+
92+ def test_replace_dict_with_nan_inplace_cow (self ):
93+ """Test inplace dictionary replacement with np.nan under CoW."""
94+ with pd .option_context ("mode.copy_on_write" , True ):
95+ df = DataFrame ({
96+ "A" : [1 , np .nan , 3 ],
97+ "B" : ["x" , "y" , "z" ]
98+ })
99+ df_copy = df .copy ()
100+
101+ replace_mappings = {np .nan : - 999 }
102+ result = df .replace (replace_mappings , inplace = True )
103+
104+ # inplace=True should return None
105+ assert result is None
106+
107+ expected = DataFrame ({
108+ "A" : [1 , - 999 , 3 ],
109+ "B" : ["x" , "y" , "z" ]
110+ })
111+
112+ tm .assert_frame_equal (df , expected )
113+
114+ def test_replace_mixed_types_with_nan_cow (self ):
115+ """Test mixed type replacement including np.nan under CoW."""
116+ with pd .option_context ("mode.copy_on_write" , True ):
117+ df = DataFrame ({
118+ "int_col" : [1 , 2 , 3 ],
119+ "float_col" : [1.1 , np .nan , 3.3 ],
120+ "str_col" : ["a" , "b" , "c" ],
121+ "mixed_col" : [1 , "text" , np .nan ]
122+ })
123+
124+ replace_mappings = {
125+ np .nan : "MISSING" ,
126+ 1 : "ONE" ,
127+ "a" : "LETTER_A"
128+ }
129+
130+ result = df .replace (replace_mappings )
131+
132+ expected = DataFrame ({
133+ "int_col" : ["ONE" , 2 , 3 ],
134+ "float_col" : [1.1 , "MISSING" , 3.3 ],
135+ "str_col" : ["LETTER_A" , "b" , "c" ],
136+ "mixed_col" : ["ONE" , "text" , "MISSING" ]
137+ })
138+
139+ tm .assert_frame_equal (result , expected )
140+
141+ def test_replace_cow_vs_no_cow_consistency (self ):
142+ """Test that CoW and non-CoW modes give same results."""
143+ df_data = {
144+ "A" : [1 , np .nan , 3 ],
145+ "B" : ["x" , "y" , "z" ]
146+ }
147+ replace_mappings = {np .nan : "REPLACED" }
148+
149+ # Test with CoW enabled
150+ with pd .option_context ("mode.copy_on_write" , True ):
151+ df_cow = DataFrame (df_data )
152+ result_cow = df_cow .replace (replace_mappings )
153+
154+ # Test with CoW disabled
155+ with pd .option_context ("mode.copy_on_write" , False ):
156+ df_no_cow = DataFrame (df_data )
157+ result_no_cow = df_no_cow .replace (replace_mappings )
158+
159+ # Results should be identical
160+ tm .assert_frame_equal (result_cow , result_no_cow )
161+
162+ def test_replace_complex_nested_dict_with_nan_cow (self ):
163+ """Test complex nested dictionary replacements with np.nan under CoW."""
164+ with pd .option_context ("mode.copy_on_write" , True ):
165+ df = DataFrame ({
166+ "A" : [1 , np .nan , 3 ],
167+ "B" : [4 , 5 , np .nan ],
168+ "C" : ["x" , "y" , "z" ]
169+ })
170+
171+ # Column-specific replacements
172+ replace_mappings = {
173+ "A" : {np .nan : - 1 , 1 : 100 },
174+ "B" : {np .nan : - 2 , 4 : 400 }
175+ }
176+
177+ result = df .replace (replace_mappings )
178+
179+ expected = DataFrame ({
180+ "A" : [100 , - 1 , 3 ],
181+ "B" : [400 , 5 , - 2 ],
182+ "C" : ["x" , "y" , "z" ]
183+ })
184+
185+ tm .assert_frame_equal (result , expected )
186+
187+ def test_replace_regex_with_nan_cow (self ):
188+ """Test regex replacement combined with np.nan under CoW."""
189+ with pd .option_context ("mode.copy_on_write" , True ):
190+ df = DataFrame ({
191+ "text" : ["hello world" , "foo bar" , "test" ],
192+ "nums" : [1 , np .nan , 3 ]
193+ })
194+
195+ # First do dictionary replacement, then regex
196+ replace_mappings = {np .nan : "MISSING" }
197+ result = df .replace (replace_mappings )
198+
199+ # Then regex replacement
200+ result = result .replace (r"hello.*" , "GREETING" , regex = True )
201+
202+ expected = DataFrame ({
203+ "text" : ["GREETING" , "foo bar" , "test" ],
204+ "nums" : [1 , "MISSING" , 3 ]
205+ })
206+
207+ tm .assert_frame_equal (result , expected )
208+
209+ def test_replace_multiple_nan_types_cow (self ):
210+ """Test replacement of different NaN types in same operation."""
211+ with pd .option_context ("mode.copy_on_write" , True ):
212+ # Create DataFrame with different types of missing values
213+ df = DataFrame ({
214+ "float_nan" : [1.0 , np .nan , 3.0 ],
215+ "pd_na" : ["a" , pd .NA , "c" ],
216+ "pd_nat" : [pd .Timestamp ("2020-01-01" ), pd .NaT , pd .Timestamp ("2020-01-03" )]
217+ })
218+
219+ replace_mappings = {
220+ np .nan : "float_missing" ,
221+ pd .NA : "string_missing" ,
222+ pd .NaT : pd .Timestamp ("1900-01-01" )
223+ }
224+
225+ result = df .replace (replace_mappings )
226+
227+ expected = DataFrame ({
228+ "float_nan" : [1.0 , "float_missing" , 3.0 ],
229+ "pd_na" : ["a" , "string_missing" , "c" ],
230+ "pd_nat" : [pd .Timestamp ("2020-01-01" ), pd .Timestamp ("1900-01-01" ), pd .Timestamp ("2020-01-03" )]
231+ })
232+
233+ tm .assert_frame_equal (result , expected )
234+
235+
class TestReplaceCoWEdgeCases:
    """Edge case tests for the CoW replace fix.

    Every test enables Copy-on-Write locally through ``pd.option_context`` so
    the option is restored when the test finishes.
    """

    def test_replace_nan_with_none_cow(self):
        """Test specific case from bug report: np.nan -> None."""
        with pd.option_context("mode.copy_on_write", True):
            df = DataFrame(
                {
                    "A": [1, 2],
                    "B": ["b", "i like pandas"],
                }
            )
            df["Name"] = "I Have a Name"
            df["Name2"] = "i like pandas"

            # This exact case from the bug report
            replace_mappings = {
                pd.NA: None,
                pd.NaT: None,
                np.nan: None,
            }

            # Should not raise ValueError about weakref
            result = df.replace(replace_mappings)
            assert isinstance(result, DataFrame)
            # The frame holds no missing values, so nothing may change.
            tm.assert_frame_equal(result, df)

    def test_replace_large_dict_with_nan_cow(self):
        """Test large replacement dictionary including np.nan."""
        with pd.option_context("mode.copy_on_write", True):
            df = DataFrame({"A": range(100), "B": [np.nan] * 100})

            # Large replacement dict to stress test weak reference handling
            replace_dict = {i: f"num_{i}" for i in range(50)}
            replace_dict[np.nan] = "missing"

            result = df.replace(replace_dict)

            assert len(result) == 100
            # Reduce with Series.all() instead of iterating the Series through
            # the builtin all().
            assert (result["B"] == "missing").all()
            # 0..49 are mapped to their labels; 50..99 have no entry in the
            # dict and must come through unchanged.
            expected_a = [f"num_{i}" for i in range(50)] + list(range(50, 100))
            assert result["A"].tolist() == expected_a

    def test_replace_chained_operations_cow(self):
        """Test chained replace operations with np.nan under CoW."""
        with pd.option_context("mode.copy_on_write", True):
            df = DataFrame(
                {
                    "A": [1, np.nan, 3, np.nan],
                    "B": ["a", "b", "c", "d"],
                }
            )

            # Chain multiple replace operations
            result = (
                df.replace({np.nan: -1})
                .replace({1: "ONE"})
                .replace({"a": "LETTER_A"})
            )

            # "A" is float64 until the string is introduced, so the remaining
            # numbers are floats inside the resulting object column.
            expected = DataFrame(
                {
                    "A": ["ONE", -1.0, 3.0, -1.0],
                    "B": ["LETTER_A", "b", "c", "d"],
                }
            )

            tm.assert_frame_equal(result, expected)
0 commit comments