Updated fill_value test case, Updated whatsnew

eicchen · eicchen · commit 4493e0838dc5 · 2025-10-08T13:14:30.000-05:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -200,10 +200,10 @@ Other enhancements
 - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
 - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
 - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
-- :class:`StringDtype` now supports addition to Series/DataFrame with strings (:issue:`61581`)
-
+- :class:`StringDtype` now supports addition while maintaining element typing (:issue:`61581`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
+- :meth:`DataFrame.add` now supports string addition with null-likes (:issue:`61581`)
 - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
 - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -627,7 +627,7 @@ def test_arith_flex_frame_corner(self, float_frame):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("axis", [0, 1])
-    def test_arith_flex_frame_fill_value_corner(self, float_frame, axis):
+    def test_arith_flex_frame_fill_value_series(self, float_frame, axis):
         rng = np.random.default_rng(60)
         mask = rng.random(float_frame.shape) < 0.2
         left = float_frame.mask(mask)
@@ -2237,8 +2237,8 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
 
 
 dt_params = [
-    (tm.ALL_INT_NUMPY_DTYPES[0], 5),
-    (tm.ALL_INT_EA_DTYPES[0], 5),
+    (tm.ALL_INT_NUMPY_DTYPES[0], 10),
+    (tm.ALL_INT_EA_DTYPES[0], 10),
     (tm.FLOAT_NUMPY_DTYPES[0], 4.9),
     (tm.FLOAT_EA_DTYPES[0], 4.9),
 ]
@@ -2250,45 +2250,38 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
     "data_type,fill_val, axis",
     [(dt, val, axis) for axis in axes for dt, val in dt_params],
 )
-def test_df_fill_value_dtype(data_type, fill_val, axis):
+def test_df_mul_array_fill_value(data_type, fill_val, axis):
     # GH 61581
-    base_data = np.arange(25).reshape(5, 5)
-    mult_list = [1, np.nan, 5, np.nan, 3]
-    np_int_flag = 0
+    base_data = np.arange(12).reshape(4, 3)
+    df = DataFrame(base_data)
+    mult_list = [np.nan, 1, 5, np.nan]
+    mult_list = mult_list[: df.shape[axis]]
 
-    try:
-        mult_data = pd.array(mult_list, dtype=data_type)
-    except ValueError as e:
-        # Numpy int type cannot represent NaN, it will end up here
-        if "cannot convert float NaN to integer" in str(e):
-            mult_data = np.asarray(mult_list)
-            np_int_flag = 1
+    if data_type in tm.ALL_INT_NUMPY_DTYPES:
+        # Numpy int type cannot represent NaN
+        mult_np = np.asarray(mult_list)
+        mult_list = np.nan_to_num(mult_np, nan=fill_val)
 
-    columns = list("ABCDE")
-    df = DataFrame(base_data, columns=columns)
+    mult_data = pd.array(mult_list, dtype=data_type)
 
     for i in range(df.shape[0]):
         try:
             df.iat[i, i] = np.nan
-            df.iat[i + 1, i] = pd.NA
-            df.iat[i + 3, i] = pd.NA
+            df.iat[i + 2, i] = pd.NA
         except IndexError:
             pass
 
-    mult_mat = np.broadcast_to(mult_data, df.shape)
     if axis == 0:
-        mask = np.isnan(mult_mat).T
+        mult_mat = np.broadcast_to(mult_data.reshape(-1, 1), df.shape)
+        mask = np.isnan(mult_mat)
     else:
+        mult_mat = np.broadcast_to(mult_data.reshape(1, -1), df.shape)
         mask = np.isnan(mult_mat)
     mask = df.isna().values & mask
 
     df_result = df.mul(mult_data, axis=axis, fill_value=fill_val)
-    if np_int_flag == 1:
-        mult_np = np.nan_to_num(mult_data, nan=fill_val)
-        df_expected = (df.fillna(fill_val).mul(mult_np, axis=axis)).mask(mask, np.nan)
-    else:
-        df_expected = (
-            df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)
-        ).mask(mask, np.nan)
+    df_expected = (df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)).mask(
+        mask, np.nan
+    )
 
     tm.assert_frame_equal(df_result, df_expected)