Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
99ae672
ENH: fill_value in frame+series flex ops
jbrockmendel Sep 11, 2025
4e77fb7
Updated version of 62317
eicchen Oct 1, 2025
eb12b34
WIP frame addition with fill_value test
eicchen Oct 5, 2025
7e23b65
Completed flex frame fill_value testcase
eicchen Oct 5, 2025
4617108
Removed type-casting shenanigans in array.py
eicchen Oct 5, 2025
bca56fe
Removed float addition test, reintroduced check for float type nulls
eicchen Oct 6, 2025
7273396
Edited pyarrow catch to be more specific
eicchen Oct 6, 2025
4493e08
Updated fill_value test case, Updated whatsnew
eicchen Oct 8, 2025
406cd15
Applied PR feedback
eicchen Oct 13, 2025
ad9614b
Applied various suggestions from jbrock to testcases
eicchen Oct 18, 2025
5a64e5c
Merge branch 'main' into BUG-#61581-DataFrame.mul
eicchen Oct 20, 2025
73d168b
Updated catch for str[python[ in test_add_frame
eicchen Oct 20, 2025
13ed0d6
Merge branch 'BUG-#61581-DataFrame.mul' of https://github.com/eicchen…
eicchen Oct 20, 2025
3d3a2a6
reupdated str[python] catch after git shenanigans
eicchen Oct 20, 2025
76a122e
Update python test_add_frame catch to better reflect issues across di…
eicchen Oct 21, 2025
1d3260e
I have no idea where that test came from
eicchen Oct 21, 2025
32360a8
Merge branch 'main' into BUG-#61581-DataFrame.mul
eicchen Oct 28, 2025
30d4a07
Edited original merge to be inline with #62869, added 'object' as dty…
eicchen Oct 28, 2025
771a19c
Returned accidentally removed update note, modified test_add_frame to…
eicchen Nov 3, 2025
a321daf
Removed unnecessary change in test_string
eicchen Nov 3, 2025
c79c210
Simplified test case for mul*array with fill_value
eicchen Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,7 @@ MultiIndex
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
- Bug in :class:`DataFrame` flex arithmetic operations with :class:`Series` raising ``NotImplementedError`` when ``fill_value`` is passed (:issue:`61581`)
- Bug in :meth:`MultiIndex.union` raising when indexes have duplicates with differing names (:issue:`62059`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN`` (:issue:`61841`)
Expand Down
53 changes: 29 additions & 24 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8491,7 +8491,10 @@ def to_series(right):
# pass dtype to avoid doing inference, which would break consistency
# with Index/Series ops
dtype = None
if getattr(right, "dtype", None) == object:
if (
getattr(right, "dtype", None) == "object"
or getattr(right, "dtype", None) == object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't be necessary

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, this is here to keep the output dtype for some operations as objects. Specifically, pandas/tests/arithmetic/test_string.py::test_add_strings[string=object]
returns

E       Attribute "dtype" are different
E       [left]:  <StringDtype(na_value=nan)>
E       [right]: object

When adding an object to another object. I can remove it if you think adding an exception for this test case is the better option.

):
# can't pass right.dtype unconditionally as that would break on e.g.
# datetime64[h] ndarray
dtype = object
Expand Down Expand Up @@ -8595,27 +8598,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
if lib.is_np_dtype(rvalues.dtype):
# We can losslessly+cheaply cast to ndarray
# i.e. ndarray or dt64[naive], td64
# TODO(EA2D): no need to special case with 2D EAs
rvalues = np.asarray(rvalues)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
return series
rvalues = rvalues.reshape(1, -1)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
df = self._constructor(rvalues, dtype=rvalues.dtype)

rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
return self._constructor(
rvalues,
index=self.index,
columns=self.columns,
dtype=rvalues.dtype,
).__finalize__(series)
else:
# GH#61581
if axis == 0:
df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
else:
nrows = self.shape[0]
df = DataFrame(
{i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
dtype=rvalues.dtype,
)
df.index = self.index
df.columns = self.columns
return df.__finalize__(series)

def _flex_arith_method(
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
Expand All @@ -8625,11 +8635,6 @@ def _flex_arith_method(
if self._should_reindex_frame_op(other, op, axis, fill_value, level):
return self._arith_method_with_reindex(other, op)

if isinstance(other, Series) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
Expand All @@ -1376,16 +1371,9 @@ def test_period_add_timestamp_raises(self, box_with_array):
with pytest.raises(TypeError, match=msg):
pd.Index([ts]) + arr

if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + pd.DataFrame([ts])
if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'"
with pytest.raises(TypeError, match=msg):
pd.DataFrame([ts]) + arr

Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/arithmetic/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,6 @@ def test_mul(any_string_dtype):

def test_add_strings(any_string_dtype, request):
dtype = any_string_dtype
if dtype != np.dtype(object):
mark = pytest.mark.xfail(reason="GH-28527")
request.applymarker(mark)
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
assert arr.__add__(df) is NotImplemented
Expand All @@ -260,11 +257,17 @@ def test_add_strings(any_string_dtype, request):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
def test_add_frame(any_string_dtype, request):
# Inconsistent behavior between different versions of the python engine.
# Environments without PyArrow correctly return the value for python storage.
# The same does not hold when PyArrow is installed or for any other storage,
# so those cases are marked xfail.
dtype = any_string_dtype
if HAS_PYARROW or getattr(dtype, "storage", None) != "python":
marks = pytest.mark.xfail(reason="GH-28527")
request.applymarker(marks)

arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
df = pd.DataFrame([["x", np.nan, "y", np.nan]])

assert arr.__add__(df) is NotImplemented

result = arr + df
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
if op not in ("__mul__", "__rmul__"):
if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
msg = "|".join(
[
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
str_ser = pd.Series("foo", index=s.index)
with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
ops(str_ser)

msg = "|".join(
[
Expand Down
90 changes: 83 additions & 7 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,11 +629,43 @@ def test_arith_flex_frame_corner(self, float_frame):
expected = float_frame.sort_index() * np.nan
tm.assert_frame_equal(result, expected)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], fill_value=3)
@pytest.mark.parametrize("axis", [0, 1])
def test_arith_flex_frame_fill_value_series(self, float_frame, axis):
rng = np.random.default_rng(60)
mask = rng.random(float_frame.shape) < 0.2
left = float_frame.mask(mask)
right = left.iloc[0]

result = left.add(right, axis=axis, fill_value=3)

if axis == 0: # axis = index, vertical
pad_num = abs(result.shape[0] - len(right))
mult_num = result.shape[1]
right_pad = np.pad(
right, (0, pad_num), mode="constant", constant_values=(np.nan)
)
right_df = DataFrame(
[right_pad] * mult_num, columns=result.index, index=result.columns
).T

left = left.reindex_like(result)

else: # axis = columns, horizontal
pad_num = abs(result.shape[1] - len(right))
mult_num = result.shape[0]
right_pad = np.pad(
right, (0, pad_num), mode="constant", constant_values=(np.nan)
)
right_df = DataFrame(
[right_pad] * mult_num, index=result.index, columns=result.columns
)

left_filled = left.fillna(3)
right_filled = right_df.fillna(3)
expected = right_filled + left_filled
expected = expected.mask(expected == 6, pd.NA)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
def test_arith_flex_series_ops(self, simple_frame, op):
Expand Down Expand Up @@ -675,11 +707,21 @@ def test_arith_flex_zero_len_raises(self):
df_len0 = DataFrame(columns=["A", "B"])
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

with pytest.raises(NotImplementedError, match="fill_value"):
msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'"
with pytest.raises(TypeError, match=msg):
df.add(ser_len0, fill_value="E")

with pytest.raises(NotImplementedError, match="fill_value"):
df_len0.sub(df["A"], axis=None, fill_value=3)
result = df_len0.sub(df, axis=None, fill_value=3)
expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=0, fill_value=3)
expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=1, fill_value=3)
expected = DataFrame([], columns=["A", "B", 0, 1])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_flex_add_scalar_fill_value(self):
# GH#12723
Expand Down Expand Up @@ -2201,3 +2243,37 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)


# (dtype, fill_value) pairs: one NumPy and one extension-array dtype from each
# of the integer and float dtype lists, with a fill value of the matching kind.
dt_params = [
    (tm.ALL_INT_NUMPY_DTYPES[0], 10),
    (tm.ALL_INT_EA_DTYPES[0], 10),
    (tm.FLOAT_NUMPY_DTYPES[0], 4.9),
    (tm.FLOAT_EA_DTYPES[0], 4.9),
]


@pytest.mark.parametrize("dtype, fill_val", dt_params)
def test_df_mul_array_fill_value(dtype, fill_val):
    """
    GH 61581: ``DataFrame.mul`` with a 1D array and ``fill_value`` on axis=0.

    The expected frame encodes the contract being checked: a position that is
    missing on exactly one side is multiplied as if it held ``fill_val``, while
    a position missing on both sides is ``safe_null * safe_null``.

    The multiplier has one entry per row, so it can only be aligned along
    axis=0; the test is therefore not parametrized over ``axis`` (doing so
    while hard-coding ``axis=0`` would just run every case twice).
    """
    if dtype == tm.ALL_INT_NUMPY_DTYPES[0]:
        # Numpy int type cannot represent NaN, so use a real value in the
        # "missing" slots instead.
        safe_null = fill_val
    else:
        safe_null = np.nan

    df = DataFrame([[safe_null, 1, 2], [3, safe_null, 5]], dtype=dtype)
    mult = pd.array([safe_null, 1.0], dtype=dtype)

    result = df.mul(mult, axis=0, fill_value=fill_val)

    expected = DataFrame(
        [[safe_null * safe_null, fill_val, fill_val * 2], [3.0, fill_val, 5.0]]
    ).astype(dtype)

    tm.assert_frame_equal(result, expected)
Loading