diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..40d71eee5784f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -743,6 +743,7 @@ Other API changes - :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`) - :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`) - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`) +- Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` and a ``list`` now consistently wrap that list in an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`) - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`) - Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2adab31fff851..c040b27a433e1 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1017,6 +1017,9 @@ def _maybe_mask_result( return IntegerArray(result, mask, copy=False) + elif result.dtype == object: + result[mask] = self.dtype.na_value + return result else: result[mask] = np.nan return result diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 67e03ec791e2c..af3309f6bd4fc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7285,19 +7285,7 @@ def _cmp_method(self, other, op): else: other = np.asarray(other) - if 
is_object_dtype(self.dtype) and isinstance(other, ExtensionArray): - # e.g. PeriodArray, Categorical - result = op(self._values, other) - - elif isinstance(self._values, ExtensionArray): - result = op(self._values, other) - - elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex): - # don't pass MultiIndex - result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) - - else: - result = ops.comparison_op(self._values, other, op) + result = ops.comparison_op(self._values, other, op) return result diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index ecd2e2e4963d3..be249215ea4dc 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -53,7 +53,10 @@ from pandas.core import roperator from pandas.core.computation import expressions -from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + sanitize_array, +) from pandas.core.ops import missing from pandas.core.ops.dispatch import should_extension_dispatch from pandas.core.ops.invalid import invalid_comparison @@ -261,6 +264,10 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # and `maybe_prepare_scalar_for_op` has already been called on `right` # We need to special-case datetime64/timedelta64 dtypes (e.g. because numpy # casts integer dtypes to timedelta64 when operating with timedelta64 - GH#22390) + if isinstance(right, list): + # GH#62423 + right = sanitize_array(right, None) + right = ensure_wrapped_if_datetimelike(right) if ( should_extension_dispatch(left, right) @@ -310,7 +317,8 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: if isinstance(rvalues, list): # We don't catch tuple here bc we may be comparing e.g. 
MultiIndex # to a tuple that represents a single entry, see test_compare_tuple_strs - rvalues = np.asarray(rvalues) + rvalues = sanitize_array(rvalues, None) + rvalues = ensure_wrapped_if_datetimelike(rvalues) if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): # TODO: make this treatment consistent across ops and classes. diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index e0aa4f44fe2be..96f6a23578bf0 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -11,10 +11,16 @@ from pandas._libs.missing import is_matching_na from pandas.core.dtypes.generic import ( + ABCExtensionArray, ABCIndex, ABCSeries, ) +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + sanitize_array, +) + if TYPE_CHECKING: from collections.abc import Callable @@ -56,6 +62,7 @@ def _unpack_zerodim_and_defer(method: F, name: str) -> F: ------- method """ + is_logical = name.strip("_") in ["or", "xor", "and", "ror", "rxor", "rand"] @wraps(method) def new_method(self, other): @@ -66,6 +73,14 @@ def new_method(self, other): return NotImplemented other = item_from_zerodim(other) + if ( + isinstance(self, ABCExtensionArray) + and isinstance(other, list) + and not is_logical + ): + # See GH#62423 + other = sanitize_array(other, None) + other = ensure_wrapped_if_datetimelike(other) return method(self, other) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index a6a117ac53806..d9d9343a9b56e 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -151,6 +151,23 @@ def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch): class TestNumericArraylikeArithmeticWithDatetimeLike: + def test_mul_timedelta_list(self, box_with_array): + # GH#62524 + box = box_with_array + left = np.array([3, 4]) + left = tm.box_expected(left, box) + + right = [Timedelta(days=1), Timedelta(days=2)] + + result = left * right + + expected = 
TimedeltaIndex([Timedelta(days=3), Timedelta(days=8)]) + expected = tm.box_expected(expected, box) + tm.assert_equal(result, expected) + + result2 = right * left + tm.assert_equal(result2, expected) + @pytest.mark.parametrize("box_cls", [np.array, Index, Series]) @pytest.mark.parametrize( "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype) diff --git a/pandas/tests/arithmetic/test_string.py b/pandas/tests/arithmetic/test_string.py index 66b7a5bd440c3..46a3d1e8386eb 100644 --- a/pandas/tests/arithmetic/test_string.py +++ b/pandas/tests/arithmetic/test_string.py @@ -213,9 +213,22 @@ def test_add_2d(any_string_dtype, request): s + b -def test_add_sequence(any_string_dtype, request): +def test_add_sequence(any_string_dtype, request, using_infer_string): dtype = any_string_dtype - if dtype == np.dtype(object): + if ( + dtype != object + and dtype.storage == "python" + and dtype.na_value is np.nan + and HAS_PYARROW + and using_infer_string + ): + mark = pytest.mark.xfail( + reason="As of GH#62522, the list gets wrapped with sanitize_array, " + "which casts to a higher-priority StringArray, so we get " + "NotImplemented." 
+ ) + request.applymarker(mark) + if dtype == np.dtype(object) and using_infer_string: mark = pytest.mark.xfail(reason="Cannot broadcast list") request.applymarker(mark) @@ -415,11 +428,20 @@ def test_comparison_methods_array_arrow_extension(comparison_op, any_string_dtyp tm.assert_extension_array_equal(result, expected) -def test_comparison_methods_list(comparison_op, any_string_dtype): +@pytest.mark.parametrize("box", [pd.array, pd.Index, Series]) +def test_comparison_methods_list(comparison_op, any_string_dtype, box, request): dtype = any_string_dtype + + if box is pd.array and dtype != object and dtype.na_value is np.nan: + mark = pytest.mark.xfail( + reason="After wrapping list, op returns NotImplemented, see GH#62522" + ) + request.applymarker(mark) + op_name = f"__{comparison_op.__name__}__" - a = pd.array(["a", None, "c"], dtype=dtype) + a = box(pd.array(["a", None, "c"], dtype=dtype)) + item = "c" other = [None, None, "c"] result = comparison_op(a, other) @@ -427,18 +449,24 @@ def test_comparison_methods_list(comparison_op, any_string_dtype): result2 = comparison_op(other, a) tm.assert_equal(result, result2) - if dtype == object or dtype.na_value is np.nan: + if dtype == np.dtype(object) or dtype.na_value is np.nan: if operator.ne == comparison_op: expected = np.array([True, True, False]) else: expected = np.array([False, False, False]) - expected[-1] = getattr(other[-1], op_name)(a[-1]) - result = extract_array(result, extract_numpy=True) - tm.assert_numpy_array_equal(result, expected) + expected[-1] = getattr(item, op_name)(item) + if box is not pd.Index: + # if GH#62766 is addressed this check can be removed + expected = box(expected, dtype=expected.dtype) + tm.assert_equal(result, expected) else: expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean" expected = np.full(len(a), fill_value=None, dtype="object") - expected[-1] = getattr(other[-1], op_name)(a[-1]) + expected[-1] = getattr(item, op_name)(item) expected = 
pd.array(expected, dtype=expected_dtype) - tm.assert_extension_array_equal(result, expected) + expected = extract_array(expected, extract_numpy=True) + if box is not pd.Index: + # if GH#62766 is addressed this check can be removed + expected = tm.box_expected(expected, box) + tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 9a54386abf281..aee8c7b3989ba 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -274,6 +274,23 @@ class TestTimedelta64ArithmeticUnsorted: # Tests moved from type-specific test files but not # yet sorted/parametrized/de-duplicated + def test_td64_op_with_list(self, box_with_array): + # GH#62353 + box = box_with_array + + left = TimedeltaIndex(["2D", "4D"]) + left = tm.box_expected(left, box) + + right = [Timestamp("2016-01-01"), Timestamp("2016-02-01")] + + result = left + right + expected = DatetimeIndex(["2016-01-03", "2016-02-05"], dtype="M8[ns]") + expected = tm.box_expected(expected, box) + tm.assert_equal(result, expected) + + result2 = right + left + tm.assert_equal(result2, expected) + def test_ufunc_coercions(self): # normal ops are also tested in tseries/test_timedeltas.py idx = TimedeltaIndex(["2h", "4h", "6h", "8h", "10h"], freq="2h", name="x") diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index e16ab6f23b417..119e8dfa0717e 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -201,10 +201,6 @@ def test_error_invalid_values(data, all_arithmetic_operators): ]: # (data[~data.isna()] >= 0).all(): res = ops(str_ser) expected = pd.Series(["foo" * x for x in data], index=s.index) - expected = expected.fillna(np.nan) - # TODO: doing this fillna to keep tests passing as we make - # assert_almost_equal stricter, but the expected with pd.NA seems - # more-correct than 
np.nan here. tm.assert_series_equal(res, expected) else: with tm.external_error_raised(TypeError): diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index b23e40f4d7f97..6e45abc9df6f2 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -326,9 +326,10 @@ def test_disallow_addsub_ops(self, func, op_name): cat_or_list = "'(Categorical|list)' and '(Categorical|list)'" msg = "|".join( [ - f"cannot perform {op_name} with this index type: CategoricalIndex", - "can only concatenate list", rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}", + "Object with dtype category cannot perform the numpy op (add|subtract)", + "operation 'r?(add|sub)' not supported for dtype 'str' " + "with dtype 'category'", ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 488eb99f81ef5..736ab9c82ca6f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -14,7 +14,6 @@ import pytest from pandas._libs import lib -from pandas.compat._optional import import_optional_dependency import pandas as pd from pandas import ( @@ -30,7 +29,6 @@ import pandas._testing as tm from pandas.core import ops from pandas.core.computation import expressions as expr -from pandas.util.version import Version @pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"]) @@ -380,36 +378,25 @@ def test_mask_div_propagate_na_for_non_na_dtype(self): result = ser2 / ser1 tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("val, dtype", [(3, "Int64"), (3.5, "Float64")]) - def test_add_list_to_masked_array(self, val, dtype): - # GH#22962 + @pytest.mark.parametrize("val", [3, 3.5]) + def test_add_list_to_masked_array(self, val): + # GH#22962, behavior changed by GH#62552 ser = Series([1, None, 3], dtype="Int64") result = 
ser + [1, None, val] - expected = Series([2, None, 3 + val], dtype=dtype) + expected = Series([2, pd.NA, 3 + val], dtype="Float64") tm.assert_series_equal(result, expected) result = [1, None, val] + ser tm.assert_series_equal(result, expected) - def test_add_list_to_masked_array_boolean(self, request): + def test_add_list_to_masked_array_boolean(self): # GH#22962 - ne = import_optional_dependency("numexpr", errors="ignore") - warning = ( - UserWarning - if request.node.callspec.id == "numexpr" - and ne - and Version(ne.__version__) < Version("2.13.1") - else None - ) ser = Series([True, None, False], dtype="boolean") - msg = "operator is not supported by numexpr for the bool dtype" - with tm.assert_produces_warning(warning, match=msg): - result = ser + [True, None, True] - expected = Series([True, None, True], dtype="boolean") + result = ser + [True, None, True] + expected = Series([2, pd.NA, 1], dtype=object) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(warning, match=msg): - result = [True, None, True] + ser + result = [True, None, True] + ser tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 018ae06562148..7020ff608365e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2216,7 +2216,16 @@ def test_np_string_array_object_cast(self, data): arr = np.array(data, dtype=StringDType()) res = Series(arr) assert res.dtype == np.object_ - assert (res == data).all() + + if data[-1] is np.nan: + # as of GH#62522 the comparison op for `res==data` casts data + # using sanitize_array, which casts to 'str' dtype, which does not + # consider string 'nan' to be equal to np.nan, + # (which apparently numpy does? weird.) + assert (res.iloc[:-1] == data[:-1]).all() + assert res.iloc[-1] == "nan" + else: + assert (res == data).all() class TestSeriesConstructorInternals: