Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,7 @@ Numeric
- Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`)
- Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`)
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
- Bug in arithmetic operations between objects with numpy-nullable dtype and :class:`ArrowDtype` incorrectly raising (:issue:`58602`)

Conversion
^^^^^^^^^^
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.dtypes import (
ArrowDtype,
BaseMaskedDtype,
)
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
Expand Down Expand Up @@ -767,6 +770,10 @@ def _arith_method(self, other, op):
pd_op = ops.get_array_op(op)
other = ensure_wrapped_if_datetimelike(other)

if isinstance(other, ExtensionArray) and isinstance(other.dtype, ArrowDtype):
# GH#58602
return NotImplemented

if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
# Avoid DeprecationWarning: In future, it will be an error
# for 'np.bool_' scalars to be interpreted as an index
Expand Down Expand Up @@ -843,7 +850,11 @@ def _cmp_method(self, other, op) -> BooleanArray:

mask = None

if isinstance(other, BaseMaskedArray):
if isinstance(other, ExtensionArray) and isinstance(other.dtype, ArrowDtype):
# GH#58602
return NotImplemented

elif isinstance(other, BaseMaskedArray):
other, mask = other._data, other._mask

elif is_list_like(other):
Expand Down
31 changes: 29 additions & 2 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2308,8 +2308,35 @@ def kind(self) -> str:

@cache_readonly
def itemsize(self) -> int:
    """
    Return the number of bytes in this dtype.

    For Arrow-backed dtypes:
    - Returns the fixed-width bit size divided by 8 for standard fixed-width types.
    - For boolean types, returns the NumPy itemsize.
    - Falls back to the NumPy dtype itemsize for variable-width & unsupported types.

    Returns
    -------
    int
        Size in bytes of a single element of this dtype.

    Examples
    --------
    >>> import pyarrow as pa
    >>> import pandas as pd
    >>> dtype = pd.ArrowDtype(pa.int32())
    >>> dtype.itemsize
    4

    >>> dtype = pd.ArrowDtype(pa.bool_())
    >>> dtype.itemsize  # falls back to numpy dtype
    1
    """
    # Booleans are handled before the bit_width path: they defer to the
    # NumPy dtype's itemsize instead of dividing a bit width by 8.
    if pa.types.is_boolean(self.pyarrow_dtype):
        return self.numpy_dtype.itemsize

    # Use pyarrow itemsize for fixed-width data types
    # e.g. int32 -> 32 bits // 8 = 4 bytes
    try:
        return self.pyarrow_dtype.bit_width // 8
    except (ValueError, AttributeError, NotImplementedError):
        # No usable bit_width on this pyarrow type: use the NumPy itemsize.
        return self.numpy_dtype.itemsize

def construct_array_type(self) -> type_t[ArrowExtensionArray]:
"""
Expand Down
88 changes: 88 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3702,6 +3702,94 @@ def test_pow_with_all_na_float():
tm.assert_series_equal(result, expected)


def test_mul_numpy_nullable_with_pyarrow_float():
    """Multiply and compare a ``Float64`` Series with a ``float64[pyarrow]`` one."""
    # GH#58602
    masked = pd.Series(range(5), dtype="Float64")
    arrow = pd.Series(range(5), dtype="float64[pyarrow]")
    operand_orders = ((masked, arrow), (arrow, masked))

    # The product is expected to be pyarrow-backed in either operand order.
    expected_product = pd.Series([0, 1, 4, 9, 16], dtype="float64[pyarrow]")
    for lhs, rhs in operand_orders:
        tm.assert_series_equal(lhs * rhs, expected_product)

    # while we're here, let's check __eq__
    expected_equal = pd.Series([True] * 5, dtype="bool[pyarrow]")
    for lhs, rhs in operand_orders:
        tm.assert_series_equal(lhs == rhs, expected_equal)


@pytest.mark.parametrize(
    "type_name, expected_size",
    [
        # Integer types
        ("int8", 1),
        ("int16", 2),
        ("int32", 4),
        ("int64", 8),
        ("uint8", 1),
        ("uint16", 2),
        ("uint32", 4),
        ("uint64", 8),
        # Floating point types
        ("float16", 2),
        ("float32", 4),
        ("float64", 8),
        # Boolean
        ("bool_", 1),
        # Date and timestamp types
        ("date32", 4),
        ("date64", 8),
        ("timestamp", 8),
        # Time types
        ("time32", 4),
        ("time64", 8),
        # Decimal types
        ("decimal128", 16),
        ("decimal256", 32),
    ],
)
def test_arrow_dtype_itemsize_fixed_width(type_name, expected_size):
    """Check ``ArrowDtype.itemsize`` against the expected size of each type."""
    # GH 57948

    # Types that are built here with explicit arguments; every other name is
    # called on ``pa`` with no arguments.
    prebuilt_types = {
        "timestamp": pa.timestamp("ns"),
        "time32": pa.time32("s"),
        "time64": pa.time64("ns"),
        "decimal128": pa.decimal128(38, 10),
        "decimal256": pa.decimal256(76, 10),
    }

    arrow_type = prebuilt_types.get(type_name)
    if arrow_type is None:
        arrow_type = getattr(pa, type_name)()
    dtype = ArrowDtype(arrow_type)

    # Booleans are compared against the NumPy itemsize, not the parametrized
    # value.
    if type_name == "bool_":
        expected_size = dtype.numpy_dtype.itemsize

    assert dtype.itemsize == expected_size, (
        f"{type_name} expected {expected_size}, got {dtype.itemsize} "
        f"(bit_width={getattr(dtype.pyarrow_dtype, 'bit_width', 'N/A')})"
    )


@pytest.mark.parametrize("type_name", ["string", "binary", "large_string"])
def test_arrow_dtype_itemsize_variable_width(type_name):
    """Check that ``itemsize`` equals the NumPy dtype's itemsize for these types."""
    # GH 57948
    dtype = ArrowDtype(getattr(pa, type_name)())

    reported_size = dtype.itemsize
    numpy_size = dtype.numpy_dtype.itemsize
    assert reported_size == numpy_size


def test_cast_pontwise_result_decimal_nan():
# GH#62522 we don't want to get back null[pyarrow] here
ser = pd.Series([], dtype="float64[pyarrow]")
Expand Down
Loading