diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..4560e5912976a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1108,6 +1108,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`ArrowDtype._get_common_dtype` and :meth:`ExtensionDtype._get_common_dtype` when using ``date32[pyarrow]`` and ``date64[pyarrow]`` types (:issue:`62343`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2e3d73edcdf4f..6189aa0a08d2c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2277,6 +2277,18 @@ def name(self) -> str: # type: ignore[override] @cache_readonly def numpy_dtype(self) -> np.dtype: """Return an instance of the related numpy dtype""" + if pa.types.is_date32(self.pyarrow_dtype): + # pa.timestamp(unit).to_pandas_dtype() returns ns units + # regardless of the pyarrow timestamp units. + # This can be removed if/when pyarrow addresses it: + # https://github.com/apache/arrow/issues/34462 + return np.dtype("datetime64[D]") + if pa.types.is_date64(self.pyarrow_dtype): + # pa.timestamp(unit).to_pandas_dtype() returns ns units + # regardless of the pyarrow timestamp units. + # This can be removed if/when pyarrow addresses it: + # https://github.com/apache/arrow/issues/34462 + return np.dtype("datetime64[ms]") if pa.types.is_timestamp(self.pyarrow_dtype): # pa.timestamp(unit).to_pandas_dtype() returns ns units # regardless of the pyarrow timestamp units. 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 875aa38481ecf..f458c52709ce1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -650,7 +650,7 @@ def test_construct_from_string_another_type_raises(self, dtype): def test_get_common_dtype(self, dtype, request): pa_dtype = dtype.pyarrow_dtype if ( - pa.types.is_date(pa_dtype) + pa.types.is_date64(pa_dtype) or pa.types.is_time(pa_dtype) or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None) or pa.types.is_binary(pa_dtype) @@ -1087,9 +1087,9 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op): def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if pa.types.is_date(pa_dtype): + if pa.types.is_date64(pa_dtype): mark = pytest.mark.xfail( - reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]" + reason="GH#62343 incorrectly casts date64 to timestamp[ms][pyarrow]" ) request.applymarker(mark) super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)