From 5071e1196d741ce7bd54cf13a45e7ea075e57d6f Mon Sep 17 00:00:00 2001 From: nli307 Date: Wed, 29 Oct 2025 20:42:44 -0400 Subject: [PATCH 01/16] When finding the common type, return the original type if there is only one common type. Return objects for date32 and date64 types when converting those types to numpy types. --- pandas/core/dtypes/common.py | 5 +++++ pandas/core/dtypes/dtypes.py | 18 ++++++++++++++++++ .../tests/dtypes/cast/test_find_common_type.py | 16 ++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dd63445266134..16fb90b0eca05 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1872,6 +1872,11 @@ def pandas_dtype(dtype) -> DtypeObj: result = result() return result + # try a pyarrow dtype + from pandas.core.dtypes.dtypes import ArrowDtype + if isinstance(dtype, ArrowDtype): + return ArrowDtype(dtype) + # try a numpy dtype # raise a consistent TypeError if failed try: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2e3d73edcdf4f..7fa6011ee438d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2277,6 +2277,12 @@ def name(self) -> str: # type: ignore[override] @cache_readonly def numpy_dtype(self) -> np.dtype: """Return an instance of the related numpy dtype""" + if pa.types.is_date32(self.pyarrow_dtype) or pa.types.is_date64( + self.pyarrow_dtype + ): + # date32 and date64 are pyarrow timestamps but do not have a + # corresponding numpy dtype. + return np.dtype(object) if pa.types.is_timestamp(self.pyarrow_dtype): # pa.timestamp(unit).to_pandas_dtype() returns ns units # regardless of the pyarrow timestamp units. @@ -2453,6 +2459,18 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: null_dtype = type(self)(pa.null()) + # Cover cases where numpy does not have a corresponding dtype, but + # only one non-null dtype is received, or all dtypes are null. + single_dtype = { + dtype + for dtype in dtypes + if dtype != null_dtype + } + if len(single_dtype) == 0: + return null_dtype + if len(single_dtype) == 1: + return single_dtype.pop() + new_dtype = find_common_type( [ dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 83ef7382fbe8a..dbce6c6e899b1 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -173,3 +173,19 @@ def test_interval_dtype_with_categorical(dtype): result = find_common_type([dtype, cat.dtype]) assert result == dtype + + +@pytest.mark.parametrize( + "dtypes,expected", + [ + ( + ["date32[pyarrow]", "null[pyarrow]"], + "date32[day][pyarrow]", + ), + ], +) +def test_pyarrow_dtypes(dtypes, expected): + """Test finding common types with pyarrow dtypes not in numpy.""" + source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] + result = find_common_type(source_dtypes) + assert result == pandas_dtype(expected) \ No newline at end of file From 38ba1bdfb21ba4d2ab9df7c8e17f38e3d242e7f8 Mon Sep 17 00:00:00 2001 From: nli307 Date: Wed, 29 Oct 2025 20:57:19 -0400 Subject: [PATCH 02/16] Add bug to changelog --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2b35abd45a96f..3b7b7863ab722 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1036,6 +1036,7 @@ Conversion - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`) +- Bug in :meth:`ArrowDtype._get_common_dtype` and :meth:`ExtentionDtype._get_common_dtype` when using ``date32[pyarrow]`` and ``date64[pyarrow]`` types (:issue:`62343`) Strings ^^^^^^^ From 4b0a90c56fd43fc8a4f4fa2ba5e9141c1ddc37ab Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 12:55:11 -0500 Subject: [PATCH 03/16] change pyarrow dtype of date32 and date64 to day and ms granularity --- pandas/core/dtypes/common.py | 5 ----- pandas/core/dtypes/dtypes.py | 30 ++++++++++++------------------ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 1b68df22583e4..bb3e8105d5472 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1872,11 +1872,6 @@ def pandas_dtype(dtype) -> DtypeObj: result = result() return result - # try a pyarrow dtype - from pandas.core.dtypes.dtypes import ArrowDtype - if isinstance(dtype, ArrowDtype): - return ArrowDtype(dtype) - # try a numpy dtype # raise a consistent TypeError if failed try: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 7fa6011ee438d..6189aa0a08d2c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2277,12 +2277,18 @@ def name(self) -> str: # type: ignore[override] @cache_readonly def numpy_dtype(self) -> np.dtype: """Return an instance of the related numpy dtype""" - if pa.types.is_date32(self.pyarrow_dtype) or pa.types.is_date64( - self.pyarrow_dtype - ): - # date32 and date64 are pyarrow timestamps but do not have a - # corresponding numpy dtype. - return np.dtype(object) + if pa.types.is_date32(self.pyarrow_dtype): + # pa.timestamp(unit).to_pandas_dtype() returns ns units + # regardless of the pyarrow timestamp units. + # This can be removed if/when pyarrow addresses it: + # https://github.com/apache/arrow/issues/34462 + return np.dtype("datetime64[D]") + if pa.types.is_date64(self.pyarrow_dtype): + # pa.timestamp(unit).to_pandas_dtype() returns ns units + # regardless of the pyarrow timestamp units. + # This can be removed if/when pyarrow addresses it: + # https://github.com/apache/arrow/issues/34462 + return np.dtype("datetime64[ms]") if pa.types.is_timestamp(self.pyarrow_dtype): # pa.timestamp(unit).to_pandas_dtype() returns ns units # regardless of the pyarrow timestamp units. @@ -2459,18 +2465,6 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: null_dtype = type(self)(pa.null()) - # Cover cases where numpy does not have a corresponding dtype, but - # only one non-null dtype is received, or all dtypes are null. - single_dtype = { - dtype - for dtype in dtypes - if dtype != null_dtype - } - if len(single_dtype) == 0: - return null_dtype - if len(single_dtype) == 1: - return single_dtype.pop() - new_dtype = find_common_type( [ dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype From 9241196298ed76e89201c4a42ff83e466f52feaa Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 13:15:44 -0500 Subject: [PATCH 04/16] working on tests --- .../dtypes/cast/test_find_common_type.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index dbce6c6e899b1..36af26eaeb7ec 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -182,10 +182,30 @@ def test_interval_dtype_with_categorical(dtype): ["date32[pyarrow]", "null[pyarrow]"], "date32[day][pyarrow]", ), + ( + ["date32[pyarrow]", "date32[pyarrow]"], + "date32[day][pyarrow]", + ), + ( + ["date64[pyarrow]", "null[pyarrow]"], + "date64[ms][pyarrow]", # timestamp[ms][pyarrow] + ), + ( + ["date64[pyarrow]", "date64[pyarrow]"], + "date64[ms][pyarrow]", + ), + ( + ["date32[pyarrow]", "date64[pyarrow]"], + "date64[ms][pyarrow]", # timestamp[ms][pyarrow] + ), + ( + ["date32[pyarrow]", "date64[pyarrow]", "datetime64[ms]"], + "timestamp[ms][pyarrow]", + ), ], ) -def test_pyarrow_dtypes(dtypes, expected): - """Test finding common types with pyarrow dtypes not in numpy.""" +def test_pyarrow_date_dtypes(dtypes, expected): source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] result = find_common_type(source_dtypes) + print(f'{source_dtypes}: {result}') assert result == pandas_dtype(expected) \ No newline at end of file From 0f319928f5b1800325e881efbb9d43786654b082 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 14:16:21 -0500 Subject: [PATCH 05/16] xfail date64 tests --- .../dtypes/cast/test_find_common_type.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 36af26eaeb7ec..c8cc653e2f097 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -186,18 +186,10 @@ def test_interval_dtype_with_categorical(dtype): ["date32[pyarrow]", "date32[pyarrow]"], "date32[day][pyarrow]", ), - ( - ["date64[pyarrow]", "null[pyarrow]"], - "date64[ms][pyarrow]", # timestamp[ms][pyarrow] - ), ( ["date64[pyarrow]", "date64[pyarrow]"], "date64[ms][pyarrow]", ), - ( - ["date32[pyarrow]", "date64[pyarrow]"], - "date64[ms][pyarrow]", # timestamp[ms][pyarrow] - ), ( ["date32[pyarrow]", "date64[pyarrow]", "datetime64[ms]"], "timestamp[ms][pyarrow]", @@ -205,6 +197,46 @@ def test_interval_dtype_with_categorical(dtype): ], ) def test_pyarrow_date_dtypes(dtypes, expected): + source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] + result = find_common_type(source_dtypes) + assert result == pandas_dtype(expected) + +@pytest.mark.xfail(reason=""" + Finding common pyarrow dtypes relies on conversion + to numpy dtypes and then back to pyarrow dtypes. + + We have: + >>> pa.from_numpy_dtype(np.dtype('datetime64[D]')) + DataType(date32[day]) + >>> pa.from_numpy_dtype(np.dtype('datetime64[ms]')) + TimestampType(timestamp[ms]) + + To fix this test, we would need to have + >>> pa.from_numpy_dtype(np.dtype('datetime64[ms]')) + DataType(date64[ms]) + + But date64 isn't the same as datetime64[ms]. date64 + is meant to represent a date (without time) only, + represented in milliseconds (see + https://github.com/apache/arrow/issues/15032#issuecomment-1368096718). + + Hence, some date64-related common type computations + end up becoming cast to timestamps rather than date64. + """) +@pytest.mark.parametrize( + "dtypes,expected", + [ + ( + ["date64[pyarrow]", "null[pyarrow]"], + "date64[ms][pyarrow]", + ), + ( + ["date32[pyarrow]", "date64[pyarrow]"], + "date64[ms][pyarrow]", + ), + ], +) +def test_pyarrow_date64_dtype(dtypes, expected): source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] result = find_common_type(source_dtypes) print(f'{source_dtypes}: {result}') From 3fea3e0a3f66a1487521ba61355d25daa4bcd7da Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 14:17:40 -0500 Subject: [PATCH 06/16] working on tests --- pandas/tests/dtypes/cast/test_find_common_type.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index c8cc653e2f097..d7f97090a8fbf 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -211,9 +211,8 @@ def test_pyarrow_date_dtypes(dtypes, expected): >>> pa.from_numpy_dtype(np.dtype('datetime64[ms]')) TimestampType(timestamp[ms]) - To fix this test, we would need to have - >>> pa.from_numpy_dtype(np.dtype('datetime64[ms]')) - DataType(date64[ms]) + To fix this test, we would need to have an exception + that makes datetime64[ms] convert to date64[ms]. But date64 isn't the same as datetime64[ms]. date64 is meant to represent a date (without time) only, From ac77415aafb9acea12b5274aea40c8fcf7917226 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 14:21:30 -0500 Subject: [PATCH 07/16] remove print statement from test --- pandas/tests/dtypes/cast/test_find_common_type.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index d7f97090a8fbf..14f120805ec0e 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -238,5 +238,4 @@ def test_pyarrow_date_dtypes(dtypes, expected): def test_pyarrow_date64_dtype(dtypes, expected): source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] result = find_common_type(source_dtypes) - print(f'{source_dtypes}: {result}') assert result == pandas_dtype(expected) \ No newline at end of file From 608a43df797d6d3c986085bd90e43242b6233fe3 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 14:27:27 -0500 Subject: [PATCH 08/16] lint --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/dtypes/cast/test_find_common_type.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 992dcc42521b4..4560e5912976a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1108,13 +1108,13 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`ArrowDtype._get_common_dtype` and :meth:`ExtentionDtype._get_common_dtype` when using ``date32[pyarrow]`` and ``date64[pyarrow]`` types (:issue:`62343`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) - Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`) -- Bug in :meth:`ArrowDtype._get_common_dtype` and :meth:`ExtentionDtype._get_common_dtype` when using ``date32[pyarrow]`` and ``date64[pyarrow]`` types (:issue:`62343`) Strings ^^^^^^^ diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 14f120805ec0e..68d337d589901 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -204,7 +204,7 @@ def test_pyarrow_date_dtypes(dtypes, expected): @pytest.mark.xfail(reason=""" Finding common pyarrow dtypes relies on conversion to numpy dtypes and then back to pyarrow dtypes. - + We have: >>> pa.from_numpy_dtype(np.dtype('datetime64[D]')) DataType(date32[day]) @@ -238,4 +238,4 @@ def test_pyarrow_date_dtypes(dtypes, expected): def test_pyarrow_date64_dtype(dtypes, expected): source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] result = find_common_type(source_dtypes) - assert result == pandas_dtype(expected) \ No newline at end of file + assert result == pandas_dtype(expected) From d3680a125ed65dd8ab3ebde7ff40b10afd846fd4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 8 Nov 2025 19:32:38 +0000 Subject: [PATCH 09/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/tests/dtypes/cast/test_find_common_type.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 68d337d589901..b287aa4dcd828 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -201,7 +201,9 @@ def test_pyarrow_date_dtypes(dtypes, expected): result = find_common_type(source_dtypes) assert result == pandas_dtype(expected) -@pytest.mark.xfail(reason=""" + +@pytest.mark.xfail( + reason=""" Finding common pyarrow dtypes relies on conversion to numpy dtypes and then back to pyarrow dtypes. @@ -221,7 +223,8 @@ def test_pyarrow_date_dtypes(dtypes, expected): Hence, some date64-related common type computations end up becoming cast to timestamps rather than date64. - """) + """ +) @pytest.mark.parametrize( "dtypes,expected", [ From 7ff8825f9ee7fa65f52c418653346d71a2a90712 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 15:13:55 -0500 Subject: [PATCH 10/16] skip pyarrow tests if no pyarrow --- pandas/tests/dtypes/cast/test_find_common_type.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index b287aa4dcd828..942a331ccd138 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -10,6 +10,8 @@ PeriodDtype, ) +import pandas.util._test_decorators as td + from pandas import ( Categorical, Index, @@ -165,6 +167,7 @@ def test_interval_dtype(left, right): assert result == object +@td.skip_if_no("pyarrow") @pytest.mark.parametrize("dtype", interval_dtypes) def test_interval_dtype_with_categorical(dtype): obj = Index([], dtype=dtype) @@ -175,6 +178,7 @@ def test_interval_dtype_with_categorical(dtype): assert result == dtype +@td.skip_if_no("pyarrow") @pytest.mark.parametrize( "dtypes,expected", [ @@ -202,6 +206,7 @@ def test_pyarrow_date_dtypes(dtypes, expected): assert result == pandas_dtype(expected) +@td.skip_if_no("pyarrow") @pytest.mark.xfail( reason=""" Finding common pyarrow dtypes relies on conversion From 220e2906fb63e4ab7aa823b5a6b43d2dbccd6ba9 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sat, 8 Nov 2025 15:14:52 -0500 Subject: [PATCH 11/16] ruff format --- pandas/tests/dtypes/cast/test_find_common_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 942a331ccd138..c1caca6ff34fb 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import ( @@ -10,8 +12,6 @@ PeriodDtype, ) -import pandas.util._test_decorators as td - from pandas import ( Categorical, Index, From a187f86f452f9be092b526b0400685d8ae253101 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sun, 9 Nov 2025 10:08:48 -0500 Subject: [PATCH 12/16] removing xfail --- pandas/tests/extension/test_arrow.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 875aa38481ecf..3f772e4d16431 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1085,13 +1085,7 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op): expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_())) tm.assert_series_equal(result, expected) - def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - if pa.types.is_date(pa_dtype): - mark = pytest.mark.xfail( - reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]" - ) - request.applymarker(mark) + def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data): super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) From 74552ce3ef232b60ac309bbabb1e63cdd8ce940e Mon Sep 17 00:00:00 2001 From: nli307 Date: Sun, 9 Nov 2025 10:39:47 -0500 Subject: [PATCH 13/16] xfail date64 --- pandas/tests/extension/test_arrow.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 3f772e4d16431..4a6ec68d9417d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1085,7 +1085,13 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op): expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_())) tm.assert_series_equal(result, expected) - def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data): + def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_date64(pa_dtype): + mark = pytest.mark.xfail( + reason="GH#62343 incorrectly casts date64 to timestamp[ms][pyarrow]" + ) + request.applymarker(mark) super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) From 495f85eea68642be93d5571a2f418cdee8eeb264 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sun, 9 Nov 2025 10:50:26 -0500 Subject: [PATCH 14/16] remove date type as xfail in test_get_common_dtype --- pandas/tests/extension/test_arrow.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4a6ec68d9417d..44bcbe494d507 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -650,8 +650,7 @@ def test_construct_from_string_another_type_raises(self, dtype): def test_get_common_dtype(self, dtype, request): pa_dtype = dtype.pyarrow_dtype if ( - pa.types.is_date(pa_dtype) - or pa.types.is_time(pa_dtype) + pa.types.is_time(pa_dtype) or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None) or pa.types.is_binary(pa_dtype) or pa.types.is_decimal(pa_dtype) From a4f89dd75db32d0a55cd29d325ce125fc6388037 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sun, 9 Nov 2025 11:23:12 -0500 Subject: [PATCH 15/16] xfail is_date64 for test_get_common_dtype --- pandas/tests/extension/test_arrow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 44bcbe494d507..f458c52709ce1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -650,7 +650,8 @@ def test_construct_from_string_another_type_raises(self, dtype): def test_get_common_dtype(self, dtype, request): pa_dtype = dtype.pyarrow_dtype if ( - pa.types.is_time(pa_dtype) + pa.types.is_date64(pa_dtype) + or pa.types.is_time(pa_dtype) or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None) or pa.types.is_binary(pa_dtype) or pa.types.is_decimal(pa_dtype) From ea3f900c62222e41858ec6105e80347f2ae7c785 Mon Sep 17 00:00:00 2001 From: nli307 Date: Sun, 9 Nov 2025 11:33:08 -0500 Subject: [PATCH 16/16] remove pyarrow tests from test_find_common_type, already covered by tests in test_arrow --- .../dtypes/cast/test_find_common_type.py | 74 ------------------- 1 file changed, 74 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index c1caca6ff34fb..83ef7382fbe8a 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import ( @@ -167,7 +165,6 @@ def test_interval_dtype(left, right): assert result == object -@td.skip_if_no("pyarrow") @pytest.mark.parametrize("dtype", interval_dtypes) def test_interval_dtype_with_categorical(dtype): obj = Index([], dtype=dtype) @@ -176,74 +173,3 @@ def test_interval_dtype_with_categorical(dtype): result = find_common_type([dtype, cat.dtype]) assert result == dtype - - -@td.skip_if_no("pyarrow") -@pytest.mark.parametrize( - "dtypes,expected", - [ - ( - ["date32[pyarrow]", "null[pyarrow]"], - "date32[day][pyarrow]", - ), - ( - ["date32[pyarrow]", "date32[pyarrow]"], - "date32[day][pyarrow]", - ), - ( - ["date64[pyarrow]", "date64[pyarrow]"], - "date64[ms][pyarrow]", - ), - ( - ["date32[pyarrow]", "date64[pyarrow]", "datetime64[ms]"], - "timestamp[ms][pyarrow]", - ), - ], -) -def test_pyarrow_date_dtypes(dtypes, expected): - source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] - result = find_common_type(source_dtypes) - assert result == pandas_dtype(expected) - - -@td.skip_if_no("pyarrow") -@pytest.mark.xfail( - reason=""" - Finding common pyarrow dtypes relies on conversion - to numpy dtypes and then back to pyarrow dtypes. - - We have: - >>> pa.from_numpy_dtype(np.dtype('datetime64[D]')) - DataType(date32[day]) - >>> pa.from_numpy_dtype(np.dtype('datetime64[ms]')) - TimestampType(timestamp[ms]) - - To fix this test, we would need to have an exception - that makes datetime64[ms] convert to date64[ms]. - - But date64 isn't the same as datetime64[ms]. date64 - is meant to represent a date (without time) only, - represented in milliseconds (see - https://github.com/apache/arrow/issues/15032#issuecomment-1368096718). - - Hence, some date64-related common type computations - end up becoming cast to timestamps rather than date64. - """ -) -@pytest.mark.parametrize( - "dtypes,expected", - [ - ( - ["date64[pyarrow]", "null[pyarrow]"], - "date64[ms][pyarrow]", - ), - ( - ["date32[pyarrow]", "date64[pyarrow]"], - "date64[ms][pyarrow]", - ), - ], -) -def test_pyarrow_date64_dtype(dtypes, expected): - source_dtypes = [pandas_dtype(dtype) for dtype in dtypes] - result = find_common_type(source_dtypes) - assert result == pandas_dtype(expected)