diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..08ba3d78b5b72 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1002,6 +1002,7 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) +- Bug in :meth:`DataFrame.convert_dtypes` with ``dtype_backend='pyarrow'`` for large ``int64`` values (:issue:`58485`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3b615c70ebea2..2c4221b52bcd3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -960,7 +960,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we cannot convert to IntegerDtype pass - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr), equal_nan=True): inferred_dtype = target_int_dtype else: inferred_dtype = input_array.dtype @@ -987,7 +987,7 @@ def convert_dtypes( if len(arr) < len(input_array) and not is_nan_na(): # In the presence of NaNs, we can't convert to IntegerDtype inferred_dtype = inferred_float_dtype - elif (arr.astype(int) == arr).all(): + elif np.array_equal(arr, np.trunc(arr)): inferred_dtype = pandas_dtype_func("Int64") else: inferred_dtype = inferred_float_dtype diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e90786a43c483..56706220a920f 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -228,3 +228,30 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): result = df.convert_dtypes(dtype_backend="pyarrow") expected = df.copy() tm.assert_frame_equal(result, expected) + + def test_convert_dtype_pyarrow_int64_limits_warning(self): + # GH 58485 + pytest.importorskip("pyarrow") + data = { + "a": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "b": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + "c": [ + -9223372036854775808, + 4611686018427387904, + 9223372036854775807, + None, + ], + } + result = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") + expected = pd.DataFrame(data, dtype="int64[pyarrow]") + tm.assert_frame_equal(result, expected)