From 7a72a064b7a4f0024c783272dc896be794b83a66 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Mon, 29 Sep 2025 13:25:49 -0400 Subject: [PATCH 01/13] Added test to check the dtype of the result of take method. Modified take method to return correct dtype. --- pandas/core/arrays/_mixins.py | 3 ++- pandas/tests/arrays/numpy_/test_numpy.py | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 84aca81420fe1..74041bb81d2ac 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -173,7 +173,8 @@ def take( fill_value=fill_value, axis=axis, ) - return self._from_backing_data(new_data) + + return type(self)._simple_new(new_data, new_data.dtype) # ------------------------------------------------------------------------ diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 620a553d5a731..f299b5818a66d 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -325,6 +325,15 @@ def test_factorize_unsigned(): tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) +@pytest.mark.parametrize("dtype", [np.uint32, np.uint64, np.int32, np.int64]) +def test_take_assigns_correct_dtype(dtype): + array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) + + result = array.take([-1], allow_fill=True) + + assert result.dtype == np.float64 + + # ---------------------------------------------------------------------------- # Output formatting From 65511cd72e6c177dd7756a8527d215855f42c2ab Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Mon, 29 Sep 2025 13:51:56 -0400 Subject: [PATCH 02/13] Added bug info to docs. --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 99a6be03c84d3..7d95553995996 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1164,6 +1164,7 @@ ExtensionArray - Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False(:issue:`60567`) +- Bug in :meth:`NDArrayBackedExtensionArray.take` which produced arrays whose dtypes didn't match their underlying data, when called with integer arrays (:issue:`62448`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) From 42898c6046e077cde6692db7bb03c1a400cf7dbd Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:48:53 -0400 Subject: [PATCH 03/13] Added conditional to handle integer arrays in take method as special case. --- pandas/core/arrays/_mixins.py | 11 +++++++++-- pandas/tests/arrays/numpy_/test_numpy.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 74041bb81d2ac..6d45bdc038f42 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -40,6 +40,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, ExtensionDtype, + NumpyEADtype, PeriodDtype, ) from pandas.core.dtypes.missing import array_equivalent @@ -173,8 +174,14 @@ def take( fill_value=fill_value, axis=axis, ) - - return type(self)._simple_new(new_data, new_data.dtype) + if self.dtype in [ + NumpyEADtype(np.uint32), + NumpyEADtype(np.uint64), + NumpyEADtype(np.int32), + NumpyEADtype(np.int64), + ]: + return type(self)(new_data) + return self._from_backing_data(new_data) # ------------------------------------------------------------------------ diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index f299b5818a66d..8337c26e4ba10 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -331,7 +331,7 @@ def test_take_assigns_correct_dtype(dtype): result = array.take([-1], allow_fill=True) - assert result.dtype == np.float64 + assert result.dtype == NumpyEADtype(np.float64) # ---------------------------------------------------------------------------- From 3ca2e84f35c1d60b2d0b988ba6c1d78f2ce5bce2 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:10:52 -0400 Subject: [PATCH 04/13] Added cases for smaller integer types to mixins take function. --- pandas/core/arrays/_mixins.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 6d45bdc038f42..1245beb4c22d0 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -175,8 +175,12 @@ def take( axis=axis, ) if self.dtype in [ + NumpyEADtype(np.uint8), + NumpyEADtype(np.uint16), NumpyEADtype(np.uint32), NumpyEADtype(np.uint64), + NumpyEADtype(np.int8), + NumpyEADtype(np.int16), NumpyEADtype(np.int32), NumpyEADtype(np.int64), ]: From eeb3d8569c89e03511cb4c7327ca2bc6e752de6f Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:29:34 -0400 Subject: [PATCH 05/13] Added note to take method. --- pandas/core/arrays/_mixins.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 1245beb4c22d0..365be6e4d1e8d 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -174,6 +174,22 @@ def take( fill_value=fill_value, axis=axis, ) + # One of the base classes to this class: ExtensionArray, provides + # the dtype property, but abstractly, so it leaves the implementation + # of dtype storage up to its derived classes. Some of these derived + # classes don't provide a setter method for their dtype property, so + # I can't set the dtype here and expect it to work for all classes that + # inherit this take method. + + # How can I produce an extension array of the same type as self, + # having a floating-point dtype if self has an integer dtype or otherwise + # the same dtype as self? + + # Constructing a new object of the same type as self doesn't always + # work since the constructors of some derived classes of this class + # don't accept a dtype parameter, which I need to pass to set the + # result's dtype to a floating-point type. + if self.dtype in [ NumpyEADtype(np.uint8), NumpyEADtype(np.uint16), From 468de3fab65f60784d1e3fa5eb64171c4712f822 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Wed, 15 Oct 2025 11:43:39 -0400 Subject: [PATCH 06/13] Added link to issue in test for take method. --- pandas/core/arrays/_mixins.py | 74 ++++++++++++++++++++---- pandas/tests/arrays/numpy_/test_numpy.py | 1 + 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 365be6e4d1e8d..c5bde6ebf0ab4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -40,7 +40,6 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, ExtensionDtype, - NumpyEADtype, PeriodDtype, ) from pandas.core.dtypes.missing import array_equivalent @@ -190,15 +189,70 @@ def take( # don't accept a dtype parameter, which I need to pass to set the # result's dtype to a floating-point type. - if self.dtype in [ - NumpyEADtype(np.uint8), - NumpyEADtype(np.uint16), - NumpyEADtype(np.uint32), - NumpyEADtype(np.uint64), - NumpyEADtype(np.int8), - NumpyEADtype(np.int16), - NumpyEADtype(np.int32), - NumpyEADtype(np.int64), + # All tests pass when I create a new extension array object with the + # appropriate dtype (in the integer-dtype source case), however MyPy + # complains about the missing dtype argument in the call to type(self) + # below. By creating a new array object, this call produces an array + # with a floating point dtype, even when the source dtype is integral. + # I think this happens because the new array is created with the newly + # produced data from the underlying take method, which has the + # appropriate underlying dtype. + + # Essentially, these extension arrays are wrappers around Numpy arrays + # which have their own dtype and store the data. Thus, the new + # extension array inherits the dtype from the Numpy array used + # to create it. + + # Unfortunately, some of the derived constructors of this class have a + # positional dtype argument, while some do not. If I call a constructor + # without specifying this argument, mypy will complain about the + # missing argument in the case of constructors that require it, but + # if I call the constructor with the dtype argument, the constructors + # that don't have it will fail at runtime since they don't recognize + # it. + + # How can I get around this issue? + # Ideas: + # Modify the extension array type to allow modification of its dtype + # after construction. + + # Add a conditional branch to this method to call derived constructors + # with or without the dtype argument, depending on their class. + # This approach has the disadvantage of hardcoding information about + # derived classes in this base class, which means that if someone + # changes a constructor of a derived class to remove the dtype argument, + # this method will break. + + # Classes derived from this class include: + + # Categorical + # DatetimeLikeArrayMixin + # DatelikeOps + # PeriodArray + # DatetimeArray + # TimelikeOps + # TimedeltaArray + # NumpyExtensionArray + # StringArray + + # The types of extension arrays (within Pandas) derived from this class are: + # Class name Constructor takes dtype argument Dtype argument required + # Categorical yes no + # PeriodArray yes no + # DatetimeArray + # TimedeltaArray + # StringArray yes no + # NumpyExtensionArray no no + + if hasattr(self.dtype, "numpy_dtype") and self.dtype.numpy_dtype in [ + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64, ]: return type(self)(new_data) return self._from_backing_data(new_data) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 8337c26e4ba10..1922cbc92b0b9 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -327,6 +327,7 @@ def test_factorize_unsigned(): @pytest.mark.parametrize("dtype", [np.uint32, np.uint64, np.int32, np.int64]) def test_take_assigns_correct_dtype(dtype): + # GH#62448. array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) result = array.take([-1], allow_fill=True) From 91442a8d13438dab9cc1897f6e3806398d2aead3 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:29:57 -0400 Subject: [PATCH 07/13] Moved changes to take method to NumpyExtensionArray class. --- pandas/core/arrays/_mixins.py | 51 ++++++++---------- pandas/core/arrays/numpy_.py | 68 ++++++++++++++++++++++++ pandas/tests/arrays/numpy_/test_numpy.py | 20 +++++-- 3 files changed, 108 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index c5bde6ebf0ab4..1b0e1933ae56c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -155,24 +155,11 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # "ExtensionDtype | dtype[Any]"; expected "dtype[Any] | _HasDType[dtype[Any]]" return arr.view(dtype=dtype) # type: ignore[arg-type] - def take( - self, - indices: TakeIndexer, - *, - allow_fill: bool = False, - fill_value: Any = None, - axis: AxisInt = 0, - ) -> Self: - if allow_fill: - fill_value = self._validate_scalar(fill_value) - new_data = take( - self._ndarray, - indices, - allow_fill=allow_fill, - fill_value=fill_value, - axis=axis, - ) + # Notes on take method fix + # Please remove these once this fix is ready to submit. + # ======================================================================= + # One of the base classes to this class: ExtensionArray, provides # the dtype property, but abstractly, so it leaves the implementation # of dtype storage up to its derived classes. Some of these derived @@ -244,17 +231,25 @@ def take( # StringArray yes no # NumpyExtensionArray no no - if hasattr(self.dtype, "numpy_dtype") and self.dtype.numpy_dtype in [ - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - ]: - return type(self)(new_data) + + def take( + self, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: AxisInt = 0, + ) -> Self: + if allow_fill: + fill_value = self._validate_scalar(fill_value) + + new_data = take( + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, + ) return self._from_backing_data(new_data) # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fab51ffa56919..f5e4a71a0e008 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -45,6 +45,7 @@ InterpolateOptions, NpDtype, Scalar, + TakeIndexer, npt, ) @@ -350,6 +351,73 @@ def interpolate( return self return type(self)._simple_new(out_data, dtype=self.dtype) + def take( + self, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Any = None, + axis: AxisInt = 0, + ) -> Self: + """ + Take entries from this array at each index in a list of indices, + producing an array containing only those entries. + """ + # See GH#62448. + if self.dtype.numpy_dtype in [ + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64 + ]: + # In this case, the resulting extension array should have a floating-point + # dtype to match the result of the underlying take method when + # NaN values need to be incorporated into it. + # This occurs when allow_fill is True and fill_value is None. + # (fill_value may be an arbitrary Python object, in which case + # the result will be an array of objects.) + + # Call the take method of NDArrayBackedExtensionArray + + # TODO: How is the dtype of a newly constructed NumpyExtensionArray set? + # It's set to match the dtype of its underlying array. + + result = super().take( + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis + ) + return type(self)(result, copy=False) + + # In this case, the resulting extension array will have a dtype + # that matches that of the underlying Numpy array and we can link + # to the underlying array without manipulating the extension's + # dtype. + + return super().take( + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis + ) + # result array dtype = self dtype + + # Implementation steps: + # Determine requirements for this method, including: + # Argument types [done] + # Return type [done] + # Return dtype [done] + # Write tests to check whether this method satisfies these requirements. [done] + # Figure out what base class method to call to implement the take functionality. [done] + # Implement the call. [done] + # Check whether this method satisfies its requirements by running the tests. + + # ------------------------------------------------------------------------ # Reductions diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 1922cbc92b0b9..f72a794cdd59f 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -325,14 +325,28 @@ def test_factorize_unsigned(): tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) -@pytest.mark.parametrize("dtype", [np.uint32, np.uint64, np.int32, np.int64]) -def test_take_assigns_correct_dtype(dtype): +# TODO: Add the smaller width dtypes to the parameter sets of these tests. +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64]) +def test_take_assigns_floating_point_dtype(dtype): # GH#62448. array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) result = array.take([-1], allow_fill=True) - assert result.dtype == NumpyEADtype(np.float64) + assert result.dtype.numpy_dtype == np.float64 + + result = array.take([-1], allow_fill=True, fill_value=5.0) + + assert result.dtype.numpy_dtype == np.float64 + +@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64]) +def test_take_assigns_integer_dtype_when_fill_disallowed(dtype): + # GH#62448. + array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) + + result = array.take([-1], allow_fill=False) + + assert result.dtype.numpy_dtype == dtype # ---------------------------------------------------------------------------- From aa3c228927ad82d0f29dbf5e8a920e001b383a8d Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:34:53 -0400 Subject: [PATCH 08/13] Cleaned up work comments. --- pandas/core/arrays/_mixins.py | 77 ------------------------ pandas/core/arrays/numpy_.py | 49 ++------------- pandas/tests/arrays/numpy_/test_numpy.py | 11 +++- 3 files changed, 15 insertions(+), 122 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 1b0e1933ae56c..84aca81420fe1 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -155,83 +155,6 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # "ExtensionDtype | dtype[Any]"; expected "dtype[Any] | _HasDType[dtype[Any]]" return arr.view(dtype=dtype) # type: ignore[arg-type] - - # Notes on take method fix - # Please remove these once this fix is ready to submit. - # ======================================================================= - - # One of the base classes to this class: ExtensionArray, provides - # the dtype property, but abstractly, so it leaves the implementation - # of dtype storage up to its derived classes. Some of these derived - # classes don't provide a setter method for their dtype property, so - # I can't set the dtype here and expect it to work for all classes that - # inherit this take method. - - # How can I produce an extension array of the same type as self, - # having a floating-point dtype if self has an integer dtype or otherwise - # the same dtype as self? - - # Constructing a new object of the same type as self doesn't always - # work since the constructors of some derived classes of this class - # don't accept a dtype parameter, which I need to pass to set the - # result's dtype to a floating-point type. - - # All tests pass when I create a new extension array object with the - # appropriate dtype (in the integer-dtype source case), however MyPy - # complains about the missing dtype argument in the call to type(self) - # below. By creating a new array object, this call produces an array - # with a floating point dtype, even when the source dtype is integral. - # I think this happens because the new array is created with the newly - # produced data from the underlying take method, which has the - # appropriate underlying dtype. - - # Essentially, these extension arrays are wrappers around Numpy arrays - # which have their own dtype and store the data. Thus, the new - # extension array inherits the dtype from the Numpy array used - # to create it. - - # Unfortunately, some of the derived constructors of this class have a - # positional dtype argument, while some do not. If I call a constructor - # without specifying this argument, mypy will complain about the - # missing argument in the case of constructors that require it, but - # if I call the constructor with the dtype argument, the constructors - # that don't have it will fail at runtime since they don't recognize - # it. - - # How can I get around this issue? - # Ideas: - # Modify the extension array type to allow modification of its dtype - # after construction. - - # Add a conditional branch to this method to call derived constructors - # with or without the dtype argument, depending on their class. - # This approach has the disadvantage of hardcoding information about - # derived classes in this base class, which means that if someone - # changes a constructor of a derived class to remove the dtype argument, - # this method will break. - - # Classes derived from this class include: - - # Categorical - # DatetimeLikeArrayMixin - # DatelikeOps - # PeriodArray - # DatetimeArray - # TimelikeOps - # TimedeltaArray - # NumpyExtensionArray - # StringArray - - # The types of extension arrays (within Pandas) derived from this class are: - # Class name Constructor takes dtype argument Dtype argument required - # Categorical yes no - # PeriodArray yes no - # DatetimeArray - # TimedeltaArray - # StringArray yes no - # NumpyExtensionArray no no - - def take( self, indices: TakeIndexer, diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index f5e4a71a0e008..4ec9b6419bea3 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -363,6 +363,9 @@ def take( Take entries from this array at each index in a list of indices, producing an array containing only those entries. """ + result = super().take( + indices, allow_fill=allow_fill, fill_value=fill_value, axis=axis + ) # See GH#62448. if self.dtype.numpy_dtype in [ np.uint8, @@ -372,51 +375,11 @@ def take( np.int8, np.int16, np.int32, - np.int64 + np.int64, ]: - # In this case, the resulting extension array should have a floating-point - # dtype to match the result of the underlying take method when - # NaN values need to be incorporated into it. - # This occurs when allow_fill is True and fill_value is None. - # (fill_value may be an arbitrary Python object, in which case - # the result will be an array of objects.) - - # Call the take method of NDArrayBackedExtensionArray - - # TODO: How is the dtype of a newly constructed NumpyExtensionArray set? - # It's set to match the dtype of its underlying array. - - result = super().take( - indices, - allow_fill=allow_fill, - fill_value=fill_value, - axis=axis - ) return type(self)(result, copy=False) - - # In this case, the resulting extension array will have a dtype - # that matches that of the underlying Numpy array and we can link - # to the underlying array without manipulating the extension's - # dtype. - - return super().take( - indices, - allow_fill=allow_fill, - fill_value=fill_value, - axis=axis - ) - # result array dtype = self dtype - - # Implementation steps: - # Determine requirements for this method, including: - # Argument types [done] - # Return type [done] - # Return dtype [done] - # Write tests to check whether this method satisfies these requirements. [done] - # Figure out what base class method to call to implement the take functionality. [done] - # Implement the call. [done] - # Check whether this method satisfies its requirements by running the tests. - + + return result # ------------------------------------------------------------------------ # Reductions diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index f72a794cdd59f..e679174440ef6 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -326,7 +326,10 @@ def test_factorize_unsigned(): # TODO: Add the smaller width dtypes to the parameter sets of these tests. -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64]) +@pytest.mark.parametrize( + "dtype", + [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64], +) def test_take_assigns_floating_point_dtype(dtype): # GH#62448. array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) @@ -339,7 +342,11 @@ def test_take_assigns_floating_point_dtype(dtype): assert result.dtype.numpy_dtype == np.float64 -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64]) + +@pytest.mark.parametrize( + "dtype", + [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64], +) def test_take_assigns_integer_dtype_when_fill_disallowed(dtype): # GH#62448. array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) From 3e5836c32218777a7005d2dcdcaec58f81bb0289 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Tue, 21 Oct 2025 12:56:31 -0400 Subject: [PATCH 09/13] Tests for take method check against object dtype for boolean inputs. --- pandas/core/arrays/numpy_.py | 14 +++----- pandas/tests/arrays/numpy_/test_numpy.py | 43 +++++++++++++++++++----- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 532638250bd9c..0ad188bf8b2e1 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -368,16 +368,10 @@ def take( indices, allow_fill=allow_fill, fill_value=fill_value, axis=axis ) # See GH#62448. - if self.dtype.numpy_dtype in [ - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - ]: + # TODO: Not all Pandas extension dtypes have an underlying Numpy dtype. + # I will need to handle the case where self.dtype doesn't have this + # attribute. + if self.dtype.kind in "iub": return type(self)(result, copy=False) return result diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index e679174440ef6..febdda7ffd66d 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -325,31 +325,56 @@ def test_factorize_unsigned(): tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) -# TODO: Add the smaller width dtypes to the parameter sets of these tests. @pytest.mark.parametrize( "dtype", - [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64], + [ + np.bool, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64, + ], ) def test_take_assigns_floating_point_dtype(dtype): # GH#62448. - array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) + if dtype == np.bool: + array = NumpyExtensionArray(np.array([False, True, False], dtype=dtype)) + expected = np.dtype(object) + else: + array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) + expected = np.float64 result = array.take([-1], allow_fill=True) - - assert result.dtype.numpy_dtype == np.float64 + assert result.dtype.numpy_dtype == expected result = array.take([-1], allow_fill=True, fill_value=5.0) - - assert result.dtype.numpy_dtype == np.float64 + assert result.dtype.numpy_dtype == expected @pytest.mark.parametrize( "dtype", - [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64], + [ + np.bool, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64, + ], ) def test_take_assigns_integer_dtype_when_fill_disallowed(dtype): # GH#62448. - array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) + if dtype == np.bool: + array = NumpyExtensionArray(np.array([False, True, False], dtype=dtype)) + else: + array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) result = array.take([-1], allow_fill=False) From b4258f2a2ce3efd1ed796db343bec98963c1452c Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Tue, 21 Oct 2025 16:12:51 -0400 Subject: [PATCH 10/13] Removed TODO comment. --- pandas/core/arrays/numpy_.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 0ad188bf8b2e1..ec64c04788ed4 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -368,9 +368,6 @@ def take( indices, allow_fill=allow_fill, fill_value=fill_value, axis=axis ) # See GH#62448. - # TODO: Not all Pandas extension dtypes have an underlying Numpy dtype. - # I will need to handle the case where self.dtype doesn't have this - # attribute. if self.dtype.kind in "iub": return type(self)(result, copy=False) From acdfb629f49e3a0e54ffb1aca623b35b8abda5c8 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Wed, 22 Oct 2025 12:20:12 -0400 Subject: [PATCH 11/13] Updated references to numpy.bool to include underscore. --- pandas/tests/arrays/numpy_/test_numpy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index febdda7ffd66d..a04655c9efcf3 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -328,7 +328,7 @@ def test_factorize_unsigned(): @pytest.mark.parametrize( "dtype", [ - np.bool, + np.bool_, np.uint8, np.uint16, np.uint32, @@ -341,7 +341,7 @@ def test_factorize_unsigned(): ) def test_take_assigns_floating_point_dtype(dtype): # GH#62448. - if dtype == np.bool: + if dtype == np.bool_: array = NumpyExtensionArray(np.array([False, True, False], dtype=dtype)) expected = np.dtype(object) else: @@ -358,7 +358,7 @@ def test_take_assigns_floating_point_dtype(dtype): @pytest.mark.parametrize( "dtype", [ - np.bool, + np.bool_, np.uint8, np.uint16, np.uint32, @@ -371,7 +371,7 @@ def test_take_assigns_floating_point_dtype(dtype): ) def test_take_assigns_integer_dtype_when_fill_disallowed(dtype): # GH#62448. - if dtype == np.bool: + if dtype == np.bool_: array = NumpyExtensionArray(np.array([False, True, False], dtype=dtype)) else: array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) From aa1c58b7d4c69ab41c43fbc55677e9da9b285fa2 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Tue, 4 Nov 2025 12:00:30 -0500 Subject: [PATCH 12/13] Take method now gets underlying array of result to build a new extension array with. --- pandas/core/arrays/numpy_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ec64c04788ed4..dd8ca3597f910 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -369,7 +369,7 @@ def take( ) # See GH#62448. if self.dtype.kind in "iub": - return type(self)(result, copy=False) + return type(self)(result._ndarray, copy=False) return result From 79c3116d0e8a26d51d79ecbdd2a35d729278e015 Mon Sep 17 00:00:00 2001 From: Augustus <22328646+aijams@users.noreply.github.com> Date: Tue, 11 Nov 2025 10:40:40 -0500 Subject: [PATCH 13/13] Removed dtype preservation tests for take, except for boolean case. --- pandas/tests/arrays/numpy_/test_numpy.py | 26 +++--------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index a04655c9efcf3..a853f90a8a964 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -355,30 +355,10 @@ def test_take_assigns_floating_point_dtype(dtype): assert result.dtype.numpy_dtype == expected -@pytest.mark.parametrize( - "dtype", - [ - np.bool_, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - ], -) -def test_take_assigns_integer_dtype_when_fill_disallowed(dtype): - # GH#62448. - if dtype == np.bool_: - array = NumpyExtensionArray(np.array([False, True, False], dtype=dtype)) - else: - array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype)) - +def test_take_preserves_boolean_arrays(): + array = NumpyExtensionArray(np.array([False, True, False], dtype=np.bool_)) result = array.take([-1], allow_fill=False) - - assert result.dtype.numpy_dtype == dtype + assert result.dtype.numpy_dtype == np.bool_ # ----------------------------------------------------------------------------