Move the integer-dtype handling in take() from _mixins.py into NumpyExtensionArray.take().

aijams · aijams · commit 91442a8d1343 · 2025-10-16T10:29:57.000-04:00
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -155,24 +155,11 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
         # "ExtensionDtype | dtype[Any]"; expected "dtype[Any] | _HasDType[dtype[Any]]"
         return arr.view(dtype=dtype)  # type: ignore[arg-type]
 
-    def take(
-        self,
-        indices: TakeIndexer,
-        *,
-        allow_fill: bool = False,
-        fill_value: Any = None,
-        axis: AxisInt = 0,
-    ) -> Self:
-        if allow_fill:
-            fill_value = self._validate_scalar(fill_value)
 
-        new_data = take(
-            self._ndarray,
-            indices,
-            allow_fill=allow_fill,
-            fill_value=fill_value,
-            axis=axis,
-        )
+        # TODO(GH#62448): working notes on the take method fix follow.
+        # Remove these notes before this fix is submitted.
+        # =======================================================================
+
         # One of the base classes to this class: ExtensionArray, provides
         # the dtype property, but abstractly, so it leaves the implementation
         # of dtype storage up to its derived classes. Some of these derived
@@ -244,17 +231,25 @@ def take(
         #   StringArray             yes                                 no
         #   NumpyExtensionArray     no                                  no
 
-        if hasattr(self.dtype, "numpy_dtype") and self.dtype.numpy_dtype in [
-            np.uint8,
-            np.uint16,
-            np.uint32,
-            np.uint64,
-            np.int8,
-            np.int16,
-            np.int32,
-            np.int64,
-        ]:
-            return type(self)(new_data)
+    
+    def take(
+        self,
+        indices: TakeIndexer,
+        *,
+        allow_fill: bool = False,
+        fill_value: Any = None,
+        axis: AxisInt = 0,
+    ) -> Self:
+        if allow_fill:
+            fill_value = self._validate_scalar(fill_value)
+
+        new_data = take(
+            self._ndarray,
+            indices,
+            allow_fill=allow_fill,
+            fill_value=fill_value,
+            axis=axis,
+        )
         return self._from_backing_data(new_data)
 
     # ------------------------------------------------------------------------
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -45,6 +45,7 @@
         InterpolateOptions,
         NpDtype,
         Scalar,
+        TakeIndexer,
         npt,
     )
 
@@ -350,6 +351,73 @@ def interpolate(
             return self
         return type(self)._simple_new(out_data, dtype=self.dtype)
 
+    def take(
+        self,
+        indices: TakeIndexer,
+        *,
+        allow_fill: bool = False,
+        fill_value: Any = None,
+        axis: AxisInt = 0,
+    ) -> Self:
+        """
+        Take entries from this array at each index in a list of indices,
+        producing an array containing only those entries.
+        """
+        # See GH#62448.
+        if self.dtype.numpy_dtype in [
+            np.uint8,
+            np.uint16,
+            np.uint32,
+            np.uint64,
+            np.int8,
+            np.int16,
+            np.int32,
+            np.int64
+        ]:
+            # In this case, the resulting extension array should have a floating-point
+            # dtype to match the result of the underlying take method when
+            # NaN values need to be incorporated into it.
+            # This occurs when allow_fill is True and fill_value is None.
+            # (fill_value may be an arbitrary Python object, in which case
+            # the result will be an array of objects.)
+
+            # Call the take method of NDArrayBackedExtensionArray
+
+            # NOTE: The dtype of a newly constructed NumpyExtensionArray is
+            # set to match the dtype of its underlying array.
+
+            result = super().take(
+                indices,
+                allow_fill=allow_fill,
+                fill_value=fill_value,
+                axis=axis
+            )
+            return type(self)(result, copy=False)
+        
+        # In this case, the resulting extension array will have a dtype
+        # that matches that of the underlying NumPy array, so the base-class
+        # result can be returned as-is without adjusting the extension
+        # array's dtype.
+
+        return super().take(
+            indices,
+            allow_fill=allow_fill,
+            fill_value=fill_value,
+            axis=axis
+        )
+        # result array dtype = self dtype
+
+        # Implementation steps:
+        #   Determine requirements for this method, including:
+        #       Argument types [done]
+        #       Return type [done]
+        #       Return dtype [done]
+        #   Write tests to check whether this method satisfies these requirements. [done]
+        #   Figure out what base class method to call to implement the take functionality. [done]
+        #   Implement the call. [done]
+        #   Check whether this method satisfies its requirements by running the tests.
+            
+
     # ------------------------------------------------------------------------
     # Reductions
 
diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py
@@ -325,14 +325,28 @@ def test_factorize_unsigned():
     tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique))
 
 
-@pytest.mark.parametrize("dtype", [np.uint32, np.uint64, np.int32, np.int64])
-def test_take_assigns_correct_dtype(dtype):
+# Parametrized over every signed and unsigned integer width from 8 to 64 bits.
+@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64])
+def test_take_assigns_floating_point_dtype(dtype):
     # GH#62448.
     array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype))
 
     result = array.take([-1], allow_fill=True)
 
-    assert result.dtype == NumpyEADtype(np.float64)
+    assert result.dtype.numpy_dtype == np.float64
+
+    result = array.take([-1], allow_fill=True, fill_value=5.0)
+
+    assert result.dtype.numpy_dtype == np.float64
+
+@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16, np.int32, np.int64])
+def test_take_assigns_integer_dtype_when_fill_disallowed(dtype):
+    # GH#62448.
+    array = NumpyExtensionArray(np.array([1, 2, 3], dtype=dtype))
+
+    result = array.take([-1], allow_fill=False)
+
+    assert result.dtype.numpy_dtype == dtype
 
 
 # ----------------------------------------------------------------------------