Skip to content

Commit 1893382

Browse files
committed
Merge remote-tracking branch 'upstream' into read-csv-from-directory
2 parents da1c1ed + 3e1d6d5 commit 1893382

File tree

18 files changed

+182
-4759
lines changed

18 files changed

+182
-4759
lines changed

doc/source/user_guide/migration-3-strings.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,37 @@ the :meth:`~pandas.Series.str.decode` method now has a ``dtype`` parameter to be
315315
able to specify object dtype instead of the default of string dtype for this use
316316
case.
317317

318+
:meth:`Series.values` now returns an :class:`~pandas.api.extensions.ExtensionArray`
319+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
320+
321+
With object dtype, using ``.values`` on a Series will return the underlying NumPy array.
322+
323+
.. code-block:: python
324+
325+
>>> ser = pd.Series(["a", "b", np.nan], dtype="object")
326+
>>> type(ser.values)
327+
<class 'numpy.ndarray'>
328+
329+
However with the new string dtype, the underlying ExtensionArray is returned instead.
330+
331+
.. code-block:: python
332+
333+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
334+
>>> ser.values
335+
<ArrowStringArray>
336+
['a', 'b', nan]
337+
Length: 3, dtype: str
338+
339+
If your code requires a NumPy array, you should use :meth:`Series.to_numpy`.
340+
341+
.. code-block:: python
342+
343+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
344+
>>> ser.to_numpy()
345+
['a' 'b' nan]
346+
347+
In general, you should always prefer :meth:`Series.to_numpy` to get a NumPy array or :meth:`Series.array` to get an ExtensionArray over using :meth:`Series.values`.
348+
318349
Notable bug fixes
319350
~~~~~~~~~~~~~~~~~
320351

pandas/_libs/src/datetime/pd_datetime.c

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,23 @@ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
5555
out->month = 1;
5656
out->day = 1;
5757

58-
out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year"));
59-
out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month"));
60-
out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day"));
58+
tmp = PyObject_GetAttrString(obj, "year");
59+
if (tmp == NULL)
60+
return -1;
61+
out->year = PyLong_AsLong(tmp);
62+
Py_DECREF(tmp);
63+
64+
tmp = PyObject_GetAttrString(obj, "month");
65+
if (tmp == NULL)
66+
return -1;
67+
out->month = PyLong_AsLong(tmp);
68+
Py_DECREF(tmp);
69+
70+
tmp = PyObject_GetAttrString(obj, "day");
71+
if (tmp == NULL)
72+
return -1;
73+
out->day = PyLong_AsLong(tmp);
74+
Py_DECREF(tmp);
6175

6276
// TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use
6377
// PyDateTime_Check here, and less verbose attribute lookups.
@@ -70,10 +84,29 @@ static int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
7084
return 0;
7185
}
7286

73-
out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour"));
74-
out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute"));
75-
out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second"));
76-
out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond"));
87+
tmp = PyObject_GetAttrString(obj, "hour");
88+
if (tmp == NULL)
89+
return -1;
90+
out->hour = PyLong_AsLong(tmp);
91+
Py_DECREF(tmp);
92+
93+
tmp = PyObject_GetAttrString(obj, "minute");
94+
if (tmp == NULL)
95+
return -1;
96+
out->min = PyLong_AsLong(tmp);
97+
Py_DECREF(tmp);
98+
99+
tmp = PyObject_GetAttrString(obj, "second");
100+
if (tmp == NULL)
101+
return -1;
102+
out->sec = PyLong_AsLong(tmp);
103+
Py_DECREF(tmp);
104+
105+
tmp = PyObject_GetAttrString(obj, "microsecond");
106+
if (tmp == NULL)
107+
return -1;
108+
out->us = PyLong_AsLong(tmp);
109+
Py_DECREF(tmp);
77110

78111
if (PyObject_HasAttrString(obj, "tzinfo")) {
79112
PyObject *offset = extract_utc_offset(obj);

pandas/core/construction.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
maybe_cast_to_datetime,
3232
maybe_cast_to_integer_array,
3333
maybe_convert_platform,
34-
maybe_infer_to_datetimelike,
3534
maybe_promote,
3635
)
3736
from pandas.core.dtypes.common import (
@@ -612,7 +611,15 @@ def sanitize_array(
612611
if dtype is None:
613612
subarr = data
614613
if data.dtype == object and infer_object:
615-
subarr = maybe_infer_to_datetimelike(data)
614+
subarr = lib.maybe_convert_objects(
615+
data,
616+
# Here we do not convert numeric dtypes, as if we wanted that,
617+
# numpy would have done it for us.
618+
convert_numeric=False,
619+
convert_non_numeric=True,
620+
convert_to_nullable_dtype=False,
621+
dtype_if_all_nat=np.dtype("M8[s]"),
622+
)
616623
elif data.dtype.kind == "U" and using_string_dtype():
617624
from pandas.core.arrays.string_ import StringDtype
618625

@@ -659,7 +666,15 @@ def sanitize_array(
659666
subarr = maybe_convert_platform(data)
660667
if subarr.dtype == object:
661668
subarr = cast(np.ndarray, subarr)
662-
subarr = maybe_infer_to_datetimelike(subarr)
669+
subarr = lib.maybe_convert_objects(
670+
subarr,
671+
# Here we do not convert numeric dtypes, as if we wanted that,
672+
# numpy would have done it for us.
673+
convert_numeric=False,
674+
convert_non_numeric=True,
675+
convert_to_nullable_dtype=False,
676+
dtype_if_all_nat=np.dtype("M8[s]"),
677+
)
663678

664679
subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)
665680

pandas/core/dtypes/cast.py

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@
9797
DtypeObj,
9898
NumpyIndexT,
9999
Scalar,
100-
npt,
101100
)
102101

103102
from pandas import Index
@@ -1058,51 +1057,6 @@ def convert_dtypes(
10581057
return inferred_dtype # type: ignore[return-value]
10591058

10601059

1061-
def maybe_infer_to_datetimelike(
1062-
value: npt.NDArray[np.object_],
1063-
convert_to_nullable_dtype: bool = False,
1064-
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
1065-
"""
1066-
we might have a array (or single object) that is datetime like,
1067-
and no dtype is passed don't change the value unless we find a
1068-
datetime/timedelta set
1069-
1070-
this is pretty strict in that a datetime/timedelta is REQUIRED
1071-
in addition to possible nulls/string likes
1072-
1073-
Parameters
1074-
----------
1075-
value : np.ndarray[object]
1076-
1077-
Returns
1078-
-------
1079-
np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
1080-
1081-
"""
1082-
if not isinstance(value, np.ndarray) or value.dtype != object:
1083-
# Caller is responsible for passing only ndarray[object]
1084-
raise TypeError(type(value)) # pragma: no cover
1085-
if value.ndim != 1:
1086-
# Caller is responsible
1087-
raise ValueError(value.ndim) # pragma: no cover
1088-
1089-
if not len(value):
1090-
return value
1091-
1092-
# error: Incompatible return value type (got "Union[ExtensionArray,
1093-
# ndarray[Any, Any]]", expected "Union[ndarray[Any, Any], DatetimeArray,
1094-
# TimedeltaArray, PeriodArray, IntervalArray]")
1095-
return lib.maybe_convert_objects( # type: ignore[return-value]
1096-
value,
1097-
# Here we do not convert numeric dtypes, as if we wanted that,
1098-
# numpy would have done it for us.
1099-
convert_numeric=False,
1100-
convert_non_numeric=True,
1101-
convert_to_nullable_dtype=convert_to_nullable_dtype,
1102-
dtype_if_all_nat=np.dtype("M8[s]"),
1103-
)
1104-
1105-
11061060
def maybe_cast_to_datetime(
11071061
value: np.ndarray | list, dtype: np.dtype
11081062
) -> DatetimeArray | TimedeltaArray | np.ndarray:

pandas/core/frame.py

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@
7171
)
7272
from pandas.util._exceptions import (
7373
find_stack_level,
74-
rewrite_warning,
7574
)
7675
from pandas.util._validators import (
7776
validate_ascending,
@@ -11926,25 +11925,13 @@ def _get_data() -> DataFrame:
1192611925
row_index = np.tile(np.arange(nrows), ncols)
1192711926
col_index = np.repeat(np.arange(ncols), nrows)
1192811927
ser = Series(arr, index=col_index, copy=False)
11929-
# GroupBy will raise a warning with SeriesGroupBy as the object,
11930-
# likely confusing users
11931-
with rewrite_warning(
11932-
target_message=(
11933-
f"The behavior of SeriesGroupBy.{name} with all-NA values"
11934-
),
11935-
target_category=FutureWarning,
11936-
new_message=(
11937-
f"The behavior of {type(self).__name__}.{name} with all-NA "
11938-
"values, or any-NA and skipna=False, is deprecated. In "
11939-
"a future version this will raise ValueError"
11940-
),
11941-
):
11942-
result = ser.groupby(row_index).agg(name, **kwds)
11928+
if name == "all":
11929+
# Behavior here appears incorrect; preserving
11930+
# for backwards compatibility for now.
11931+
# See https://github.com/pandas-dev/pandas/issues/57171
11932+
skipna = True
11933+
result = ser.groupby(row_index).agg(name, **kwds, skipna=skipna)
1194311934
result.index = df.index
11944-
if not skipna and name not in ("any", "all"):
11945-
mask = df.isna().to_numpy(dtype=np.bool_).any(axis=1)
11946-
other = -1 if name in ("idxmax", "idxmin") else lib.no_default
11947-
result = result.mask(mask, other)
1194811935
return result
1194911936

1195011937
df = df.T
@@ -13258,13 +13245,11 @@ def idxmin(
1325813245
# indices will always be np.ndarray since axis is not N
1325913246

1326013247
if (indices == -1).any():
13261-
warnings.warn(
13262-
f"The behavior of {type(self).__name__}.idxmin with all-NA "
13263-
"values, or any-NA and skipna=False, is deprecated. In a future "
13264-
"version this will raise ValueError",
13265-
FutureWarning,
13266-
stacklevel=find_stack_level(),
13267-
)
13248+
if skipna:
13249+
msg = "Encountered all NA values"
13250+
else:
13251+
msg = "Encountered an NA value with skipna=False"
13252+
raise ValueError(msg)
1326813253

1326913254
index = data._get_axis(axis)
1327013255
result = algorithms.take(
@@ -13365,13 +13350,11 @@ def idxmax(
1336513350
# indices will always be 1d array since axis is not None
1336613351

1336713352
if (indices == -1).any():
13368-
warnings.warn(
13369-
f"The behavior of {type(self).__name__}.idxmax with all-NA "
13370-
"values, or any-NA and skipna=False, is deprecated. In a future "
13371-
"version this will raise ValueError",
13372-
FutureWarning,
13373-
stacklevel=find_stack_level(),
13374-
)
13353+
if skipna:
13354+
msg = "Encountered all NA values"
13355+
else:
13356+
msg = "Encountered an NA values with skipna=False"
13357+
raise ValueError(msg)
1337513358

1337613359
index = data._get_axis(axis)
1337713360
result = algorithms.take(

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5703,10 +5703,7 @@ def _idxmax_idxmin(
57035703
"Specify observed=True in groupby instead."
57045704
)
57055705
elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
5706-
raise ValueError(
5707-
f"{type(self).__name__}.{how} with skipna=False encountered an NA "
5708-
f"value."
5709-
)
5706+
raise ValueError(f"{how} with skipna=False encountered an NA value.")
57105707

57115708
result = self._agg_general(
57125709
numeric_only=numeric_only,
@@ -5724,8 +5721,7 @@ def _wrap_idxmax_idxmin(
57245721
result = res.astype(index.dtype)
57255722
elif skipna and res.lt(0).any(axis=None):
57265723
raise ValueError(
5727-
f"{type(self).__name__}.{how} with skipna=True encountered all NA "
5728-
f"values in a group."
5724+
f"{how} with skipna=True encountered all NA values in a group."
57295725
)
57305726
else:
57315727
if isinstance(index, MultiIndex):

pandas/core/internals/construction.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
dict_compat,
2525
maybe_cast_to_datetime,
2626
maybe_convert_platform,
27-
maybe_infer_to_datetimelike,
2827
)
2928
from pandas.core.dtypes.common import (
3029
is_1d_only_ea_dtype,
@@ -290,7 +289,18 @@ def ndarray_to_mgr(
290289
# embedded in an object type
291290
if dtype is None and infer_object and is_object_dtype(values.dtype):
292291
obj_columns = list(values)
293-
maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
292+
maybe_datetime = [
293+
lib.maybe_convert_objects(
294+
x,
295+
# Here we do not convert numeric dtypes, as if we wanted that,
296+
# numpy would have done it for us.
297+
convert_numeric=False,
298+
convert_non_numeric=True,
299+
convert_to_nullable_dtype=False,
300+
dtype_if_all_nat=np.dtype("M8[s]"),
301+
)
302+
for x in obj_columns
303+
]
294304
# don't convert (and copy) the objects if no type inference occurs
295305
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
296306
block_values = [
@@ -485,7 +495,7 @@ def convert(v):
485495

486496
v = extract_array(v, extract_numpy=True)
487497
res = maybe_convert_platform(v)
488-
# We don't do maybe_infer_to_datetimelike here bc we will end up doing
498+
# We don't do maybe_convert_objects here bc we will end up doing
489499
# it column-by-column in ndarray_to_mgr
490500
return res
491501

@@ -965,7 +975,15 @@ def convert(arr):
965975
if arr.dtype == np.dtype("O"):
966976
# i.e. maybe_convert_objects didn't convert
967977
convert_to_nullable_dtype = dtype_backend != "numpy"
968-
arr = maybe_infer_to_datetimelike(arr, convert_to_nullable_dtype)
978+
arr = lib.maybe_convert_objects(
979+
arr,
980+
# Here we do not convert numeric dtypes, as if we wanted that,
981+
# numpy would have done it for us.
982+
convert_numeric=False,
983+
convert_non_numeric=True,
984+
convert_to_nullable_dtype=convert_to_nullable_dtype,
985+
dtype_if_all_nat=np.dtype("M8[s]"),
986+
)
969987
if convert_to_nullable_dtype and arr.dtype == np.dtype("O"):
970988
new_dtype = StringDtype()
971989
arr_cls = new_dtype.construct_array_type()

pandas/tests/extension/test_arrow.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3333,9 +3333,9 @@ def test_factorize_chunked_dictionary():
33333333
)
33343334
ser = pd.Series(ArrowExtensionArray(pa_array))
33353335
res_indices, res_uniques = ser.factorize()
3336-
exp_indicies = np.array([0, 1], dtype=np.intp)
3336+
exp_indices = np.array([0, 1], dtype=np.intp)
33373337
exp_uniques = pd.Index(ArrowExtensionArray(pa_array.combine_chunks()))
3338-
tm.assert_numpy_array_equal(res_indices, exp_indicies)
3338+
tm.assert_numpy_array_equal(res_indices, exp_indices)
33393339
tm.assert_index_equal(res_uniques, exp_uniques)
33403340

33413341

pandas/tests/frame/test_reductions.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,9 +2160,7 @@ def test_numeric_ea_axis_1(method, skipna, min_count, any_numeric_ea_dtype):
21602160
kwargs["min_count"] = min_count
21612161

21622162
if not skipna and method in ("idxmax", "idxmin"):
2163-
# GH#57745 - EAs use groupby for axis=1 which still needs a proper deprecation.
2164-
msg = f"The behavior of DataFrame.{method} with all-NA values"
2165-
with tm.assert_produces_warning(FutureWarning, match=msg):
2163+
with pytest.raises(ValueError, match="encountered an NA value"):
21662164
getattr(df, method)(axis=1, **kwargs)
21672165
with pytest.raises(ValueError, match="Encountered an NA value"):
21682166
getattr(expected_df, method)(axis=1, **kwargs)

pandas/tests/groupby/test_libgroupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
285285
)
286286

287287

288-
def test_cython_group_mean_Inf_at_begining_and_end():
288+
def test_cython_group_mean_Inf_at_beginning_and_end():
289289
# GH 50367
290290
actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
291291
counts = np.array([0, 0], dtype="int64")
@@ -314,7 +314,7 @@ def test_cython_group_mean_Inf_at_begining_and_end():
314314
([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]),
315315
],
316316
)
317-
def test_cython_group_sum_Inf_at_begining_and_end(values, out):
317+
def test_cython_group_sum_Inf_at_beginning_and_end(values, out):
318318
# GH #53606
319319
actual = np.array([[np.nan], [np.nan]], dtype="float64")
320320
counts = np.array([0, 0], dtype="int64")

0 commit comments

Comments
 (0)