Skip to content

Commit 55ca3ab

Browse files
committed
Merge branch 'main' into bug-expansion-dtypes
2 parents b2b2540 + 6e27e26 commit 55ca3ab

File tree

22 files changed

+182
-132
lines changed

22 files changed

+182
-132
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,7 @@ Other API changes
655655
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
656656
the dtype of the resulting Index (:issue:`60797`)
657657
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies aligns (:issue:`55782`)
658+
- :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the corresponding reversed :class:`DataFrame` method (e.g. :meth:`DataFrame.radd`) instead (:issue:`46179`)
658659
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
659660
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
660661
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
@@ -874,6 +875,7 @@ Other Removals
874875
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
875876
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
876877
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
878+
- Enforced deprecation of the ``"pyarrow_numpy"`` storage option for :class:`StringDtype` (:issue:`60152`)
877879
- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`)
878880

879881
.. ---------------------------------------------------------------------------

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,10 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
439439
# or test_agg_lambda_complex128_dtype_conversion for complex values
440440
return super()._cast_pointwise_result(values)
441441

442+
if pa.types.is_null(arr.type):
443+
if lib.infer_dtype(values) == "decimal":
444+
# GH#62522; the specific decimal precision here is arbitrary
445+
arr = arr.cast(pa.decimal128(1))
442446
if pa.types.is_duration(arr.type):
443447
# workaround for https://github.com/apache/arrow/issues/40620
444448
result = ArrowExtensionArray._from_sequence(values)

pandas/core/arrays/datetimelike.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@
157157
Sequence,
158158
)
159159

160+
from pandas._typing import TimeUnit
161+
160162
from pandas import Index
161163
from pandas.core.arrays import (
162164
DatetimeArray,
@@ -2114,7 +2116,7 @@ def _creso(self) -> int:
21142116
return get_unit_from_dtype(self._ndarray.dtype)
21152117

21162118
@cache_readonly
2117-
def unit(self) -> str:
2119+
def unit(self) -> TimeUnit:
21182120
"""
21192121
The precision unit of the datetime data.
21202122
@@ -2138,11 +2140,11 @@ def unit(self) -> str:
21382140
>>> idx.as_unit("s").unit
21392141
's'
21402142
"""
2141-
# error: Argument 1 to "dtype_to_unit" has incompatible type
2142-
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
2143-
return dtype_to_unit(self.dtype) # type: ignore[arg-type]
2143+
# error: Incompatible return value type (got "str", expected
2144+
# "Literal['s', 'ms', 'us', 'ns']") [return-value]
2145+
return dtype_to_unit(self.dtype) # type: ignore[return-value,arg-type]
21442146

2145-
def as_unit(self, unit: str, round_ok: bool = True) -> Self:
2147+
def as_unit(self, unit: TimeUnit, round_ok: bool = True) -> Self:
21462148
"""
21472149
Convert to a dtype with the given unit resolution.
21482150

pandas/core/arrays/datetimes.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
IntervalClosedType,
8989
TimeAmbiguous,
9090
TimeNonexistent,
91+
TimeUnit,
9192
npt,
9293
)
9394

@@ -394,7 +395,10 @@ def _from_sequence_not_strict(
394395
result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype)
395396
if unit is not None and unit != result.unit:
396397
# If unit was specified in user-passed dtype, cast to it here
397-
result = result.as_unit(unit)
398+
# error: Argument 1 to "as_unit" of "TimelikeOps" has
399+
# incompatible type "str"; expected "Literal['s', 'ms', 'us', 'ns']"
400+
# [arg-type]
401+
result = result.as_unit(unit) # type: ignore[arg-type]
398402

399403
validate_kwds = {"ambiguous": ambiguous}
400404
result._maybe_pin_freq(freq, validate_kwds)
@@ -413,7 +417,7 @@ def _generate_range(
413417
nonexistent: TimeNonexistent = "raise",
414418
inclusive: IntervalClosedType = "both",
415419
*,
416-
unit: str | None = None,
420+
unit: TimeUnit = "ns",
417421
) -> Self:
418422
periods = dtl.validate_periods(periods)
419423
if freq is None and any(x is None for x in [periods, start, end]):
@@ -534,7 +538,7 @@ def _unbox_scalar(self, value) -> np.datetime64:
534538
raise ValueError("'value' should be a Timestamp.")
535539
self._check_compatible_with(value)
536540
if value is NaT:
537-
return np.datetime64(value._value, self.unit) # type: ignore[call-overload]
541+
return np.datetime64(value._value, self.unit)
538542
else:
539543
return value.as_unit(self.unit, round_ok=False).asm8
540544

pandas/core/arrays/period.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,9 @@ def astype(self, dtype, copy: bool = True):
962962
# GH#45038 match PeriodIndex behavior.
963963
tz = getattr(dtype, "tz", None)
964964
unit = dtl.dtype_to_unit(dtype)
965-
return self.to_timestamp().tz_localize(tz).as_unit(unit)
965+
# error: Argument 1 to "as_unit" of "TimelikeOps" has incompatible
966+
# type "str"; expected "Literal['s', 'ms', 'us', 'ns']" [arg-type]
967+
return self.to_timestamp().tz_localize(tz).as_unit(unit) # type: ignore[arg-type]
966968

967969
return super().astype(dtype, copy=copy)
968970

pandas/core/arrays/string_.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -167,21 +167,6 @@ def __init__(
167167
else:
168168
storage = "python"
169169

170-
if storage == "pyarrow_numpy":
171-
# TODO: Enforce in 3.0 (#60152)
172-
warnings.warn(
173-
"The 'pyarrow_numpy' storage option name is deprecated and will be "
174-
'removed in pandas 3.0. Use \'pd.StringDtype(storage="pyarrow", '
175-
"na_value=np.nan)' to construct the same dtype.\nOr enable the "
176-
"'pd.options.future.infer_string = True' option globally and use "
177-
'the "str" alias as a shorthand notation to specify a dtype '
178-
'(instead of "string[pyarrow_numpy]").',
179-
FutureWarning, # pdlint: ignore[warning_class]
180-
stacklevel=find_stack_level(),
181-
)
182-
storage = "pyarrow"
183-
na_value = np.nan
184-
185170
# validate options
186171
if storage not in {"python", "pyarrow"}:
187172
raise ValueError(
@@ -280,9 +265,6 @@ def construct_from_string(cls, string) -> Self:
280265
return cls(storage="python")
281266
elif string == "string[pyarrow]":
282267
return cls(storage="pyarrow")
283-
elif string == "string[pyarrow_numpy]":
284-
# this is deprecated in the dtype __init__, remove this in pandas 3.0
285-
return cls(storage="pyarrow_numpy")
286268
else:
287269
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
288270

pandas/core/arrays/timedeltas.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
DtypeObj,
7474
NpDtype,
7575
npt,
76+
TimeUnit,
7677
)
7778

7879
from pandas import DataFrame
@@ -275,7 +276,7 @@ def _from_sequence_not_strict(
275276

276277
@classmethod
277278
def _generate_range(
278-
cls, start, end, periods, freq, closed=None, *, unit: str | None = None
279+
cls, start, end, periods, freq, closed=None, *, unit: TimeUnit
279280
) -> Self:
280281
periods = dtl.validate_periods(periods)
281282
if freq is None and any(x is None for x in [periods, start, end]):
@@ -293,11 +294,8 @@ def _generate_range(
293294
if end is not None:
294295
end = Timedelta(end).as_unit("ns")
295296

296-
if unit is not None:
297-
if unit not in ["s", "ms", "us", "ns"]:
298-
raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
299-
else:
300-
unit = "ns"
297+
if unit not in ["s", "ms", "us", "ns"]:
298+
raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
301299

302300
if start is not None and unit is not None:
303301
start = start.as_unit(unit, round_ok=False)
@@ -327,7 +325,7 @@ def _unbox_scalar(self, value) -> np.timedelta64:
327325
raise ValueError("'value' should be a Timedelta.")
328326
self._check_compatible_with(value)
329327
if value is NaT:
330-
return np.timedelta64(value._value, self.unit) # type: ignore[call-overload]
328+
return np.timedelta64(value._value, self.unit)
331329
else:
332330
return value.as_unit(self.unit, round_ok=False).asm8
333331

pandas/core/config_init.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -478,12 +478,6 @@ def is_valid_string_storage(value: Any) -> None:
478478
legal_values = ["auto", "python", "pyarrow"]
479479
if value not in legal_values:
480480
msg = "Value must be one of python|pyarrow"
481-
if value == "pyarrow_numpy":
482-
# TODO: we can remove extra message after 3.0
483-
msg += (
484-
". 'pyarrow_numpy' was specified, but this option should be "
485-
"enabled using pandas.options.future.infer_string instead"
486-
)
487481
raise ValueError(msg)
488482

489483

pandas/core/indexes/datetimelike.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
from pandas._typing import (
7676
Axis,
7777
JoinHow,
78+
TimeUnit,
7879
npt,
7980
)
8081

@@ -434,10 +435,10 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, ABC):
434435
_is_unique = Index.is_unique
435436

436437
@property
437-
def unit(self) -> str:
438+
def unit(self) -> TimeUnit:
438439
return self._data.unit
439440

440-
def as_unit(self, unit: str) -> Self:
441+
def as_unit(self, unit: TimeUnit) -> Self:
441442
"""
442443
Convert to a dtype with the given unit resolution.
443444

pandas/core/indexes/datetimes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
TimeAmbiguous,
6767
TimeNonexistent,
6868
npt,
69+
TimeUnit,
6970
)
7071

7172
from pandas.core.api import (
@@ -852,7 +853,7 @@ def date_range(
852853
name: Hashable | None = None,
853854
inclusive: IntervalClosedType = "both",
854855
*,
855-
unit: str | None = None,
856+
unit: TimeUnit = "ns",
856857
**kwargs,
857858
) -> DatetimeIndex:
858859
"""
@@ -893,7 +894,7 @@ def date_range(
893894
Include boundaries; Whether to set each bound as closed or open.
894895
895896
.. versionadded:: 1.4.0
896-
unit : str, default None
897+
unit : {'s', 'ms', 'us', 'ns'}, default 'ns'
897898
Specify the desired resolution of the result.
898899
899900
.. versionadded:: 2.0.0

0 commit comments

Comments
 (0)