diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 7d5b250c7b157..09758a41250ac 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -79,7 +79,8 @@ def setup(self): "int": np.random.randint(2**16, size=154), "float": sys.maxsize * np.random.random((38,)), "timestamp": [ - pd.Timestamp(x, unit="s") for x in np.random.randint(2**18, size=578) + pd.Timestamp(x, input_unit="s") + for x in np.random.randint(2**18, size=578) ], } diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 7c1d6457eea15..b9e0d9fa8e244 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1134,7 +1134,7 @@ def setup(self): index = MultiIndex.from_product( [ np.arange(num_groups), - to_timedelta(np.arange(num_timedeltas), unit="s"), + to_timedelta(np.arange(num_timedeltas), input_unit="s"), ], names=["groups", "timedeltas"], ) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index ce3935d2cd0ac..aea0772ff8a62 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -118,22 +118,22 @@ def setup(self): # speed of int64, uint64 and float64 paths should be comparable def time_nanosec_int64(self): - to_datetime(self.ts_nanosec, unit="ns") + to_datetime(self.ts_nanosec, input_unit="ns") def time_nanosec_uint64(self): - to_datetime(self.ts_nanosec_uint, unit="ns") + to_datetime(self.ts_nanosec_uint, input_unit="ns") def time_nanosec_float64(self): - to_datetime(self.ts_nanosec_float, unit="ns") + to_datetime(self.ts_nanosec_float, input_unit="ns") def time_sec_uint64(self): - to_datetime(self.ts_sec_uint, unit="s") + to_datetime(self.ts_sec_uint, input_unit="s") def time_sec_int64(self): - to_datetime(self.ts_sec, unit="s") + to_datetime(self.ts_sec, input_unit="s") def time_sec_float64(self): - to_datetime(self.ts_sec_float, unit="s") + to_datetime(self.ts_sec_float, input_unit="s") class 
ToDatetimeYYYYMMDD: @@ -250,10 +250,10 @@ def setup(self, cache): self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N def time_unique_seconds_and_unit(self, cache): - to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) + to_datetime(self.unique_numeric_seconds, input_unit="s", cache=cache) def time_dup_seconds_and_unit(self, cache): - to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) + to_datetime(self.dup_numeric_seconds, input_unit="s", cache=cache) def time_dup_string_dates(self, cache): to_datetime(self.dup_string_dates, cache=cache) @@ -275,7 +275,7 @@ def setup(self): self.str_seconds.append(f"00:00:{i:02d}") def time_convert_int(self): - to_timedelta(self.ints, unit="s") + to_timedelta(self.ints, input_unit="s") def time_convert_string_days(self): to_timedelta(self.str_days) diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py index 9d9689fcfa94b..542b1df6f440a 100644 --- a/asv_bench/benchmarks/tslibs/timedelta.py +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -14,13 +14,13 @@ class TimedeltaConstructor: def setup(self): self.nptimedelta64 = np.timedelta64(3600) self.dttimedelta = datetime.timedelta(seconds=3600) - self.td = Timedelta(3600, unit="s") + self.td = Timedelta(3600, input_unit="s") def time_from_int(self): Timedelta(123456789) def time_from_unit(self): - Timedelta(1, unit="D") + Timedelta(1, input_unit="D") def time_from_components(self): Timedelta( diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 15b6de4b6a054..d516a69c02499 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -35,7 +35,7 @@ You can construct a ``Timedelta`` scalar through various arguments, including `I pd.Timedelta(days=1, seconds=1) # integers with a unit - pd.Timedelta(1, unit="D") + pd.Timedelta(1, input_unit="D") # from a datetime.timedelta/np.timedelta64 pd.Timedelta(datetime.timedelta(days=1, seconds=1)) @@ 
-93,8 +93,8 @@ is numeric: .. ipython:: python - pd.to_timedelta(np.arange(5), unit="s") - pd.to_timedelta(np.arange(5), unit="D") + pd.to_timedelta(np.arange(5), input_unit="s") + pd.to_timedelta(np.arange(5), input_unit="D") .. warning:: If a string or array of strings is passed as an input then the ``unit`` keyword @@ -199,7 +199,7 @@ You can fillna on timedeltas, passing a timedelta to get a particular value. .. ipython:: python y.fillna(pd.Timedelta(0)) - y.fillna(pd.Timedelta(10, unit="s")) + y.fillna(pd.Timedelta(10, input_unit="s")) y.fillna(pd.Timedelta("-1 days, 00:00:05")) You can also negate, multiply and use ``abs`` on ``Timedeltas``: diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 6a66c30cffbf0..78c3551a641e5 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -307,25 +307,25 @@ Epoch timestamps ~~~~~~~~~~~~~~~~ pandas supports converting integer or float epoch times to ``Timestamp`` and -``DatetimeIndex``. The default unit is nanoseconds, since that is how ``Timestamp`` -objects are stored internally. However, epochs are often stored in another ``unit`` +``DatetimeIndex``. The default input_unit is nanoseconds, since that is how ``Timestamp`` +objects are stored internally. However, epochs are often stored in another ``input_unit`` which can be specified. These are computed from the starting point specified by the ``origin`` parameter. .. ipython:: python pd.to_datetime( - [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s" + [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], input_unit="s" ) pd.to_datetime( [1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500], - unit="ms", + input_unit="ms", ) .. note:: - The ``unit`` parameter does not use the same strings as the ``format`` parameter + The ``input_unit`` parameter does not use the same strings as the ``format`` parameter that was discussed :ref:`above`. 
The available units are listed on the documentation for :func:`pandas.to_datetime`. @@ -353,8 +353,8 @@ as timezone-naive timestamps and then localize to the appropriate timezone: .. ipython:: python - pd.to_datetime([1490195805.433, 1490195805.433502912], unit="s") - pd.to_datetime(1490195805433502912, unit="ns") + pd.to_datetime([1490195805.433, 1490195805.433502912], input_unit="s") + pd.to_datetime(1490195805433502912, input_unit="ns") .. seealso:: @@ -389,14 +389,14 @@ of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: .. ipython:: python - pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")) + pd.to_datetime([1, 2, 3], input_unit="D", origin=pd.Timestamp("1960-01-01")) The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. Commonly called 'unix epoch' or POSIX time. .. ipython:: python - pd.to_datetime([1, 2, 3], unit="D") + pd.to_datetime([1, 2, 3], input_unit="D") .. _timeseries.daterange: @@ -2633,7 +2633,7 @@ Transform nonexistent times to ``NaT`` or shift the times. dti dti.tz_localize("Europe/Warsaw", nonexistent="shift_forward") dti.tz_localize("Europe/Warsaw", nonexistent="shift_backward") - dti.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta(1, unit="h")) + dti.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta(1, input_unit="h")) dti.tz_localize("Europe/Warsaw", nonexistent="NaT") diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index d6d1d96ccc878..b553e997d3ac2 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -142,6 +142,7 @@ from where to compute the resulting timestamps when parsing numerical values wit For example, with 1960-01-01 as the starting date: .. ipython:: python + :okwarning: pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) @@ -149,6 +150,7 @@ The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00 commonly called 'unix epoch' or POSIX time. 
This was the previous default, so this is a backward compatible change. .. ipython:: python + :okwarning: pd.to_datetime([1, 2, 3], unit='D') diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..8d3c88fa018c2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -812,6 +812,7 @@ Other Deprecations - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`) - Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`) - Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`) +- Deprecated the ``unit`` keyword in :meth:`to_datetime` and :meth:`to_timedelta`, use ``input_unit`` instead (:issue:`62097`) .. --------------------------------------------------------------------------- .. _whatsnew_300.prior_deprecations: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2a080bcb19ae9..abf041df7f9e3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -117,7 +117,7 @@ def cast_from_unit_vectorized( # but not clear what 2.5 "M" corresponds to, so we will # disallow that case. raise ValueError( - f"Conversion of non-round float with unit={unit} " + f"Conversion of non-round float with input_unit={unit} " "is ambiguous" ) @@ -194,7 +194,7 @@ cdef int64_t cast_from_unit( # but not clear what 2.5 "M" corresponds to, so we will # disallow that case. raise ValueError( - f"Conversion of non-round float with unit={unit} " + f"Conversion of non-round float with input_unit={unit} " "is ambiguous" ) # GH#47266 go through np.datetime64 to avoid weird results e.g. 
with "Y" diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index be86118a2b9e2..94a37180a699b 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -5409,7 +5409,7 @@ cpdef to_offset(freq, bint is_period=False): # For these prefixes, we have something like "3h" or # "2.5min", so we can construct a Timedelta with the # matching unit and get our offset from delta_to_tick - td = Timedelta(1, unit=name) + td = Timedelta(1, input_unit=name) off = delta_to_tick(td) offset = off * float(stride) if n != 0: diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 2200f9ebbbbb5..dd9db5095fd49 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -95,6 +95,8 @@ class Timedelta(timedelta): def __new__( # type: ignore[misc] cls: type[Self], value=..., + input_unit: str | None = ..., + *, unit: str | None = ..., **kwargs: float | np.integer | np.floating, ) -> Self | NaTType: ... 
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2d18a275f26f5..da1512c992306 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1107,7 +1107,7 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> pd.Timedelta(1, "us").value + >>> pd.Timedelta(1, input_unit="us").value 1000 """ try: @@ -1149,7 +1149,7 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> td = pd.Timedelta(1, "d") + >>> td = pd.Timedelta(1, input_unit="d") >>> td.days 1 @@ -1191,7 +1191,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='s') + >>> td = pd.Timedelta(42, input_unit='s') >>> td.seconds 42 """ @@ -1231,7 +1231,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='us') + >>> td = pd.Timedelta(42, input_unit='us') >>> td.microseconds 42 """ @@ -1283,7 +1283,8 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> td = pd.Timedelta(42, unit='us') + >>> td = pd.Timedelta(42, input_unit='us') + >>> td.unit 'ns' """ return npy_unit_to_abbrev(self._creso) @@ -1627,7 +1628,7 @@ cdef class _Timedelta(timedelta): >>> td.asm8 numpy.timedelta64(3005000,'ns') - >>> td = pd.Timedelta(42, unit='ns') + >>> td = pd.Timedelta(42, input_unit='ns') >>> td.asm8 numpy.timedelta64(42,'ns') """ @@ -1671,7 +1672,7 @@ cdef class _Timedelta(timedelta): >>> td.resolution_string 's' - >>> td = pd.Timedelta(36, unit='us') + >>> td = pd.Timedelta(36, input_unit='us') >>> td.resolution_string 'us' """ @@ -1718,7 +1719,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='ns') + >>> td = pd.Timedelta(42, input_unit='ns') >>> td.nanoseconds 42 """ @@ -1920,7 +1921,7 @@ class Timedelta(_Timedelta): ---------- value : Timedelta, timedelta, np.timedelta64, str, int or float Input value. 
- unit : str, default 'ns' + input_unit : str, default 'ns' If input is an integer, denote the unit of the input. If input is a float, denote the unit of the integer parts. The decimal parts with resolution lower than 1 nanosecond are ignored. @@ -1940,6 +1941,10 @@ class Timedelta(_Timedelta): Allowing the values `w`, `d`, `MIN`, `MS`, `US` and `NS` to denote units are deprecated in favour of the values `W`, `D`, `min`, `ms`, `us` and `ns`. + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 **kwargs Available kwargs: {days, seconds, microseconds, @@ -1970,7 +1975,7 @@ class Timedelta(_Timedelta): -------- Here we initialize Timedelta object with both value and unit - >>> td = pd.Timedelta(1, "D") + >>> td = pd.Timedelta(1, input_unit="D") >>> td Timedelta('1 days 00:00:00') @@ -1986,7 +1991,18 @@ class Timedelta(_Timedelta): _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds"} - def __new__(cls, object value=_no_input, unit=None, **kwargs): + def __new__(cls, object value=_no_input, input_unit=None, *, unit=None, **kwargs): + if unit is not None: + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + from pandas.errors import Pandas4Warning + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + unsupported_kwargs = set(kwargs) unsupported_kwargs.difference_update(cls._req_any_kwargs_new) if unsupported_kwargs or ( @@ -2002,12 +2018,12 @@ class Timedelta(_Timedelta): ) if ( - unit is not None + input_unit is not None and not (is_float_object(value) or is_integer_object(value)) ): # GH#53198 warnings.warn( - "The 'unit' keyword is only used when the Timedelta input is " + "The 'input_unit' keyword is only used when the Timedelta input is " f"an integer or float, not {type(value).__name__}. 
" "To specify the storage unit of the output use `td.as_unit(unit)`", UserWarning, @@ -2054,7 +2070,7 @@ class Timedelta(_Timedelta): ) raise OutOfBoundsTimedelta(msg) from err - disallow_ambiguous_unit(unit) + disallow_ambiguous_unit(input_unit) cdef: int64_t new_value @@ -2062,7 +2078,7 @@ class Timedelta(_Timedelta): # GH 30543 if pd.Timedelta already passed, return it # check that only value is passed if isinstance(value, _Timedelta): - # 'unit' is benign in this case, but e.g. days or seconds + # 'input_unit' is benign in this case, but e.g. days or seconds # doesn't make sense here. if len(kwargs): # GH#48898 @@ -2073,8 +2089,10 @@ class Timedelta(_Timedelta): ) return value elif isinstance(value, str): - if unit is not None: - raise ValueError("unit must not be specified if the value is a str") + if input_unit is not None: + raise ValueError( + "input_unit must not be specified if the value is a str" + ) if (len(value) > 0 and value[0] == "P") or ( len(value) > 1 and value[:2] == "-P" ): @@ -2134,8 +2152,8 @@ class Timedelta(_Timedelta): elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' - unit = parse_timedelta_unit(unit) - value = _numeric_to_td64ns(value, unit) + input_unit = parse_timedelta_unit(input_unit) + value = _numeric_to_td64ns(value, input_unit) else: raise ValueError( diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index 3195ce9641f2b..92f8e3f35bb46 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -51,8 +51,9 @@ class Timestamp(datetime): *, nanosecond: int | None = ..., tz: _TimeZones = ..., - unit: str | int | None = ..., + input_unit: str | None = ..., fold: int | None = ..., + unit: str | None = ..., ) -> Self | NaTType: ... 
@classmethod def _from_value_and_reso( diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5d6c7b53f918c..82e65e2d157dc 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1805,18 +1805,22 @@ class Timestamp(_Timestamp): Value of nanosecond. tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will have. - unit : str - Unit used for conversion if ts_input is of type int or float. The - valid values are 'W', 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For - example, 's' means seconds and 'ms' means milliseconds. + unit : str or None, default None + Use input_unit instead. - For float inputs, the result will be stored in nanoseconds, and - the unit attribute will be set as ``'ns'``. + .. deprecated:: 3.0.0 fold : {0, 1}, default None, keyword-only Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. + input_unit : str + Unit used for conversion if ts_input is of type int or float. The + valid values are 'W', 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For + example, 's' means seconds and 'ms' means milliseconds. + + For float inputs, the result will be stored in nanoseconds, and + the unit attribute will be set as ``'ns'``. 
See Also -------- @@ -1843,18 +1847,18 @@ class Timestamp(_Timestamp): This converts a float representing a Unix epoch in units of seconds - >>> pd.Timestamp(1513393355.5, unit='s') + >>> pd.Timestamp(1513393355.5, input_unit='s') Timestamp('2017-12-16 03:02:35.500000') This converts an int representing a Unix-epoch in units of weeks - >>> pd.Timestamp(1535, unit='W') + >>> pd.Timestamp(1535, input_unit='W') Timestamp('1999-06-03 00:00:00') This converts an int representing a Unix-epoch in units of seconds and for a particular timezone - >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') + >>> pd.Timestamp(1513393355, input_unit='s', tz='US/Pacific') Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') Using the other two forms that mimic the API for ``datetime.datetime``: @@ -2587,6 +2591,7 @@ class Timestamp(_Timestamp): tz=_no_input, unit=None, fold=None, + input_unit=None, ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. @@ -2613,6 +2618,17 @@ class Timestamp(_Timestamp): _TSObject ts tzinfo_type tzobj + if unit is not None: + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + from pandas.errors import Pandas4Warning + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] @@ -2656,12 +2672,12 @@ class Timestamp(_Timestamp): ts_input = ts_input.replace(fold=fold) if ( - unit is not None + input_unit is not None and not (is_float_object(ts_input) or is_integer_object(ts_input)) ): # GH#53198 warnings.warn( - "The 'unit' keyword is only used when the Timestamp input is " + "The 'input_unit' keyword is only used when the Timestamp input is " f"an integer or float, not {type(ts_input).__name__}. 
" "To specify the storage unit of the output use `ts.as_unit(unit)`", UserWarning, @@ -2673,7 +2689,7 @@ class Timestamp(_Timestamp): # checking verbosely, because cython doesn't optimize # list comprehensions (as of cython 0.29.x) if (isinstance(ts_input, _Timestamp) and - tz is None and unit is None and year is None and + tz is None and input_unit is None and year is None and month is None and day is None and hour is None and minute is None and second is None and microsecond is None and nanosecond is None and @@ -2715,7 +2731,7 @@ class Timestamp(_Timestamp): # microsecond[, tzinfo]]]]]) ts_input = datetime(ts_input, year, month, day or 0, hour or 0, minute or 0, second or 0, fold=fold or 0) - unit = None + input_unit = None if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " @@ -2728,7 +2744,7 @@ class Timestamp(_Timestamp): elif not (999 >= nanosecond >= 0): raise ValueError("nanosecond must be in 0..999") - ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond) + ts = convert_to_tsobject(ts_input, tzobj, input_unit, 0, 0, nanosecond) if ts.value == NPY_NAT: return NaT @@ -3092,7 +3108,7 @@ timedelta}, default 'raise' Examples -------- - >>> ts = pd.Timestamp(1584226800, unit='s', tz='Europe/Stockholm') + >>> ts = pd.Timestamp(1584226800, input_unit='s', tz='Europe/Stockholm') >>> ts.tz zoneinfo.ZoneInfo(key='Europe/Stockholm') """ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index aa224ee571d70..abf3ecd5bdbd1 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -616,7 +616,9 @@ def _box_pa_array( # Workaround https://github.com/apache/arrow/issues/37291 from pandas.core.tools.timedeltas import to_timedelta - value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit) + value = to_timedelta(value, input_unit=pa_type.unit).as_unit( + pa_type.unit + ) value = value.to_numpy() if 
pa_type is not None and pa.types.is_timestamp(pa_type): @@ -797,7 +799,13 @@ def __getitem__(self, item: PositionalIndexer): return result else: pa_type = self._pa_array.type - scalar = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + scalar = value.as_py() if scalar is None: return self._dtype.na_value elif pa.types.is_timestamp(pa_type) and pa_type.unit != "ns": @@ -818,16 +826,22 @@ def __iter__(self) -> Iterator[Any]: pa_type = self._pa_array.type box_timestamp = pa.types.is_timestamp(pa_type) and pa_type.unit != "ns" box_timedelta = pa.types.is_duration(pa_type) and pa_type.unit != "ns" - for value in self._pa_array: - val = value.as_py() - if val is None: - yield na_value - elif box_timestamp: - yield Timestamp(val).as_unit(pa_type.unit) - elif box_timedelta: - yield Timedelta(val).as_unit(pa_type.unit) - else: - yield val + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + for value in self._pa_array: + val = value.as_py() + if val is None: + yield na_value + elif box_timestamp: + yield Timestamp(val).as_unit(pa_type.unit) + elif box_timedelta: + yield Timedelta(val).as_unit(pa_type.unit) + else: + yield val def __arrow_array__(self, type=None): """Convert myself to a pyarrow ChunkedArray.""" @@ -2137,7 +2151,14 @@ def _reduce_calc( if keepdims: if isinstance(pa_result, pa.Scalar): - result = pa.array([pa_result.as_py()], type=pa_result.type) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + item = pa_result.as_py() + result = pa.array([item], type=pa_result.type) else: result = pa.array( [pa_result], @@ -2148,7 +2169,13 @@ def _reduce_calc( if pc.is_null(pa_result).as_py(): return self.dtype.na_value elif isinstance(pa_result, pa.Scalar): - return pa_result.as_py() + with 
warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + return pa_result.as_py() else: return pa_result @@ -2225,7 +2252,13 @@ def __setitem__(self, key, value) -> None: f"index {key} is out of bounds for axis 0 with size {n}" ) if isinstance(value, pa.Scalar): - value = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + value = value.as_py() elif is_list_like(value): raise ValueError("Length of indexer and values mismatch") chunks = [ @@ -2526,7 +2559,13 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]: pa_type = value.type elif isinstance(value, pa.Scalar): pa_type = value.type - value = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + value = value.as_py() else: pa_type = None return np.array(value, dtype=object), pa_type @@ -2575,7 +2614,13 @@ def _replace_with_mask( if isinstance(replacements, pa.Array): replacements = np.array(replacements, dtype=object) elif isinstance(replacements, pa.Scalar): - replacements = replacements.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + replacements = replacements.as_py() result = np.array(values, dtype=object) result[mask] = replacements @@ -2907,7 +2952,13 @@ def _dt_nanoseconds(self) -> Self: ) def _dt_to_pytimedelta(self) -> np.ndarray: - data = self._pa_array.to_pylist() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + data = self._pa_array.to_pylist() if self._dtype.pyarrow_dtype.unit == "ns": data = [None if ts is None else ts.to_pytimedelta() for ts in data] return np.array(data, dtype=object) @@ -3170,7 +3221,13 @@ def _dt_to_pydatetime(self) -> 
Series: f"to_pydatetime cannot be called with {self.dtype.pyarrow_dtype} type. " "Convert to pyarrow timestamp type." ) - data = self._pa_array.to_pylist() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + data = self._pa_array.to_pylist() if self._dtype.pyarrow_dtype.unit == "ns": data = [None if ts is None else ts.to_pydatetime(warn=False) for ts in data] return Series(data, dtype=object) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2f7330d1e81fe..115a49b8fc0d5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1655,7 +1655,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): For :class:`pandas.TimedeltaIndex`: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index da9a70ac51379..cb4fb5661ae9c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -813,10 +813,10 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: if end: if freq == "B" or self.freq == "B": # roll forward to ensure we land on B date - adjust = Timedelta(1, "D") - Timedelta(1, "ns") + adjust = Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") return self.to_timestamp(how="start") + adjust else: - adjust = Timedelta(1, "ns") + adjust = Timedelta(1, input_unit="ns") return (self + self.freq).to_timestamp(how="start") - adjust if freq is None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1647a7f6714ed..6955b455d3f82 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -791,7 +791,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: -------- 
**Series** - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="D")) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), input_unit="D")) >>> s 0 0 days 1 1 days @@ -810,7 +810,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: **TimedeltaIndex** - >>> idx = pd.to_timedelta(np.arange(5), unit="D") + >>> idx = pd.to_timedelta(np.arange(5), input_unit="D") >>> idx TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) @@ -844,7 +844,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: Examples -------- - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) @@ -875,7 +875,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='D')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='D')) >>> ser 0 1 days 1 2 days @@ -910,7 +910,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='s')) >>> ser 0 0 days 00:00:01 1 0 days 00:00:02 @@ -924,7 +924,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='s') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None) @@ -950,7 +950,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='us')) >>> ser 0 0 days 00:00:00.000001 1 0 days 00:00:00.000002 @@ -964,7 +964,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For 
TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='us') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002', '0 days 00:00:00.000003'], @@ -990,7 +990,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='ns')) >>> ser 0 0 days 00:00:00.000000001 1 0 days 00:00:00.000000002 @@ -1004,7 +1004,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='ns') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002', '0 days 00:00:00.000000003'], diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index ee6ac6584569e..67288ed937067 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -229,7 +229,7 @@ def stringify(value): if isinstance(conv_val, str): conv_val = Timedelta(conv_val) elif lib.is_integer(conv_val) or lib.is_float(conv_val): - conv_val = Timedelta(conv_val, unit="s") + conv_val = Timedelta(conv_val, input_unit="s") else: conv_val = Timedelta(conv_val) conv_val = conv_val.as_unit("ns")._value diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index b9a0af6b0bc28..b343dfe23738b 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -482,7 +482,7 @@ def to_pytimedelta(self) -> np.ndarray: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="D")) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), input_unit="D")) >>> s 0 0 days 1 1 days @@ -528,7 +528,7 @@ def components(self) -> DataFrame: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="s")) + >>> s = 
pd.Series(pd.to_timedelta(np.arange(5), input_unit="s")) >>> s 0 0 days 00:00:00 1 0 days 00:00:01 diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58529c5597b6e..80090e285e07b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -550,7 +550,7 @@ def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = self.freq elif isinstance(res_i8, RangeIndex): new_freq = to_offset( - Timedelta(res_i8.step, unit=self.unit).as_unit(self.unit) + Timedelta(res_i8.step, input_unit=self.unit).as_unit(self.unit) ) # TODO(GH#41493): we cannot just do diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index be8d1e465ed34..0c61f7ca4f7e4 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -233,7 +233,9 @@ def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # t def _parsed_string_to_bounds(self, reso, parsed: Timedelta): # reso is unused, included to match signature of DTI/PI lbound = parsed.round(parsed.resolution_string) - rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns") + rbound = ( + lbound + to_offset(parsed.resolution_string) - Timedelta(1, input_unit="ns") + ) return lbound, rbound # ------------------------------------------------------------------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f84bedda8d00c..9e63b1b44aead 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2657,7 +2657,7 @@ def _adjust_bin_edges( edges_dti = (
edges_dti + Timedelta(days=1).as_unit(edges_dti.unit) - - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit) + - Timedelta(1, input_unit=edges_dti.unit).as_unit(edges_dti.unit) ) bin_edges = edges_dti.tz_localize(binner.tz).asi8 else: @@ -3070,8 +3070,8 @@ def _adjust_dates_anchored( lresult_int = last._value + (freq_value - loffset) else: lresult_int = last._value + freq_value - fresult = Timestamp(fresult_int, unit=unit) - lresult = Timestamp(lresult_int, unit=unit) + fresult = Timestamp(fresult_int, input_unit=unit) + lresult = Timestamp(lresult_int, input_unit=unit) if first_tzinfo is not None: fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) if last_tzinfo is not None: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..894b8856ed596 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -596,7 +596,7 @@ def _format_labels( # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(bins.dtype) # type: ignore[arg-type] formatter = lambda x: x - adjust = lambda x: x - Timedelta(1, unit=unit).as_unit(unit) + adjust = lambda x: x - Timedelta(1, input_unit=unit).as_unit(unit) else: precision = _infer_precision(precision, bins) formatter = lambda x: _round_frac(x, precision) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ce311d0c89b55..a1b0825b703ef 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -41,6 +41,7 @@ ArrayLike, DateTimeErrorChoices, ) +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level @@ -322,7 +323,7 @@ def _convert_listlike_datetimes( format: str | None, name: Hashable | None = None, utc: bool = False, - unit: str | None = None, + input_unit: str | None = None, errors: DateTimeErrorChoices = "raise", dayfirst: bool | None = None, yearfirst: bool | None = None, @@ -340,7 +341,7 
@@ def _convert_listlike_datetimes( None or string for the Index name utc : bool Whether to convert/localize timestamps to UTC. - unit : str + input_unit : str None or string of the frequency of the passed data errors : str error handing behaviors from to_datetime, 'raise', 'coerce' @@ -406,10 +407,10 @@ def _convert_listlike_datetimes( return arg - elif unit is not None: + elif input_unit is not None: if format is not None: - raise ValueError("cannot specify both format and unit") - return _to_datetime_with_unit(arg, unit, name, utc, errors) + raise ValueError("cannot specify both format and input_unit") + return _to_datetime_with_unit(arg, input_unit, name, utc, errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" ) @@ -481,16 +482,16 @@ def _array_strptime_with_fallback( return Index(result, dtype=result.dtype, name=name) -def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: +def _to_datetime_with_unit(arg, input_unit, name, utc: bool, errors: str) -> Index: """ - to_datetime specalized to the case where a 'unit' is passed. + to_datetime specialized to the case where an 'input_unit' is passed.
""" arg = extract_array(arg, extract_numpy=True) # GH#30050 pass an ndarray to tslib.array_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - arr = arg.astype(f"datetime64[{unit}]") + arr = arg.astype(f"datetime64[{input_unit}]") tz_parsed = None else: arg = np.asarray(arg) @@ -498,27 +499,27 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: if arg.dtype.kind in "iu": # Note we can't do "f" here because that could induce unwanted # rounding GH#14156, GH#20445 - arr = arg.astype(f"datetime64[{unit}]", copy=False) + arr = arg.astype(f"datetime64[{input_unit}]", copy=False) try: arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False) except OutOfBoundsDatetime: if errors == "raise": raise arg = arg.astype(object) - return _to_datetime_with_unit(arg, unit, name, utc, errors) + return _to_datetime_with_unit(arg, input_unit, name, utc, errors) tz_parsed = None elif arg.dtype.kind == "f": with np.errstate(over="raise"): try: - arr = cast_from_unit_vectorized(arg, unit=unit) + arr = cast_from_unit_vectorized(arg, unit=input_unit) except OutOfBoundsDatetime as err: if errors != "raise": return _to_datetime_with_unit( - arg.astype(object), unit, name, utc, errors + arg.astype(object), input_unit, name, utc, errors ) raise OutOfBoundsDatetime( - f"cannot convert input with unit '{unit}'" + f"cannot convert input with input_unit '{input_unit}'" ) from err arr = arr.view("M8[ns]") @@ -529,7 +530,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: arg, utc=utc, errors=errors, - unit_for_numerics=unit, + unit_for_numerics=input_unit, creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value), ) @@ -550,7 +551,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: return result -def _adjust_to_origin(arg, origin, unit): +def _adjust_to_origin(arg, origin, input_unit): """ Helper function for to_datetime. 
Adjust input argument to the specified origin @@ -561,8 +562,8 @@ def _adjust_to_origin(arg, origin, unit): date to be adjusted origin : 'julian' or Timestamp origin offset for the arg - unit : str - passed unit from to_datetime, must be 'D' + input_unit : str + passed input_unit from to_datetime, must be 'D' Returns ------- @@ -571,7 +572,7 @@ if origin == "julian": original = arg j0 = Timestamp(0).to_julian_date() - if unit != "D": + if input_unit != "D": raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 @@ -594,13 +595,13 @@ ): raise ValueError( f"'{arg}' is not compatible with origin='{origin}'; " - "it must be numeric with a unit specified" + "it must be numeric with an input_unit specified" ) # we are going to offset back to unix / epoch time try: if lib.is_integer(origin) or lib.is_float(origin): - offset = Timestamp(origin, unit=unit) + offset = Timestamp(origin, input_unit=input_unit) else: offset = Timestamp(origin) except OutOfBoundsDatetime as err: @@ -616,7 +617,7 @@ def _adjust_to_origin(arg, origin, unit): # convert the offset to the unit of the arg # this should be lossless in terms of precision - ioffset = td_offset // Timedelta(1, unit=unit) + ioffset = td_offset // Timedelta(1, input_unit=input_unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): @@ -637,6 +638,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> Timestamp: ... @@ -652,6 +655,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> Series: ... @@ -667,6 +672,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> DatetimeIndex: ...
@@ -682,6 +689,8 @@ def to_datetime( unit: str | None = None, origin: str = "unix", cache: bool = True, + *, + input_unit: str | None = None, ) -> DatetimeIndex | Series | DatetimeScalar | NaTType: """ Convert argument to datetime. @@ -766,11 +775,10 @@ string. Cannot be used alongside ``format='ISO8601'`` or ``format='mixed'``. - unit : str, default 'ns' - The unit of the arg (D,s,ms,us,ns) denote the unit, which is an - integer or float number. This will be based off the origin. - Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate - the number of milliseconds to the unix epoch start. + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 origin : scalar, default 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. @@ -790,6 +798,11 @@ is only used when there are at least 50 values. The presence of out-of-bounds values will render the cache unusable and may slow down parsing. + input_unit : str, default 'ns' + The unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with ``input_unit='ms'`` and ``origin='unix'``, this would calculate + the number of milliseconds to the unix epoch start. Returns ------- @@ -888,9 +901,9 @@ def to_datetime( Using a unix epoch time - >>> pd.to_datetime(1490195805, unit="s") + >>> pd.to_datetime(1490195805, input_unit="s") Timestamp('2017-03-22 15:16:45') - >>> pd.to_datetime(1490195805433502912, unit="ns") + >>> pd.to_datetime(1490195805433502912, input_unit="ns") Timestamp('2017-03-22 15:16:45.433502912') .. warning:: For float arg, precision rounding might happen.
To prevent @@ -898,7 +911,7 @@ def to_datetime( Using a non-unix epoch origin - >>> pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")) + >>> pd.to_datetime([1, 2, 3], input_unit="D", origin=pd.Timestamp("1960-01-01")) DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None) @@ -992,18 +1005,28 @@ def to_datetime( DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], dtype='datetime64[us, UTC]', freq=None) """ + if unit is not None: + # GH#62097 + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit if exact is not lib.no_default and format in {"mixed", "ISO8601"}: raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'") if arg is None: return NaT if origin != "unix": - arg = _adjust_to_origin(arg, origin, unit) + arg = _adjust_to_origin(arg, origin, input_unit) convert_listlike = partial( _convert_listlike_datetimes, utc=utc, - unit=unit, + input_unit=input_unit, dayfirst=dayfirst, yearfirst=yearfirst, errors=errors, @@ -1184,7 +1207,7 @@ def coerce(values): value = unit_rev.get(u) if value is not None and value in arg: try: - values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) + values += to_timedelta(coerce(arg[value]), input_unit=u, errors=errors) except (TypeError, ValueError) as err: raise ValueError( f"cannot assemble the datetimes [{value}]: {err}" diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 2dc5e29308214..61437bd3c168f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -9,6 +9,7 @@ Any, overload, ) +import warnings import numpy as np @@ -22,7 +23,9 @@ disallow_ambiguous_unit, parse_timedelta_unit, ) +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module 
+from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.dtypes import ArrowDtype @@ -55,6 +58,8 @@ def to_timedelta( arg: str | float | timedelta, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> Timedelta: ... @@ -63,6 +68,8 @@ def to_timedelta( arg: Series, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> Series: ... @@ -71,6 +78,8 @@ def to_timedelta( arg: list | tuple | range | ArrayLike | Index, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> TimedeltaIndex: ... @@ -88,6 +97,8 @@ def to_timedelta( | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", + *, + input_unit: UnitChoices | None = None, ) -> Timedelta | TimedeltaIndex | Series | NaTType | Any: """ Convert argument to timedelta. @@ -106,7 +117,15 @@ def to_timedelta( Strings with units 'M', 'Y' and 'y' do not represent unambiguous timedelta values and will raise an exception. - unit : str, optional + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 + + errors : {'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaT. + input_unit : str, optional Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. Possible values: @@ -126,10 +145,6 @@ def to_timedelta( Units 'H'and 'S' are deprecated and will be removed in a future version. Please use 'h' and 's'. - errors : {'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception. - - If 'coerce', then invalid parsing will be set as NaT. 
- Returns ------- timedelta @@ -168,17 +183,27 @@ def to_timedelta( Converting numbers by specifying the `unit` keyword argument: - >>> pd.to_timedelta(np.arange(5), unit="s") + >>> pd.to_timedelta(np.arange(5), input_unit="s") TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03', '0 days 00:00:04'], dtype='timedelta64[ns]', freq=None) - >>> pd.to_timedelta(np.arange(5), unit="D") + >>> pd.to_timedelta(np.arange(5), input_unit="D") TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) """ if unit is not None: - unit = parse_timedelta_unit(unit) - disallow_ambiguous_unit(unit) + # GH#62097 + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + if input_unit is not None: + input_unit = parse_timedelta_unit(input_unit) + disallow_ambiguous_unit(input_unit) if errors not in ("raise", "coerce"): raise ValueError("errors must be one of 'raise', or 'coerce'.") @@ -186,10 +211,12 @@ def to_timedelta( if arg is None: return NaT elif isinstance(arg, ABCSeries): - values = _convert_listlike(arg._values, unit=unit, errors=errors) + values = _convert_listlike(arg._values, input_unit=input_unit, errors=errors) return arg._constructor(values, index=arg.index, name=arg.name) elif isinstance(arg, ABCIndex): - return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) + return _convert_listlike( + arg, input_unit=input_unit, errors=errors, name=arg.name + ) elif isinstance(arg, np.ndarray) and arg.ndim == 0: # extract array scalar and process below # error: Incompatible types in assignment (expression has type "object", @@ -198,27 +225,29 @@ def to_timedelta( # Series]]") [assignment] arg = lib.item_from_zerodim(arg) # type: ignore[assignment] elif is_list_like(arg) and getattr(arg, "ndim", 1) == 
1: - return _convert_listlike(arg, unit=unit, errors=errors) + return _convert_listlike(arg, input_unit=input_unit, errors=errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, timedelta, list, tuple, 1-d array, or Series" ) - if isinstance(arg, str) and unit is not None: - raise ValueError("unit must not be specified if the input is/contains a str") + if isinstance(arg, str) and input_unit is not None: + raise ValueError( + "input_unit must not be specified if the input is/contains a str" + ) # ...so it must be a scalar value. Return scalar. - return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) + return _coerce_scalar_to_timedelta_type(arg, input_unit=input_unit, errors=errors) def _coerce_scalar_to_timedelta_type( - r, unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise" + r, input_unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise" ) -> Timedelta | NaTType: """Convert string 'r' to a timedelta object.""" result: Timedelta | NaTType try: - result = Timedelta(r, unit) + result = Timedelta(r, input_unit=input_unit) except ValueError: if errors == "raise": raise @@ -230,7 +259,7 @@ def _coerce_scalar_to_timedelta_type( def _convert_listlike( arg, - unit: UnitChoices | None = None, + input_unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", name: Hashable | None = None, ): @@ -241,7 +270,7 @@ def _convert_listlike( elif isinstance(arg_dtype, ArrowDtype) and arg_dtype.kind == "m": return arg - td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + td64arr = sequence_to_td64ns(arg, unit=input_unit, errors=errors, copy=False)[0] from pandas import TimedeltaIndex diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 5b0ecc2be5cdb..bf2ecad087e6a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1318,7 +1318,7 @@ def _try_convert_to_date(self, data: Series) -> Series: date_units = (self.date_unit,) if 
self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - return to_datetime(new_data, errors="raise", unit=date_unit) + return to_datetime(new_data, errors="raise", input_unit=date_unit) except (ValueError, OverflowError, TypeError): continue return data diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index e3c1b2219599d..8203777c5cb01 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -280,11 +280,11 @@ def _get_properties(self) -> None: x = self._read_float( const.date_created_offset + align1, const.date_created_length ) - self.date_created = epoch + pd.to_timedelta(x, unit="s") + self.date_created = epoch + pd.to_timedelta(x, input_unit="s") x = self._read_float( const.date_modified_offset + align1, const.date_modified_length ) - self.date_modified = epoch + pd.to_timedelta(x, unit="s") + self.date_modified = epoch + pd.to_timedelta(x, input_unit="s") self.header_length = self._read_uint( const.header_size_offset + align1, const.header_size_length diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0247a4b1da8dd..9f5103c94d271 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -127,7 +127,7 @@ def _handle_date_column( ): format = "s" if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: - return to_datetime(col, errors="coerce", unit=format, utc=utc) + return to_datetime(col, errors="coerce", input_unit=format, utc=utc) elif isinstance(col.dtype, DatetimeTZDtype): # coerce to UTC timezone # GH11216 diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 06793769b39bb..2cf7a9d4f4fdc 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -179,7 +179,7 @@ def test_apply_mixed_datetimelike(): expected = DataFrame( { "A": date_range("20130101", periods=3), - "B": pd.to_timedelta(np.arange(3), unit="s"), + "B": pd.to_timedelta(np.arange(3), input_unit="s"), } ) result = expected.apply(lambda x: x, axis=1) diff 
--git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 9a54386abf281..889451ae1c5e5 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -696,16 +696,16 @@ def test_tdi_add_overflow(self): # preliminary test scalar analogue of vectorized tests below # TODO: Make raised error message more informative and test with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - pd.to_timedelta(106580, "D") + Timestamp("2000") + pd.to_timedelta(106580, input_unit="D") + Timestamp("2000") with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - Timestamp("2000") + pd.to_timedelta(106580, "D") + Timestamp("2000") + pd.to_timedelta(106580, input_unit="D") _NaT = NaT._value + 1 msg = "Overflow in int64 addition" with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([106580], "D") + Timestamp("2000") + pd.to_timedelta([106580], input_unit="D") + Timestamp("2000") with pytest.raises(OverflowError, match=msg): - Timestamp("2000") + pd.to_timedelta([106580], "D") + Timestamp("2000") + pd.to_timedelta([106580], input_unit="D") with pytest.raises(OverflowError, match=msg): pd.to_timedelta([_NaT]) - Timedelta("1 days") with pytest.raises(OverflowError, match=msg): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6a3568184b990..2fcb4636ec03f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1301,7 +1301,7 @@ def test_to_numpy_extra_readonly(arr): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + pd.to_timedelta([1, 2], input_unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) @@ -1332,7 +1332,7 @@ def test_searchsorted_datetimelike_with_listlike(values, klass, as_index): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + 
pd.to_timedelta([1, 2], input_unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index 151586962d517..2a08a34cf5803 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -29,7 +29,7 @@ (np.datetime64("2005-02-25"), Timestamp), (Timestamp("2005-02-25"), Timestamp), (np.timedelta64(1, "D"), Timedelta), - (Timedelta(1, "D"), Timedelta), + (Timedelta(1, input_unit="D"), Timedelta), (Interval(0, 1), Interval), (Period("4Q2005"), Period), ], diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 2b90886a8d070..400c63cedc15c 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -13,7 +13,7 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) - timedelta_index = pd.to_timedelta(np.arange(5), unit="s") + timedelta_index = pd.to_timedelta(np.arange(5), input_unit="s") period_index = pd.period_range("2000/1/1", "2010/1/1/", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 875aa38481ecf..dfcc91708a7d2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3053,7 +3053,7 @@ def test_describe_timedelta_data(pa_type): data = pd.Series(range(1, 10), dtype=ArrowDtype(pa_type)) result = data.describe() expected = pd.Series( - [9] + pd.to_timedelta([5, 2, 1, 3, 5, 7, 9], unit=pa_type.unit).tolist(), + [9] + pd.to_timedelta([5, 2, 1, 3, 5, 7, 9], input_unit=pa_type.unit).tolist(), dtype=object, index=["count", "mean", "std", "min", 
"25%", "50%", "75%", "max"], ) @@ -3068,7 +3068,7 @@ def test_describe_datetime_data(pa_type): expected = pd.Series( [9] + [ - pd.Timestamp(v, tz=pa_type.tz, unit=pa_type.unit) + pd.Timestamp(v, tz=pa_type.tz, input_unit=pa_type.unit) for v in [5, 1, 3, 5, 7, 9] ], dtype=object, @@ -3129,9 +3129,9 @@ def test_from_sequence_temporal(pa_type): val = 3 unit = pa_type.unit if pa.types.is_duration(pa_type): - seq = [pd.Timedelta(val, unit=unit).as_unit(unit)] + seq = [pd.Timedelta(val, input_unit=unit).as_unit(unit)] else: - seq = [pd.Timestamp(val, unit=unit, tz=pa_type.tz).as_unit(unit)] + seq = [pd.Timestamp(val, input_unit=unit, tz=pa_type.tz).as_unit(unit)] result = ArrowExtensionArray._from_sequence(seq, dtype=pa_type) expected = ArrowExtensionArray(pa.array([val], type=pa_type)) @@ -3145,9 +3145,9 @@ def test_setitem_temporal(pa_type): # GH 53171 unit = pa_type.unit if pa.types.is_duration(pa_type): - val = pd.Timedelta(1, unit=unit).as_unit(unit) + val = pd.Timedelta(1, input_unit=unit).as_unit(unit) else: - val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit) + val = pd.Timestamp(1, input_unit=unit, tz=pa_type.tz).as_unit(unit) arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) @@ -3164,7 +3164,7 @@ def test_arithmetic_temporal(pa_type, request): # GH 53171 arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) unit = pa_type.unit - result = arr - pd.Timedelta(1, unit=unit).as_unit(unit) + result = arr - pd.Timedelta(1, input_unit=unit).as_unit(unit) expected = ArrowExtensionArray(pa.array([0, 1, 2], type=pa_type)) tm.assert_extension_array_equal(result, expected) @@ -3176,9 +3176,9 @@ def test_comparison_temporal(pa_type): # GH 53171 unit = pa_type.unit if pa.types.is_duration(pa_type): - val = pd.Timedelta(1, unit=unit).as_unit(unit) + val = pd.Timedelta(1, input_unit=unit).as_unit(unit) else: - val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit) + val = pd.Timestamp(1, input_unit=unit, tz=pa_type.tz).as_unit(unit) arr = 
ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) @@ -3195,10 +3195,10 @@ def test_getitem_temporal(pa_type): arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) result = arr[1] if pa.types.is_duration(pa_type): - expected = pd.Timedelta(2, unit=pa_type.unit).as_unit(pa_type.unit) + expected = pd.Timedelta(2, input_unit=pa_type.unit).as_unit(pa_type.unit) assert isinstance(result, pd.Timedelta) else: - expected = pd.Timestamp(2, unit=pa_type.unit, tz=pa_type.tz).as_unit( + expected = pd.Timestamp(2, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( pa_type.unit ) assert isinstance(result, pd.Timestamp) @@ -3215,13 +3215,15 @@ def test_iter_temporal(pa_type): result = list(arr) if pa.types.is_duration(pa_type): expected = [ - pd.Timedelta(1, unit=pa_type.unit).as_unit(pa_type.unit), + pd.Timedelta(1, input_unit=pa_type.unit).as_unit(pa_type.unit), pd.NA, ] assert isinstance(result[0], pd.Timedelta) else: expected = [ - pd.Timestamp(1, unit=pa_type.unit, tz=pa_type.tz).as_unit(pa_type.unit), + pd.Timestamp(1, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( + pa_type.unit + ), pd.NA, ] assert isinstance(result[0], pd.Timestamp) @@ -3247,9 +3249,11 @@ def test_to_numpy_temporal(pa_type, dtype): arr = ArrowExtensionArray(pa.array([1, None], type=pa_type)) result = arr.to_numpy(dtype=dtype) if pa.types.is_duration(pa_type): - value = pd.Timedelta(1, unit=pa_type.unit).as_unit(pa_type.unit) + value = pd.Timedelta(1, input_unit=pa_type.unit).as_unit(pa_type.unit) else: - value = pd.Timestamp(1, unit=pa_type.unit, tz=pa_type.tz).as_unit(pa_type.unit) + value = pd.Timestamp(1, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( + pa_type.unit + ) if dtype == object or (pa.types.is_timestamp(pa_type) and pa_type.tz is not None): if dtype == object: diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index e4036efeab7ff..b7748722dd9b6 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ 
b/pandas/tests/frame/indexing/test_mask.py @@ -125,7 +125,7 @@ def test_mask_stringdtype(frame_or_series): def test_mask_where_dtype_timedelta(): # https://github.com/pandas-dev/pandas/issues/39548 - df = DataFrame([Timedelta(i, unit="D") for i in range(5)]) + df = DataFrame([Timedelta(i, input_unit="D") for i in range(5)]) expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]")) tm.assert_frame_equal(df.mask(df.notna()), expected) @@ -133,7 +133,7 @@ def test_mask_where_dtype_timedelta(): expected = DataFrame( [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")] ) - tm.assert_frame_equal(df.where(df > Timedelta(2, unit="D")), expected) + tm.assert_frame_equal(df.where(df > Timedelta(2, input_unit="D")), expected) def test_mask_return_dtype(): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 906c7654ef11f..2efe33565a7c7 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -991,8 +991,8 @@ def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype): def test_loc_expansion_with_timedelta_type(self): result = DataFrame(columns=list("abc")) result.loc[0] = { - "a": pd.to_timedelta(5, unit="s"), - "b": pd.to_timedelta(72, unit="s"), + "a": pd.to_timedelta(5, input_unit="s"), + "b": pd.to_timedelta(72, input_unit="s"), "c": "23", } expected = DataFrame( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 9fd1f3133c2f5..dd996d2f3adea 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -150,7 +150,7 @@ def test_astype_str(self): # see GH#9757 a = Series(date_range("2010-01-04", periods=5)) b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) - c = Series([Timedelta(x, unit="D") for x in range(5)]) + c = Series([Timedelta(x, input_unit="D") for x in range(5)]) d = Series(range(5)) e = Series([0.0, 0.2, 0.4, 0.6, 
0.8]) @@ -442,9 +442,9 @@ def test_astype_from_datetimelike_to_object(self, dtype, unit): assert (result.dtypes == object).all() if dtype.startswith("M8"): - assert result.iloc[0, 0] == Timestamp(1, unit=unit) + assert result.iloc[0, 0] == Timestamp(1, input_unit=unit) else: - assert result.iloc[0, 0] == Timedelta(1, unit=unit) + assert result.iloc[0, 0] == Timedelta(1, input_unit=unit) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e90786a43c483..2ce7bd6aa2544 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -220,7 +220,7 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): df = pd.DataFrame( { "timestamps": pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), + pd.to_datetime(range(5), utc=True, input_unit="h"), dtype="timestamp[ns, tz=UTC][pyarrow]", ) } diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index b4511aad27a93..9679e1fdcdd62 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -178,7 +178,9 @@ def test_isin_multiIndex(self): def test_isin_empty_datetimelike(self): # GH#15473 df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])}) - df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]}) + df1_td = DataFrame( + {"date": [pd.Timedelta(1, input_unit="s"), pd.Timedelta(2, input_unit="s")]} + ) df2 = DataFrame({"date": []}) df3 = DataFrame() diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 80227c0462329..676f42403e46a 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -600,8 +600,12 @@ def test_reset_index_with_drop( 
{"a": [pd.NaT, Timestamp("2020-01-01")], "b": [1, 2], "x": [11, 12]}, ), ( - [(pd.NaT, 1), (pd.Timedelta(123, "D"), 2)], - {"a": [pd.NaT, pd.Timedelta(123, "D")], "b": [1, 2], "x": [11, 12]}, + [(pd.NaT, 1), (pd.Timedelta(123, input_unit="D"), 2)], + { + "a": [pd.NaT, pd.Timedelta(123, input_unit="D")], + "b": [1, 2], + "x": [11, 12], + }, ), ], ) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index 0e7e1d595d6be..0e59a1c6f39ae 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -37,7 +37,9 @@ def test_to_timestamp(self, frame_or_series): obj = tm.get_obj(obj, frame_or_series) exp_index = date_range("1/1/2001", end="12/31/2009", freq="YE-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) result = obj.to_timestamp("D", "end") tm.assert_index_equal(result.index, exp_index) tm.assert_numpy_array_equal(result.values, obj.values) @@ -54,19 +56,25 @@ def test_to_timestamp(self, frame_or_series): delta = timedelta(hours=23) result = obj.to_timestamp("H", "end") exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23, minutes=59) result = obj.to_timestamp("T", "end") exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="m") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) result = obj.to_timestamp("S", "end") delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + 
Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) def test_to_timestamp_columns(self): @@ -83,7 +91,9 @@ def test_to_timestamp_columns(self): df = df.T exp_index = date_range("1/1/2001", end="12/31/2009", freq="YE-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) result = df.to_timestamp("D", "end", axis=1) tm.assert_index_equal(result.columns, exp_index) tm.assert_numpy_array_equal(result.values, df.values) @@ -95,19 +105,25 @@ def test_to_timestamp_columns(self): delta = timedelta(hours=23) result = df.to_timestamp("H", "end", axis=1) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) delta = timedelta(hours=23, minutes=59) result = df.to_timestamp("min", "end", axis=1) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="m") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) result = df.to_timestamp("S", "end", axis=1) delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) result1 = df.to_timestamp("5min", axis=1) @@ -139,7 +155,9 @@ def test_to_timestamp_hourly(self, frame_or_series): exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="h") result = obj.to_timestamp(how="end") - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="s") - 
Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) if frame_or_series is Series: assert result.name == "foo" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 264011edb65b5..ccde72836f7fa 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -892,8 +892,8 @@ def create_data(constructor): [ (lambda x: np.timedelta64(x, "D"), "timedelta64"), (lambda x: timedelta(days=x), "pytimedelta"), - (lambda x: Timedelta(x, "D"), "Timedelta[ns]"), - (lambda x: Timedelta(x, "D").as_unit("s"), "Timedelta[s]"), + (lambda x: Timedelta(x, input_unit="D"), "Timedelta[ns]"), + (lambda x: Timedelta(x, input_unit="D").as_unit("s"), "Timedelta[s]"), ], ) def test_constructor_dict_timedelta64_index(self, klass, name): @@ -909,7 +909,7 @@ def test_constructor_dict_timedelta64_index(self, klass, name): {0: None, 1: None, 2: 4, 3: None}, {0: None, 1: None, 2: None, 3: 6}, ], - index=[Timedelta(td, "D") for td in td_as_int], + index=[Timedelta(td, input_unit="D") for td in td_as_int], ) result = DataFrame(data) @@ -2079,8 +2079,8 @@ def test_constructor_timedelta_non_ns(self, order, unit): exp_dtype = np.dtype(f"m8[{exp_unit}]") expected = DataFrame( [ - [Timedelta(1, "D"), Timedelta(2, "D")], - [Timedelta(4, "D"), Timedelta(5, "D")], + [Timedelta(1, input_unit="D"), Timedelta(2, input_unit="D")], + [Timedelta(4, input_unit="D"), Timedelta(5, input_unit="D")], ], dtype=exp_dtype, ) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 4d235587c2407..7a75a1a28fbb9 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1684,7 +1684,9 @@ def test_frame_any_with_timedelta(self): df = DataFrame( { "a": Series([0, 0]), - "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]), + "t": Series( + [to_timedelta(0, input_unit="s"), to_timedelta(1, input_unit="ms")] + ), } ) diff --git 
a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index d9c8706ec9202..ccf11b4997627 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -443,7 +443,7 @@ def test_first_last_tz_multi_column(method, ts, alpha, unit): pd.array([True, False], dtype="boolean"), pd.array([1, 2], dtype="Int64"), pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + pd.to_timedelta([1, 2], input_unit="D"), ], ) @pytest.mark.parametrize("function", ["first", "last", "min", "max"]) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 815513fe96009..496f73644d07b 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -364,7 +364,7 @@ def test_groupby_quantile_allNA_column(dtype): def test_groupby_timedelta_quantile(): # GH: 29485 df = DataFrame( - {"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]} + {"value": pd.to_timedelta(np.arange(4), input_unit="s"), "group": [1, 1, 2, 2]} ) result = df.groupby("group").quantile(0.99) expected = DataFrame( diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 519c2c3064e59..aab16c58da9d6 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -132,7 +132,7 @@ def test_series_groupby_value_counts_with_grouper(utc): } ).drop([3]) - df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s") + df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, input_unit="s") dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 @@ -1127,7 +1127,9 @@ def test_value_counts_time_grouper(utc, unit): } ).drop([3]) - df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s").dt.as_unit(unit) + df["Datetime"] 
= to_datetime(df["Timestamp"], utc=utc, input_unit="s").dt.as_unit( + unit + ) gb = df.groupby(Grouper(freq="1D", key="Datetime")) result = gb.value_counts() dates = to_datetime( diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 635393e41bd9d..2298f98d52c09 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1273,14 +1273,14 @@ def test_apply_empty_string_nan_coerce_bug(): { "a": [1, 1, 2, 2], "b": ["", "", "", ""], - "c": pd.to_datetime([1, 2, 3, 4], unit="s"), + "c": pd.to_datetime([1, 2, 3, 4], input_unit="s"), } ) .groupby(["a", "b"]) .apply(lambda df: df.iloc[-1]) ) expected = DataFrame( - [[pd.to_datetime(2, unit="s")], [pd.to_datetime(4, unit="s")]], + [[pd.to_datetime(2, input_unit="s")], [pd.to_datetime(4, input_unit="s")]], columns=["c"], index=MultiIndex.from_tuples([(1, ""), (2, "")], names=["a", "b"]), ) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 679f7eb7f7f11..e4b252897577e 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -208,7 +208,7 @@ def test_ngroup_respects_groupby_order(self, sort): [Timestamp(f"2016-05-{i:02d} 20:09:25+00:00") for i in range(1, 4)], [Timestamp(f"2016-05-{i:02d} 20:09:25") for i in range(1, 4)], [Timestamp(f"2016-05-{i:02d} 20:09:25", tz="UTC") for i in range(1, 4)], - [Timedelta(x, unit="h") for x in range(1, 4)], + [Timedelta(x, input_unit="h") for x in range(1, 4)], [Period(freq="2W", year=2017, month=x) for x in range(1, 4)], ], ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4955b1fe0da54..023b21fb13467 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2339,15 +2339,15 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): df = DataFrame( { "x": [1, 0, 1, 1, 0], - "y": [Timedelta(i, "days") for i in range(1, 6)], - "z": [Timedelta(i * 10, 
"days") for i in range(1, 6)], + "y": [Timedelta(i, input_unit="days") for i in range(1, 6)], + "z": [Timedelta(i * 10, input_unit="days") for i in range(1, 6)], } ) expected = DataFrame( { - "y": [Timedelta(i, "days") for i in range(7, 9)], - "z": [Timedelta(i * 10, "days") for i in range(7, 9)], + "y": [Timedelta(i, input_unit="days") for i in range(7, 9)], + "z": [Timedelta(i * 10, input_unit="days") for i in range(7, 9)], }, index=Index([0, 1], dtype="int64", name="x"), ) @@ -2362,13 +2362,16 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): df = DataFrame( { "x": [1, 0, 1, 1, 0], - "y": [Timedelta(i, "days") for i in range(1, 6)], + "y": [Timedelta(i, input_unit="days") for i in range(1, 6)], "z": list(range(1, 6)), } ) expected = DataFrame( - {"y": [Timedelta(7, "days"), Timedelta(8, "days")], "z": [7, 8]}, + { + "y": [Timedelta(7, input_unit="days"), Timedelta(8, input_unit="days")], + "z": [7, 8], + }, index=Index([0, 1], dtype="int64", name="x"), ) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c418b2a18008b..e18b0c1ad26c9 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1018,7 +1018,7 @@ def test_dti_constructor_with_non_nano_dtype(self, tz): pointwise = [ vals[0].tz_localize(tz), Timestamp(vals[1], tz=tz), - to_datetime(vals[2], unit="us", utc=True).tz_convert(tz), + to_datetime(vals[2], input_unit="us", utc=True).tz_convert(tz), ] exp_vals = [x.as_unit("us").asm8 for x in pointwise] exp_arr = np.array(exp_vals, dtype="M8[us]") diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py index 4fe429ce71ee4..3b4e0d18d21ab 100644 --- a/pandas/tests/indexes/period/methods/test_to_timestamp.py +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -109,7 +109,9 @@ def test_to_timestamp_pi_mult(self): expected = 
DatetimeIndex( ["2011-02-28", "NaT", "2011-03-31"], dtype="M8[ns]", name="idx" ) - expected = expected + Timedelta(1, "D") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) def test_to_timestamp_pi_combined(self): @@ -125,14 +127,18 @@ def test_to_timestamp_pi_combined(self): expected = DatetimeIndex( ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx", dtype="M8[ns]" ) - expected = expected + Timedelta(1, "s") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) result = idx.to_timestamp(how="E", freq="h") expected = DatetimeIndex( ["2011-01-02 00:00", "2011-01-03 01:00"], dtype="M8[ns]", name="idx" ) - expected = expected + Timedelta(1, "h") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) def test_to_timestamp_1703(self): diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index d8afd29ff31c5..a95e8d3b4bcbe 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -21,7 +21,7 @@ def test_end_time(self): # GH#17157 index = period_range(freq="M", start="2016-01-01", end="2016-05-31") expected_index = date_range("2016-01-01", end="2016-05-31", freq="ME") - expected_index += Timedelta(1, "D") - Timedelta(1, "ns") + expected_index += Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") tm.assert_index_equal(index.end_time, expected_index) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index e45d11e6286e2..c6a94f05319ec 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ 
b/pandas/tests/indexes/test_datetimelike.py @@ -164,7 +164,7 @@ def test_where_cast_str(self, simple_index): def test_diff(self, unit): # GH 55080 - dti = pd.to_datetime([10, 20, 30], unit=unit).as_unit(unit) + dti = pd.to_datetime([10, 20, 30], input_unit=unit).as_unit(unit) result = dti.diff(1) - expected = pd.to_timedelta([pd.NaT, 10, 10], unit=unit).as_unit(unit) + expected = pd.to_timedelta([pd.NaT, 10, 10], input_unit=unit).as_unit(unit) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/methods/test_shift.py b/pandas/tests/indexes/timedeltas/methods/test_shift.py index 9bbf06dc51a0c..d06058b1d4c65 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_shift.py +++ b/pandas/tests/indexes/timedeltas/methods/test_shift.py @@ -37,7 +37,7 @@ def test_tdi_shift_minutes(self): def test_tdi_shift_int(self): # GH#8083 - tdi = pd.to_timedelta(range(5), unit="D") + tdi = pd.to_timedelta(range(5), input_unit="D") trange = tdi._with_freq("infer") + pd.offsets.Hour(1) result = trange.shift(1) expected = TimedeltaIndex( @@ -54,7 +54,7 @@ def test_tdi_shift_int(self): def test_tdi_shift_nonstandard_freq(self): # GH#8083 - tdi = pd.to_timedelta(range(5), unit="D") + tdi = pd.to_timedelta(range(5), input_unit="D") trange = tdi._with_freq("infer") + pd.offsets.Hour(1) result = trange.shift(3, freq="2D 1s") expected = TimedeltaIndex( diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 63d2161dcec09..6645994da4f94 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -103,7 +103,7 @@ def test_float64_ns_rounded(self): def test_float64_unit_conversion(self): # GH#23539 - tdi = to_timedelta([1.5, 2.25], unit="D") + tdi = to_timedelta([1.5, 2.25], input_unit="D") expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) tm.assert_index_equal(tdi, expected) @@ -170,7 +170,7 @@ def 
test_constructor_coverage(self): # NumPy string array strings = np.array(["1 days", "2 days", "3 days"]) result = TimedeltaIndex(strings) - expected = to_timedelta([1, 2, 3], unit="D") + expected = to_timedelta([1, 2, 3], input_unit="D") tm.assert_index_equal(result, expected) from_ints = TimedeltaIndex(expected.asi8) @@ -264,5 +264,5 @@ def test_unit_deprecated(self, unit, unit_depr): tm.assert_index_equal(result, expected) with tm.assert_produces_warning(Pandas4Warning, match=msg): - tdi = to_timedelta([1, 2], unit=unit_depr) + tdi = to_timedelta([1, 2], input_unit=unit_depr) tm.assert_index_equal(tdi, expected) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index ca126a68cbd43..86a2a94c0c279 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -343,7 +343,7 @@ def test_contains(self): # GH#13603, GH#59051 msg = "'d' is deprecated and will be removed in a future version." 
with tm.assert_produces_warning(Pandas4Warning, match=msg): - td = to_timedelta(range(5), unit="d") + offsets.Hour(1) + td = to_timedelta(range(5), input_unit="d") + offsets.Hour(1) for v in [NaT, None, float("nan"), np.nan]: assert v not in td diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 01ff536652d2f..eab5052b1fce4 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -24,23 +24,23 @@ def test_timedelta_range_unit(self): tm.assert_numpy_array_equal(tdi.to_numpy(), exp_arr) def test_timedelta_range(self): - expected = to_timedelta(np.arange(5), unit="D") + expected = to_timedelta(np.arange(5), input_unit="D") result = timedelta_range("0 days", periods=5, freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(11), unit="D") + expected = to_timedelta(np.arange(11), input_unit="D") result = timedelta_range("0 days", "10 days", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + expected = to_timedelta(np.arange(5), input_unit="D") + Second(2) + Day() result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2) + expected = to_timedelta([1, 3, 5, 7, 9], input_unit="D") + Second(2) result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(50), unit="min") * 30 + expected = to_timedelta(np.arange(50), input_unit="min") * 30 result = timedelta_range("0 days", freq="30min", periods=50) tm.assert_index_equal(result, expected) @@ -50,17 +50,17 @@ def test_timedelta_units_H_S_deprecated(self, depr_unit, unit): depr_msg = ( f"'{depr_unit}' is deprecated and will be removed in a future version." 
) - expected = to_timedelta(np.arange(5), unit=unit) + expected = to_timedelta(np.arange(5), input_unit=unit) with tm.assert_produces_warning(Pandas4Warning, match=depr_msg): - result = to_timedelta(np.arange(5), unit=depr_unit) - tm.assert_index_equal(result, expected) + result = to_timedelta(np.arange(5), input_unit=depr_unit) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("unit", ["T", "t", "L", "l", "U", "u", "N", "n"]) def test_timedelta_unit_T_L_U_N_raises(self, unit): msg = f"invalid unit abbreviation: {unit}" with pytest.raises(ValueError, match=msg): - to_timedelta(np.arange(5), unit=unit) + to_timedelta(np.arange(5), input_unit=unit) @pytest.mark.parametrize( "periods, freq", [(3, "2D"), (5, "D"), (6, "19h12min"), (7, "16h"), (9, "12h")] diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index a31f463d0b17e..0373f1d0073d5 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -511,7 +511,11 @@ def test_loc_and_at_with_categorical_index(self): # pandas scalars [Interval(1, 4), Interval(4, 6), Interval(6, 9)], [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], - [Timedelta(1, "D"), Timedelta(2, "D"), Timedelta(3, "D")], + [ + Timedelta(1, input_unit="D"), + Timedelta(2, input_unit="D"), + Timedelta(3, input_unit="D"), + ], # pandas Integer arrays *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES), # other pandas arrays diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8d59b0c026e0c..3890da5d837f8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -721,7 +721,9 @@ def test_loc_modify_datetime(self): {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} ) - df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("ms") + df["date_dt"] = to_datetime(df["date"], input_unit="ms", 
cache=True).dt.as_unit( + "ms" + ) df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] @@ -1416,7 +1418,7 @@ def test_loc_setitem_int_label_with_float_index(self, float_numpy_dtype): ) def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): # GH#16637 - tdi = to_timedelta(range(10), unit="s") + tdi = to_timedelta(range(10), input_unit="s") df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) df.loc[df.index[indexer], "x"] = 20 @@ -2391,7 +2393,7 @@ def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): def test_loc_getitem_str_timedeltaindex(self): # GH#16896 - df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) + df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), input_unit="days")) expected = df.iloc[0] sliced = df.loc["0 days"] tm.assert_series_equal(sliced, expected) @@ -2598,7 +2600,7 @@ class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): # GH#14946 df = DataFrame({"x": range(10)}) - df.index = to_timedelta(range(10), unit="s") + df.index = to_timedelta(range(10), input_unit="s") conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] expected_data = [ [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], @@ -2611,7 +2613,7 @@ def test_loc_setitem_bool_mask_timedeltaindex(self): expected = DataFrame( data, - index=to_timedelta(range(10), unit="s"), + index=to_timedelta(range(10), input_unit="s"), columns=["x"], dtype="int64", ) @@ -3324,10 +3326,12 @@ def test_loc_assign_dict_to_row(self, dtype): def test_loc_setitem_dict_timedelta_multiple_set(self): # GH 16309 result = DataFrame(columns=["time", "value"]) - result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} - result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, input_unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, input_unit="s"), "value": "foo"} expected = DataFrame( - [[Timedelta(6, 
unit="s"), "foo"]], columns=["time", "value"], index=[1] + [[Timedelta(6, input_unit="s"), "foo"]], + columns=["time", "value"], + index=[1], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 3551cbc52b755..730ce2d8c4023 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -524,6 +524,9 @@ def test_non_str_names_w_duplicates(): ), ], ) +@pytest.mark.filterwarnings( + "ignore:The 'unit' keyword is deprecated:DeprecationWarning" +) def test_pandas_nullable_with_missing_values( data: list, dtype: str, expected_dtype: str ) -> None: @@ -592,6 +595,9 @@ def test_pandas_nullable_with_missing_values( ), ], ) +@pytest.mark.filterwarnings( + "ignore:The 'unit' keyword is deprecated:DeprecationWarning" +) def test_pandas_nullable_without_missing_values( data: list, dtype: str, expected_dtype: str ) -> None: diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d2da38be865a0..e3c94e6387daa 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2054,7 +2054,7 @@ def test_too_long(self): class TestTimedelta64Formatter: def test_days(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(5)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" assert result[1].strip() == "1 days" @@ -2070,29 +2070,29 @@ def test_days(self): assert result[0].strip() == "1 days" def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(5)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(-x).get_result() assert result[0].strip() == "0 days" assert result[1].strip() == "-1 days" def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values + y = 
pd.to_timedelta(list(range(5)) + [NaT], input_unit="s")._values result = fmt._Timedelta64Formatter(y).get_result() assert result[0].strip() == "0 days 00:00:00" assert result[1].strip() == "0 days 00:00:01" def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values + y = pd.to_timedelta(list(range(5)) + [NaT], input_unit="s")._values result = fmt._Timedelta64Formatter(-y).get_result() assert result[0].strip() == "0 days 00:00:00" assert result[1].strip() == "-1 days +23:59:59" def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(1)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" - x = pd.to_timedelta(list(range(1)), unit="D")._values + x = pd.to_timedelta(list(range(1)), input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e4d5eb4758303..485f7a6984bdb 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -646,7 +646,7 @@ def test_warns_non_roundtrippable_names(self, idx): def test_timestamp_in_columns(self): df = DataFrame( - [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")] + [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, input_unit="s")] ) result = df.to_json(orient="table") js = json.loads(result) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9c93be0937e91..d8f1301bcfeb6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1002,7 +1002,10 @@ def test_date_unit(self, unit, datetime_frame): (DataFrame({"A": [True, False, False]}), None), ( DataFrame( - {"A": ["a", "b", "c"], "B": pd.to_timedelta(np.arange(3), unit="D")} + { + "A": ["a", "b", "c"], 
+ "B": pd.to_timedelta(np.arange(3), input_unit="D"), + } ), Pandas4Warning, ), @@ -1129,7 +1132,7 @@ def test_url(self, field, dtype, httpserver): assert result[field].dtype == dtype def test_timedelta(self): - converter = lambda x: pd.to_timedelta(x, unit="ms") + converter = lambda x: pd.to_timedelta(x, input_unit="ms") ser = Series([timedelta(23), timedelta(seconds=5)], dtype="m8[ns]") assert ser.dtype == "timedelta64[ns]" @@ -1170,7 +1173,7 @@ def test_timedelta2(self): with tm.assert_produces_warning(Pandas4Warning, match=msg): data = StringIO(frame.to_json(date_unit="ns")) result = read_json(data) - result["a"] = pd.to_timedelta(result.a, unit="ns") + result["a"] = pd.to_timedelta(result.a, input_unit="ns") result["c"] = pd.to_datetime(result.c) tm.assert_frame_equal(frame, result) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 0dffb284fa6d2..664bb35883a29 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -999,7 +999,12 @@ def test_query_compare_column_type(setup_path): for op in ["<", ">", "=="]: # non strings to string column always fail - for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: + for v in [ + 2.1, + True, + Timestamp("2014-01-01"), + pd.Timedelta(1, input_unit="s"), + ]: query = f"date {op} v" msg = f"Cannot compare {v} of type {type(v)} to string column" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index a17cd27f8284e..3ee18470a8441 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -30,9 +30,9 @@ def data_test_ix(request, dirpath): fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") df = pd.read_csv(fname) epoch = datetime(1960, 1, 1) - t1 = pd.to_timedelta(df["Column4"], unit="D") + t1 = pd.to_timedelta(df["Column4"], input_unit="D") df["Column4"] = (epoch + t1).astype("M8[s]") - t2 = 
pd.to_timedelta(df["Column12"], unit="D") + t2 = pd.to_timedelta(df["Column12"], input_unit="D") df["Column12"] = (epoch + t2).astype("M8[s]") for k in range(df.shape[1]): col = df.iloc[:, k] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5865c46b4031e..812313169af95 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2982,7 +2982,9 @@ def test_date_parsing(conn, request): df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": "s"}) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) - df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": {"unit": "s"}}) + df = sql.read_sql_table( + "types", conn, parse_dates={"IntDateCol": {"input_unit": "s"}} + ) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index e8cdc02591bfe..0809810b63c2b 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -130,9 +130,9 @@ def test_subplots_timeseries_y_axis(self, col): data = { "numeric": np.array([1, 2, 5]), "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), + pd.Timedelta(-10, input_unit="s"), + pd.Timedelta(10, input_unit="m"), + pd.Timedelta(10, input_unit="h"), ], "datetime_no_tz": [ pd.to_datetime("2017-08-01 00:00:00"), diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 4c68a25309303..fdbc826f7f4ba 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1112,7 +1112,7 @@ def test_resample_anchored_intraday(unit): result = df.resample("ME").mean() expected = df.resample("ME").mean().to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, 
input_unit="ns") - Timedelta(1, input_unit="D") expected.index = expected.index.as_unit(unit)._with_freq("infer") assert expected.index.freq == "ME" tm.assert_frame_equal(result, expected) @@ -1121,7 +1121,7 @@ def test_resample_anchored_intraday(unit): exp = df.shift(1, freq="D").resample("ME").mean().to_period() exp = exp.to_timestamp(how="end") - exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") + exp.index = exp.index + Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") exp.index = exp.index.as_unit(unit)._with_freq("infer") assert exp.index.freq == "ME" tm.assert_frame_equal(result, exp) @@ -1134,7 +1134,7 @@ def test_resample_anchored_intraday2(unit): result = df.resample("QE").mean() expected = df.resample("QE").mean().to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") expected.index._data.freq = "QE" expected.index._freq = lib.no_default expected.index = expected.index.as_unit(unit) @@ -1144,7 +1144,7 @@ def test_resample_anchored_intraday2(unit): expected = df.shift(1, freq="D").resample("QE").mean() expected = expected.to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") expected.index._data.freq = "QE" expected.index._freq = lib.no_default expected.index = expected.index.as_unit(unit) @@ -1515,7 +1515,7 @@ def test_resample_across_dst(): # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 df1 = DataFrame([1477786980, 1477790580], columns=["ts"]) dti1 = DatetimeIndex( - pd.to_datetime(df1.ts, unit="s") + pd.to_datetime(df1.ts, input_unit="s") .dt.tz_localize("UTC") .dt.tz_convert("Europe/Madrid") ) @@ -1524,7 +1524,7 @@ def test_resample_across_dst(): # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 df2 = DataFrame([1477785600, 1477789200], 
columns=["ts"]) dti2 = DatetimeIndex( - pd.to_datetime(df2.ts, unit="s") + pd.to_datetime(df2.ts, input_unit="s") .dt.tz_localize("UTC") .dt.tz_convert("Europe/Madrid"), freq="h", diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index f3c52a674cf66..bd6bd7fca2592 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -502,7 +502,7 @@ def test_groupby_resample_empty_sum_string( result = gbrs.sum(min_count=min_count) index = pd.MultiIndex( - levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]], + levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", input_unit="ns")]], codes=[[0, 1, 2], [0, 0, 0]], names=["A", None], ) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 309810b656ed3..c3258be0f2863 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -53,7 +53,7 @@ def test_resample_with_timedeltas(): expected.index = timedelta_range("0 days", freq="30min", periods=50) df = DataFrame( - {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="min") + {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), input_unit="min") ) result = df.resample("30min").sum() @@ -97,7 +97,9 @@ def test_resample_offset_with_timedeltaindex(): def test_resample_categorical_data_with_timedeltaindex(): # GH #12169 - df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s")) + df = DataFrame( + {"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), input_unit="s") + ) df["Group"] = df["Group_obj"].astype("category") result = df.resample("10s").agg(lambda x: (x.value_counts().index[0])) exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit( @@ -180,7 +182,7 @@ def test_resample_quantile_timedelta(unit): # GH: 29485 dtype = np.dtype(f"m8[{unit}]") df = DataFrame( - {"value": 
pd.to_timedelta(np.arange(4), unit="s").astype(dtype)}, + {"value": pd.to_timedelta(np.arange(4), input_unit="s").astype(dtype)}, index=pd.date_range("20200101", periods=4, tz="UTC"), ) result = df.resample("2D").quantile(0.99) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index b1cba7ee31eac..a295ce357cebe 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -538,7 +538,7 @@ def test_concat_period_other_series3(self): def test_concat_timedelta64_block(): - rng = to_timedelta(np.arange(10), unit="s") + rng = to_timedelta(np.arange(10), input_unit="s") df = DataFrame({"time": rng}) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 3e697f7de0a3c..484df3626583b 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3253,14 +3253,18 @@ def test_timedelta_tolerance_nearest(self, unit): columns=["time", "left"], ) - left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]") + left["time"] = pd.to_timedelta(left["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) right = pd.DataFrame( list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5], strict=True)), columns=["time", "right"], ) - right["time"] = pd.to_timedelta(right["time"], "ms").astype(f"m8[{unit}]") + right["time"] = pd.to_timedelta(right["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) expected = pd.DataFrame( list( @@ -3274,7 +3278,9 @@ def test_timedelta_tolerance_nearest(self, unit): columns=["time", "left", "right"], ) - expected["time"] = pd.to_timedelta(expected["time"], "ms").astype(f"m8[{unit}]") + expected["time"] = pd.to_timedelta(expected["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) result = merge_asof( left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" diff --git a/pandas/tests/reshape/test_qcut.py 
b/pandas/tests/reshape/test_qcut.py index b6d45aeab8a7b..a817a7f8750b0 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -206,7 +206,7 @@ def test_qcut_nat(ser, unit): # see gh-19768 ser = Series(ser) ser = ser.dt.as_unit(unit) - td = Timedelta(1, unit=unit).as_unit(unit) + td = Timedelta(1, input_unit=unit).as_unit(unit) left = Series([ser[0] - td, np.nan, ser[2] - Day()], dtype=ser.dtype) right = Series([ser[2] - Day(), np.nan, ser[2]], dtype=ser.dtype) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index eff90335ebab1..8a8c47f4e3221 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -665,12 +665,12 @@ def test_hash(self): def test_to_timestamp_mult(self): p = Period("2011-01", freq="M") assert p.to_timestamp(how="S") == Timestamp("2011-01-01") - expected = Timestamp("2011-02-01") - Timedelta(1, "ns") + expected = Timestamp("2011-02-01") - Timedelta(1, input_unit="ns") assert p.to_timestamp(how="E") == expected p = Period("2011-01", freq="3M") assert p.to_timestamp(how="S") == Timestamp("2011-01-01") - expected = Timestamp("2011-04-01") - Timedelta(1, "ns") + expected = Timestamp("2011-04-01") - Timedelta(1, input_unit="ns") assert p.to_timestamp(how="E") == expected @pytest.mark.filterwarnings( @@ -712,19 +712,19 @@ def _ex(p): p = Period("1985", freq="Y") result = p.to_timestamp("h", how="end") - expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected result = p.to_timestamp("3h", how="end") assert result == expected result = p.to_timestamp("min", how="end") - expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected result = p.to_timestamp("2min", how="end") assert result == expected result = p.to_timestamp(how="end") - expected = 
Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected expected = datetime(1985, 1, 1) @@ -913,7 +913,7 @@ def test_inner_bounds_start_and_end_time(self, bound, offset, period_property): period = TestPeriodProperties._period_constructor(bound, -offset) expected = period.to_timestamp().round(freq="s") assert getattr(period, period_property).round(freq="s") == expected - expected = (bound - offset * Timedelta(1, unit="s")).floor("s") + expected = (bound - offset * Timedelta(1, input_unit="s")).floor("s") assert getattr(period, period_property).floor("s") == expected def test_start_time(self): diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 6f7f2a339d944..9148c602b0c81 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -38,7 +38,7 @@ class TestTimedeltaAdditionSubtraction: @pytest.mark.parametrize( "ten_seconds", [ - Timedelta(10, unit="s"), + Timedelta(10, input_unit="s"), timedelta(seconds=10), np.timedelta64(10, "s"), np.timedelta64(10000000000, "ns"), @@ -82,7 +82,7 @@ def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_datetimelike_scalar(self, op): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, datetime(2016, 1, 1)) if op is operator.add: @@ -106,7 +106,7 @@ def test_td_add_timestamp_overflow(self): ts = Timestamp("1700-01-01").as_unit("ns") msg = "Cannot cast 259987 from D to 'ns' without overflow." 
with pytest.raises(OutOfBoundsTimedelta, match=msg): - ts + Timedelta(13 * 19999, unit="D") + ts + Timedelta(13 * 19999, input_unit="D") msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): @@ -114,7 +114,7 @@ def test_td_add_timestamp_overflow(self): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_td(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, Timedelta(days=10)) assert isinstance(result, Timedelta) @@ -122,36 +122,36 @@ def test_td_add_td(self, op): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_pytimedelta(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, timedelta(days=9)) assert isinstance(result, Timedelta) assert result == Timedelta(days=19) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_timedelta64(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, np.timedelta64(-4, "D")) assert isinstance(result, Timedelta) assert result == Timedelta(days=6) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_offset(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, offsets.Hour(6)) assert isinstance(result, Timedelta) assert result == Timedelta(days=10, hours=6) def test_td_sub_td(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td assert isinstance(result, Timedelta) assert result == expected def test_td_sub_pytimedelta(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td.to_pytimedelta() assert isinstance(result, Timedelta) @@ -162,8 +162,8 @@ def test_td_sub_pytimedelta(self): assert 
result == expected def test_td_sub_timedelta64(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td.to_timedelta64() assert isinstance(result, Timedelta) @@ -175,12 +175,12 @@ def test_td_sub_timedelta64(self): def test_td_sub_nat(self): # In this context pd.NaT is treated as timedelta-like - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td - NaT assert result is NaT def test_td_sub_td64_nat(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") td_nat = np.timedelta64("NaT") result = td - td_nat @@ -190,13 +190,13 @@ def test_td_sub_td64_nat(self): assert result is NaT def test_td_sub_offset(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td - offsets.Hour(1) assert isinstance(result, Timedelta) - assert result == Timedelta(239, unit="h") + assert result == Timedelta(239, input_unit="h") def test_td_add_sub_numeric_raises(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") msg = "unsupported operand type" for other in [2, 2.0, np.int64(2), np.float64(2)]: with pytest.raises(TypeError, match=msg): @@ -237,7 +237,7 @@ def test_td_add_sub_int_ndarray(self): other - td def test_td_rsub_nat(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = NaT - td assert result is NaT @@ -245,9 +245,9 @@ def test_td_rsub_nat(self): assert result is NaT def test_td_rsub_offset(self): - result = offsets.Hour(1) - Timedelta(10, unit="D") + result = offsets.Hour(1) - Timedelta(10, input_unit="D") assert isinstance(result, Timedelta) - assert result == Timedelta(-239, unit="h") + assert result == Timedelta(-239, input_unit="h") def test_td_sub_timedeltalike_object_dtype_array(self): # GH#21980 @@ -365,7 +365,7 @@ class TestTimedeltaMultiplicationDivision: @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def 
test_td_mul_nat(self, op, td_nat): # GH#19819 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") typs = "|".join(["numpy.timedelta64", "NaTType", "Timedelta"]) msg = "|".join( [ @@ -380,7 +380,7 @@ def test_td_mul_nat(self, op, td_nat): @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def test_td_mul_nan(self, op, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, nan) assert result is NaT @@ -452,7 +452,7 @@ def test_td_mul_td64_ndarray_invalid(self): def test_td_div_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / offsets.Hour(1) assert result == 240 @@ -483,7 +483,7 @@ def test_td_div_td64_non_nano(self): def test_td_div_numeric_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / 2 assert isinstance(result, Timedelta) @@ -503,7 +503,7 @@ def test_td_div_numeric_scalar(self): ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / nan assert result is NaT @@ -535,7 +535,7 @@ def test_td_div_ndarray_0d(self): def test_td_rdiv_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = offsets.Hour(1) / td assert result == 1 / 240.0 @@ -543,7 +543,7 @@ def test_td_rdiv_timedeltalike_scalar(self): def test_td_rdiv_na_scalar(self): # GH#31869 None gets cast to NaT - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = NaT / td assert np.isnan(result) @@ -563,7 +563,7 @@ def test_td_rdiv_na_scalar(self): np.nan / td def test_td_rdiv_ndarray(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") arr = np.array([td], dtype=object) result = arr / td @@ -586,7 +586,7 @@ def test_td_rdiv_ndarray(self): 
arr / td def test_td_rdiv_ndarray_0d(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") arr = np.array(td.asm8) @@ -753,7 +753,7 @@ def test_td_rfloordiv_intarray(self): msg = "Invalid dtype" with pytest.raises(TypeError, match=msg): - ints // Timedelta(1, unit="s") + ints // Timedelta(1, input_unit="s") def test_td_rfloordiv_numeric_series(self): # GH#18846 @@ -877,7 +877,7 @@ def test_divmod_numeric(self): td = Timedelta(days=2, hours=6) result = divmod(td, 53 * 3600 * 1e9) - assert result[0] == Timedelta(1, unit="ns") + assert result[0] == Timedelta(1, input_unit="ns") assert isinstance(result[1], Timedelta) assert result[1] == Timedelta(hours=1) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 28614c513e8ef..bac3546cd6bb4 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -25,13 +25,13 @@ def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" with pytest.raises(ValueError, match=msg): - Timedelta(10, unit) + Timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta(10, unit) + to_timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta([1, 2], unit) + to_timedelta([1, 2], input_unit=unit) @pytest.mark.parametrize( "unit,unit_depr", @@ -49,9 +49,10 @@ def test_unit_deprecated(self, unit, unit_depr): # GH#52536 msg = f"'{unit_depr}' is deprecated and will be removed in a future version." 
- expected = Timedelta(1, unit=unit) + expected = Timedelta(1, input_unit=unit) with tm.assert_produces_warning(Pandas4Warning, match=msg): - result = Timedelta(1, unit=unit_depr) + result = Timedelta(1, input_unit=unit_depr) + tm.assert_equal(result, expected) @pytest.mark.parametrize( @@ -135,7 +136,7 @@ def test_unit_parser(self, unit, np_unit, wrapper): ) # TODO(2.0): the desired output dtype may have non-nano resolution - result = to_timedelta(wrapper(range(5)), unit=unit) + result = to_timedelta(wrapper(range(5)), input_unit=unit) tm.assert_index_equal(result, expected) str_repr = [f"{x}{unit}" for x in np.arange(5)] @@ -146,9 +147,9 @@ def test_unit_parser(self, unit, np_unit, wrapper): # scalar expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) - result = to_timedelta(2, unit=unit) + result = to_timedelta(2, input_unit=unit) assert result == expected - result = Timedelta(2, unit=unit) + result = Timedelta(2, input_unit=unit) assert result == expected result = to_timedelta(f"2{unit}") @@ -160,13 +161,13 @@ def test_unit_parser(self, unit, np_unit, wrapper): def test_unit_T_L_N_U_raises(self, unit): msg = f"invalid unit abbreviation: {unit}" with pytest.raises(ValueError, match=msg): - Timedelta(1, unit=unit) + Timedelta(1, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta(10, unit) + to_timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta([1, 2], unit) + to_timedelta([1, 2], input_unit=unit) def test_construct_from_kwargs_overflow(): @@ -182,10 +183,10 @@ def test_construct_from_kwargs_overflow(): def test_construct_with_weeks_unit_overflow(): # GH#47268 don't silently wrap around with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): - Timedelta(1000000000000000000, unit="W") + Timedelta(1000000000000000000, input_unit="W") with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): - Timedelta(1000000000000000000.0, unit="W") + 
Timedelta(1000000000000000000.0, input_unit="W") def test_construct_from_td64_with_unit(): @@ -193,15 +194,15 @@ def test_construct_from_td64_with_unit(): # results, and in non-overflow cases is irrelevant GH#46827 obj = np.timedelta64(123456789000000000, "h") - msg = "The 'unit' keyword is only used when the Timedelta input is" + msg = "The 'input_unit' keyword is only used when the Timedelta input is" with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): with tm.assert_produces_warning(UserWarning, match=msg): - Timedelta(obj, unit="ps") + Timedelta(obj, input_unit="ps") with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): with tm.assert_produces_warning(UserWarning, match=msg): - Timedelta(obj, unit="ns") + Timedelta(obj, input_unit="ns") with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): Timedelta(obj) @@ -269,8 +270,8 @@ def test_from_tick_reso(): def test_construction(): expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") - assert Timedelta(10, unit="D")._value == expected - assert Timedelta(10.0, unit="D")._value == expected + assert Timedelta(10, input_unit="D")._value == expected + assert Timedelta(10.0, input_unit="D")._value == expected assert Timedelta("10 days")._value == expected assert Timedelta(days=10)._value == expected assert Timedelta(days=10.0)._value == expected @@ -373,7 +374,7 @@ def test_construction(): expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64( 500, "ms" ).astype("m8[ns]").view("i8") - assert Timedelta(10.5, unit="s")._value == expected + assert Timedelta(10.5, input_unit="s")._value == expected # offset assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2) @@ -451,7 +452,7 @@ def test_overflow_on_construction(): # xref GH#17637 msg = "Cannot cast 139993 from D to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(7 * 19999, unit="D") + Timedelta(7 * 19999, input_unit="D") # used 
to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) @@ -479,7 +480,7 @@ def test_construction_out_of_bounds_td64ns(val, unit): td = Timedelta(td64) if unit != "M": - # with unit="M" the conversion to "s" is poorly defined + # with input_unit="M" the conversion to "s" is poorly defined # (and numpy issues DeprecationWarning) assert td.asm8 == td64 assert td.asm8.dtype == "m8[s]" @@ -638,7 +639,7 @@ def test_timedelta_pass_td_and_kwargs_raises(): @pytest.mark.filterwarnings( - "ignore:The 'unit' keyword is only used when the Timedelta input:UserWarning" + "ignore:The 'input_unit' keyword is only used when the Timedelta input:UserWarning" ) @pytest.mark.parametrize( "constructor, value, unit", @@ -650,7 +651,7 @@ def test_timedelta_pass_td_and_kwargs_raises(): ) def test_string_with_unit(constructor, value, unit): with pytest.raises(ValueError, match="unit must not be specified"): - constructor(value, unit=unit) + constructor(value, input_unit=unit) @pytest.mark.parametrize( @@ -689,7 +690,7 @@ class MyCustomTimedelta(Timedelta): def test_non_nano_value(): # https://github.com/pandas-dev/pandas/issues/49076 - result = Timedelta(10, unit="D").as_unit("s").value + result = Timedelta(10, input_unit="D").as_unit("s").value # `.value` shows nanoseconds, even though unit is 's' assert result == 864000000000000 @@ -699,7 +700,7 @@ def test_non_nano_value(): r"Use `.asm8.view\('i8'\)` to cast represent Timedelta in its " r"own unit \(here, s\).$" ) - td = Timedelta(1_000, "D").as_unit("s") * 1_000 + td = Timedelta(1_000, input_unit="D").as_unit("s") * 1_000 with pytest.raises(OverflowError, match=msg): td.value # check that the suggested workaround actually works diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py index 1aafeec2ceed5..24031679005ca 100644 --- a/pandas/tests/scalar/timedelta/test_formats.py +++ b/pandas/tests/scalar/timedelta/test_formats.py @@ -6,10 +6,10 @@ 
@pytest.mark.parametrize( "td, expected_repr", [ - (Timedelta(10, unit="D"), "Timedelta('10 days 00:00:00')"), - (Timedelta(10, unit="s"), "Timedelta('0 days 00:00:10')"), - (Timedelta(10, unit="ms"), "Timedelta('0 days 00:00:00.010000')"), - (Timedelta(-10, unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), + (Timedelta(10, input_unit="D"), "Timedelta('10 days 00:00:00')"), + (Timedelta(10, input_unit="s"), "Timedelta('0 days 00:00:10')"), + (Timedelta(10, input_unit="ms"), "Timedelta('0 days 00:00:00.010000')"), + (Timedelta(-10, input_unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), ], ) def test_repr(td, expected_repr): @@ -46,10 +46,10 @@ def test_isoformat(td, expected_iso): class TestReprBase: def test_none(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base() assert drepr(delta_1d) == "1 days" @@ -63,10 +63,10 @@ def test_none(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_sub_day(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base(format="sub_day") assert drepr(delta_1d) == "1 days" @@ -80,10 +80,10 @@ def test_sub_day(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_long(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + 
delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base(format="long") assert drepr(delta_1d) == "1 days 00:00:00" @@ -97,9 +97,9 @@ def test_long(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_all(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1ns = Timedelta(1, unit="ns") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1ns = Timedelta(1, input_unit="ns") drepr = lambda x: x._repr_base(format="all") assert drepr(delta_1d) == "1 days 00:00:00.000000000" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index cf878b1164b3f..e14db20322522 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -283,7 +283,7 @@ def test_timedelta_class_min_max_resolution(): class TestTimedeltaUnaryOps: def test_invert(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") msg = "bad operand type for unary ~" with pytest.raises(TypeError, match=msg): @@ -298,12 +298,12 @@ def test_invert(self): ~(td.to_timedelta64()) def test_unary_ops(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") # __neg__, __pos__ - assert -td == Timedelta(-10, unit="D") + assert -td == Timedelta(-10, input_unit="D") assert -td == Timedelta("-10D") - assert +td == Timedelta(10, unit="D") + assert +td == Timedelta(10, input_unit="D") # __abs__, __abs__(__neg__) assert abs(td) == td @@ -322,7 +322,7 @@ class TestTimedeltas: ) def test_rounding_on_int_unit_construction(self, unit, value, expected): # GH 12690 - result = Timedelta(value, unit=unit) + result = Timedelta(value, input_unit=unit) assert result._value == expected result = Timedelta(str(value) + unit) assert result._value == expected @@ -337,7 +337,7 @@ def 
test_total_seconds_scalar(self): assert np.isnan(rng.total_seconds()) def test_conversion(self): - for td in [Timedelta(10, unit="D"), Timedelta("1 days, 10:11:12.012345")]: + for td in [Timedelta(10, input_unit="D"), Timedelta("1 days, 10:11:12.012345")]: pydt = td.to_pytimedelta() assert td == Timedelta(pydt) assert td == pydt @@ -402,7 +402,7 @@ def check(value): rng.milliseconds # components - tup = to_timedelta(-1, "us").components + tup = to_timedelta(-1, input_unit="us").components assert tup.days == -1 assert tup.hours == 23 assert tup.minutes == 59 @@ -432,7 +432,7 @@ def check(value): # TODO: this is a test of to_timedelta string parsing def test_iso_conversion(self): # GH #21877 - expected = Timedelta(1, unit="s") + expected = Timedelta(1, input_unit="s") assert to_timedelta("P0DT0H0M1S") == expected # TODO: this is a test of to_timedelta returning NaT @@ -448,12 +448,12 @@ def test_nat_converters(self): def test_numeric_conversions(self): assert Timedelta(0) == np.timedelta64(0, "ns") assert Timedelta(10) == np.timedelta64(10, "ns") - assert Timedelta(10, unit="ns") == np.timedelta64(10, "ns") + assert Timedelta(10, input_unit="ns") == np.timedelta64(10, "ns") - assert Timedelta(10, unit="us") == np.timedelta64(10, "us") - assert Timedelta(10, unit="ms") == np.timedelta64(10, "ms") - assert Timedelta(10, unit="s") == np.timedelta64(10, "s") - assert Timedelta(10, unit="D") == np.timedelta64(10, "D") + assert Timedelta(10, input_unit="us") == np.timedelta64(10, "us") + assert Timedelta(10, input_unit="ms") == np.timedelta64(10, "ms") + assert Timedelta(10, input_unit="s") == np.timedelta64(10, "s") + assert Timedelta(10, input_unit="D") == np.timedelta64(10, "D") def test_timedelta_conversions(self): assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( @@ -477,7 +477,7 @@ def test_to_numpy_alias(self): td.to_numpy(copy=True) def test_identity(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") assert 
isinstance(td, Timedelta) assert isinstance(td, timedelta) @@ -573,7 +573,7 @@ def test_pickle(self): def test_timedelta_hash_equality(self): # GH 11129 - v = Timedelta(1, "D") + v = Timedelta(1, input_unit="D") td = timedelta(days=1) assert hash(v) == hash(td) @@ -584,7 +584,7 @@ def test_timedelta_hash_equality(self): assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) # python timedeltas drop ns resolution - ns_td = Timedelta(1, "ns") + ns_td = Timedelta(1, input_unit="ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) @pytest.mark.slow @@ -621,26 +621,26 @@ def test_implementation_limits(self): assert max_td._value == lib.i8max # Beyond lower limit, a NAT before the Overflow - assert (min_td - Timedelta(1, "ns")) is NaT + assert (min_td - Timedelta(1, input_unit="ns")) is NaT msg = "int too (large|big) to convert" with pytest.raises(OverflowError, match=msg): - min_td - Timedelta(2, "ns") + min_td - Timedelta(2, input_unit="ns") with pytest.raises(OverflowError, match=msg): - max_td + Timedelta(1, "ns") + max_td + Timedelta(1, input_unit="ns") # Same tests using the internal nanosecond values - td = Timedelta(min_td._value - 1, "ns") + td = Timedelta(min_td._value - 1, input_unit="ns") assert td is NaT msg = "Cannot cast -9223372036854775809 from ns to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(min_td._value - 2, "ns") + Timedelta(min_td._value - 2, input_unit="ns") msg = "Cannot cast 9223372036854775808 from ns to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(max_td._value + 1, "ns") + Timedelta(max_td._value + 1, input_unit="ns") def test_total_seconds_precision(self): # GH 19458 @@ -688,8 +688,8 @@ def test_unit_deprecated(self, unit, unit_depr): msg = f"'{unit_depr}' is deprecated and will be removed in a future version." 
with tm.assert_produces_warning(Pandas4Warning, match=msg): - result = Timedelta(1, unit_depr) - assert result == Timedelta(1, unit) + result = Timedelta(1, input_unit=unit_depr) + assert result == Timedelta(1, input_unit=unit) @pytest.mark.parametrize( @@ -697,9 +697,9 @@ def test_unit_deprecated(self, unit, unit_depr): [ (Timedelta("10s"), True), (Timedelta("-10s"), True), - (Timedelta(10, unit="ns"), True), - (Timedelta(0, unit="ns"), False), - (Timedelta(-10, unit="ns"), True), + (Timedelta(10, input_unit="ns"), True), + (Timedelta(0, input_unit="ns"), False), + (Timedelta(-10, input_unit="ns"), True), (Timedelta(None), True), (NaT, True), ], @@ -711,7 +711,7 @@ def test_truthiness(value, expected): def test_timedelta_attribute_precision(): # GH 31354 - td = Timedelta(1552211999999999872, unit="ns") + td = Timedelta(1552211999999999872, input_unit="ns") result = td.days * 86400 result += td.seconds result *= 1000000 @@ -723,7 +723,7 @@ def test_timedelta_attribute_precision(): def test_to_pytimedelta_large_values(): - td = Timedelta(1152921504609987375, unit="ns") + td = Timedelta(1152921504609987375, input_unit="ns") result = td.to_pytimedelta() expected = timedelta(days=13343, seconds=86304, microseconds=609987) assert result == expected @@ -732,7 +732,7 @@ def test_to_pytimedelta_large_values(): def test_timedelta_week_suffix(): # GH#12691 ensure 'W' suffix works as a string passed to Timedelta expected = Timedelta("7 days") - result = Timedelta(1, unit="W") + result = Timedelta(1, input_unit="W") assert result == expected result = Timedelta("1W") diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 5c89cb3a66c2f..30880bd134a18 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -39,11 +39,11 @@ def test_constructor_int_float_with_YM_unit(self, typ): # GH#47266 avoid the conversions in cast_from_unit val = typ(150) - 
ts = Timestamp(val, unit="Y") + ts = Timestamp(val, input_unit="Y") expected = Timestamp("2120-01-01") assert ts == expected - ts = Timestamp(val, unit="M") + ts = Timestamp(val, input_unit="M") expected = Timestamp("1982-07-01") assert ts == expected @@ -54,17 +54,17 @@ def test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ): msg = f"cannot convert input {val} with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): - Timestamp(val, unit="D") + Timestamp(val, input_unit="D") def test_constructor_float_not_round_with_YM_unit_raises(self): # GH#47267 avoid the conversions in cast_from-unit - msg = "Conversion of non-round float with unit=[MY] is ambiguous" + msg = "Conversion of non-round float with input_unit=[MY] is ambiguous" with pytest.raises(ValueError, match=msg): - Timestamp(150.5, unit="Y") + Timestamp(150.5, input_unit="Y") with pytest.raises(ValueError, match=msg): - Timestamp(150.5, unit="M") + Timestamp(150.5, input_unit="M") @pytest.mark.parametrize( "value, check_kwargs", @@ -95,7 +95,7 @@ def test_constructor_float_not_round_with_YM_unit_raises(self): ) def test_construct_with_unit(self, value, check_kwargs): def check(value, unit=None, h=1, s=1, us=0, ns=0): - stamp = Timestamp(value, unit=unit) + stamp = Timestamp(value, input_unit=unit) assert stamp.year == 2000 assert stamp.month == 1 assert stamp.day == 1 @@ -1076,9 +1076,9 @@ def test_timestamp_nano_range(nano): def test_non_nano_value(): # https://github.com/pandas-dev/pandas/issues/49076 - msg = "The 'unit' keyword is only used when" + msg = "The 'input_unit' keyword is only used when" with tm.assert_produces_warning(UserWarning, match=msg): - result = Timestamp("1800-01-01", unit="s").value + result = Timestamp("1800-01-01", input_unit="s").value # `.value` shows nanoseconds, even though unit is 's' assert result == -5364662400000000000 diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 
8e153827ad086..de93cfc7d2a5e 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -869,7 +869,7 @@ def test_resolution(self, ts): def test_out_of_ns_bounds(self): # https://github.com/pandas-dev/pandas/issues/51060 - result = Timestamp(-52700112000, unit="s") + result = Timestamp(-52700112000, input_unit="s") assert result == Timestamp("0300-01-01") assert result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 81648377942f7..7dae35f54a316 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -126,7 +126,7 @@ def test_astype_object_to_dt64_non_nano(self, tz): pointwise = [ vals[0].tz_localize(tz), Timestamp(vals[1], tz=tz), - to_datetime(vals[2], unit="us", utc=True).tz_convert(tz), + to_datetime(vals[2], input_unit="us", utc=True).tz_convert(tz), ] exp_vals = [x.as_unit("us").asm8 for x in pointwise] exp_arr = np.array(exp_vals, dtype="M8[us]") @@ -303,7 +303,7 @@ def test_astype_str_cast_dt64(self): def test_astype_str_cast_td64(self): # see GH#9757 - td = Series([Timedelta(1, unit="D")]) + td = Series([Timedelta(1, input_unit="D")]) ser = td.astype(str) expected = Series(["1 days"], dtype="str") diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index c1ee7f8c9e008..ffc480d097455 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -31,7 +31,7 @@ def test_clip_types_and_nulls(self): sers = [ Series([np.nan, 1.0, 2.0, 3.0]), Series([None, "a", "b", "c"]), - Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")), + Series(pd.to_datetime([np.nan, 1, 2, 3], input_unit="D")), ] for s in sers: diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 57d0c60118504..42921e4c5f499 
100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -326,7 +326,7 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): # GH 60237 pytest.importorskip("pyarrow") ser = pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), + pd.to_datetime(range(5), utc=True, input_unit="h"), dtype="timestamp[ns, tz=UTC][pyarrow]", ) result = ser.convert_dtypes(dtype_backend="pyarrow") diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4f8484252ba8f..c470d4ffb2161 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -92,7 +92,7 @@ def test_isin_with_i8(self): tm.assert_series_equal(result, expected) # timedelta64[ns] - s = Series(pd.to_timedelta(range(5), unit="D")) + s = Series(pd.to_timedelta(range(5), input_unit="D")) result = s.isin(s[0:2]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 018ae06562148..98256cfdbd37b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1910,16 +1910,16 @@ def test_constructor_dict_timedelta_index(self): # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data expected = Series( - data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") + data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], input_unit="s") ) result = Series( data={ - pd.to_timedelta(0, unit="s"): "A", - pd.to_timedelta(10, unit="s"): "B", - pd.to_timedelta(20, unit="s"): "C", + pd.to_timedelta(0, input_unit="s"): "A", + pd.to_timedelta(10, input_unit="s"): "B", + pd.to_timedelta(20, input_unit="s"): "C", }, - index=pd.to_timedelta([0, 10, 20], unit="s"), + index=pd.to_timedelta([0, 10, 20], input_unit="s"), ) tm.assert_series_equal(result, expected) @@ -1979,7 +1979,7 @@ def 
test_constructor_raise_on_lossy_conversion_of_strings(self): def test_constructor_dtype_timedelta_alternative_construct(self): # GH#35465 result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]") - expected = Series(pd.to_timedelta([1000000, 200000, 3000000], unit="ns")) + expected = Series(pd.to_timedelta([1000000, 200000, 3000000], input_unit="ns")) tm.assert_series_equal(result, expected) @pytest.mark.xfail( diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 02ead5bddec70..a0b63b07dbaee 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -289,7 +289,7 @@ def test_xarray_coerce_unit(): xr = pytest.importorskip("xarray") arr = xr.DataArray([1, 2, 3]) - result = pd.to_datetime(arr, unit="ns") + result = pd.to_datetime(arr, input_unit="ns") expected = DatetimeIndex( [ "1970-01-01 00:00:00.000000001", diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c9cda0c4153cb..b464643e4d939 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -28,6 +28,7 @@ from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, + Pandas4Warning, ) import pandas.util._test_decorators as td @@ -548,6 +549,15 @@ def test_to_datetime_none(self): # GH#23055 assert to_datetime(None) is NaT + def test_to_datetime_unit_deprecated(self): + msg = "The 'unit' keyword is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + to_datetime([1], unit="s") + + msg2 = "Specify only 'input_unit', not 'unit'" + with pytest.raises(ValueError, match=msg2): + to_datetime([1], unit="s", input_unit="s") + @pytest.mark.filterwarnings("ignore:Could not infer format") def test_to_datetime_overflow(self): # we should get an OutOfBoundsDatetime, NOT OverflowError @@ -1759,20 +1769,22 @@ class TestToDatetimeUnit: @pytest.mark.parametrize("item", [150, float(150)]) def test_to_datetime_month_or_year_unit_int(self, 
cache, unit, item, request): # GH#50870 Note we have separate tests that pd.Timestamp gets these right - ts = Timestamp(item, unit=unit) + ts = Timestamp(item, input_unit=unit) expected = DatetimeIndex([ts], dtype="M8[ns]") - result = to_datetime([item], unit=unit, cache=cache) + result = to_datetime([item], input_unit=unit, cache=cache) tm.assert_index_equal(result, expected) - result = to_datetime(np.array([item], dtype=object), unit=unit, cache=cache) + result = to_datetime( + np.array([item], dtype=object), input_unit=unit, cache=cache + ) tm.assert_index_equal(result, expected) - result = to_datetime(np.array([item]), unit=unit, cache=cache) + result = to_datetime(np.array([item]), input_unit=unit, cache=cache) tm.assert_index_equal(result, expected) # with a nan! - result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache) + result = to_datetime(np.array([item, np.nan]), input_unit=unit, cache=cache) assert result.isna()[1] tm.assert_index_equal(result[:1], expected) @@ -1781,42 +1793,42 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit): # GH#50301 # Match Timestamp behavior in disallowing non-round floats with # Y or M unit - msg = f"Conversion of non-round float with unit={unit} is ambiguous" + msg = f"Conversion of non-round float with input_unit={unit} is ambiguous" with pytest.raises(ValueError, match=msg): - to_datetime([1.5], unit=unit, errors="raise") + to_datetime([1.5], input_unit=unit, errors="raise") with pytest.raises(ValueError, match=msg): - to_datetime(np.array([1.5]), unit=unit, errors="raise") + to_datetime(np.array([1.5]), input_unit=unit, errors="raise") msg = r"Given date string \"1.5\" not likely a datetime" with pytest.raises(ValueError, match=msg): - to_datetime(["1.5"], unit=unit, errors="raise") + to_datetime(["1.5"], input_unit=unit, errors="raise") - res = to_datetime([1.5], unit=unit, errors="coerce") + res = to_datetime([1.5], input_unit=unit, errors="coerce") expected = Index([NaT], 
dtype="M8[ns]") tm.assert_index_equal(res, expected) # In 3.0, the string "1.5" is parsed as as it would be without unit, # which fails. With errors="coerce" this becomes NaT. - res = to_datetime(["1.5"], unit=unit, errors="coerce") + res = to_datetime(["1.5"], input_unit=unit, errors="coerce") expected = to_datetime([NaT]).as_unit("ns") tm.assert_index_equal(res, expected) # round floats are OK - res = to_datetime([1.0], unit=unit) - expected = to_datetime([1], unit=unit) + res = to_datetime([1.0], input_unit=unit) + expected = to_datetime([1], input_unit=unit) tm.assert_index_equal(res, expected) def test_unit(self, cache): # GH 11758 # test proper behavior with errors - msg = "cannot specify both format and unit" + msg = "cannot specify both format and input_unit" with pytest.raises(ValueError, match=msg): - to_datetime([1], unit="D", format="%Y%m%d", cache=cache) + to_datetime([1], input_unit="D", format="%Y%m%d", cache=cache) def test_unit_array_mixed_nans(self, cache): values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] - result = to_datetime(values, unit="D", errors="coerce", cache=cache) + result = to_datetime(values, input_unit="D", errors="coerce", cache=cache) expected = DatetimeIndex( ["NaT", "1970-01-02", "1970-01-02", "NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]", @@ -1825,31 +1837,31 @@ def test_unit_array_mixed_nans(self, cache): msg = "cannot convert input 11111111111111111 with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(values, unit="D", errors="raise", cache=cache) + to_datetime(values, input_unit="D", errors="raise", cache=cache) def test_unit_array_mixed_nans_large_int(self, cache): values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"] - result = to_datetime(values, errors="coerce", unit="s", cache=cache) + result = to_datetime(values, errors="coerce", input_unit="s", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]") 
tm.assert_index_equal(result, expected) msg = "cannot convert input 1420043460000000000000000 with the unit 's'" with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(values, errors="raise", unit="s", cache=cache) + to_datetime(values, errors="raise", input_unit="s", cache=cache) def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache): # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime msg = "Unknown datetime string format, unable to parse: foo" with pytest.raises(ValueError, match=msg): - to_datetime("foo", errors="raise", unit="s", cache=cache) + to_datetime("foo", errors="raise", input_unit="s", cache=cache) @pytest.mark.parametrize("error", ["raise", "coerce"]) def test_unit_consistency(self, cache, error): # consistency of conversions expected = Timestamp("1970-05-09 14:25:11") - result = to_datetime(11111111, unit="s", errors=error, cache=cache) + result = to_datetime(11111111, input_unit="s", errors=error, cache=cache) assert result == expected assert isinstance(result, Timestamp) @@ -1916,18 +1928,18 @@ def test_unit_rounding(self, cache): # GH 14156 & GH 20445: argument will incur floating point errors # but no premature rounding value = 1434743731.8770001 - result = to_datetime(value, unit="s", cache=cache) + result = to_datetime(value, input_unit="s", cache=cache) expected = Timestamp("2015-06-19 19:55:31.877000093") assert result == expected - alt = Timestamp(value, unit="s") + alt = Timestamp(value, input_unit="s") assert alt == result @pytest.mark.parametrize("dtype", [int, float]) def test_to_datetime_unit(self, dtype): epoch = 1370745748 ser = Series([epoch + t for t in range(20)]).astype(dtype) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [ Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) @@ -1941,7 +1953,7 @@ def test_to_datetime_unit(self, dtype): def test_to_datetime_unit_with_nulls(self, null): epoch = 1370745748 ser = 
Series([epoch + t for t in range(20)] + [null]) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + [NaT], @@ -1953,7 +1965,7 @@ def test_to_datetime_unit_fractional_seconds(self): # GH13834 epoch = 1370745748 ser = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [ Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) @@ -1967,7 +1979,7 @@ def test_to_datetime_unit_fractional_seconds(self): tm.assert_series_equal(result, expected) def test_to_datetime_unit_na_values(self): - result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") + result = to_datetime([1, 2, "NaT", NaT, np.nan], input_unit="D") expected = DatetimeIndex( [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3, dtype="M8[ns]", @@ -1981,7 +1993,7 @@ def test_to_datetime_unit_invalid(self, bad_val): else: msg = "cannot convert input 111111111 with the unit 'D'" with pytest.raises(ValueError, match=msg): - to_datetime([1, 2, bad_val], unit="D") + to_datetime([1, 2, bad_val], input_unit="D") @pytest.mark.parametrize("bad_val", ["foo", 111111111]) def test_to_timestamp_unit_coerce(self, bad_val): @@ -1990,12 +2002,12 @@ def test_to_timestamp_unit_coerce(self, bad_val): [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1, dtype="M8[ns]", ) - result = to_datetime([1, 2, bad_val], unit="D", errors="coerce") + result = to_datetime([1, 2, bad_val], input_unit="D", errors="coerce") tm.assert_index_equal(result, expected) def test_float_to_datetime_raise_near_bounds(self): # GH50183 - msg = "cannot convert input with unit 'D'" + msg = "cannot convert input with input_unit 'D'" oneday_in_ns = 1e9 * 60 * 60 * 24 tsmax_in_days = 2**63 / oneday_in_ns # 2**63 ns, in days # just in bounds @@ -2004,7 +2016,7 @@ def 
test_float_to_datetime_raise_near_bounds(self): ) expected = (should_succeed * oneday_in_ns).astype(np.int64) for error_mode in ["raise", "coerce"]: - result1 = to_datetime(should_succeed, unit="D", errors=error_mode) + result1 = to_datetime(should_succeed, input_unit="D", errors=error_mode) # Cast to `np.float64` so that `rtol` and inexact checking kick in # (`check_exact` doesn't take place for integer dtypes) tm.assert_almost_equal( @@ -2016,9 +2028,9 @@ def test_float_to_datetime_raise_near_bounds(self): should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float) should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float) with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(should_fail1, unit="D", errors="raise") + to_datetime(should_fail1, input_unit="D", errors="raise") with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(should_fail2, unit="D", errors="raise") + to_datetime(should_fail2, input_unit="D", errors="raise") class TestToDatetimeDataFrame: @@ -2541,7 +2553,7 @@ def test_to_datetime_overflow(self): def test_to_datetime_float_with_nans_floating_point_error(self): # GH#58419 ser = Series([np.nan] * 1000 + [1712219033.0], dtype=np.float64) - result = to_datetime(ser, unit="s", errors="coerce") + result = to_datetime(ser, input_unit="s", errors="coerce") expected = Series( [NaT] * 1000 + [Timestamp("2024-04-04 08:23:53")], dtype="datetime64[ns]" ) @@ -3183,11 +3195,11 @@ def julian_dates(): class TestOrigin: def test_origin_and_unit(self): # GH#42624 - ts = to_datetime(1, unit="s", origin=1) + ts = to_datetime(1, input_unit="s", origin=1) expected = Timestamp("1970-01-01 00:00:02") assert ts == expected - ts = to_datetime(1, unit="s", origin=1_000_000_000) + ts = to_datetime(1, input_unit="s", origin=1_000_000_000) expected = Timestamp("2001-09-09 01:46:41") assert ts == expected @@ -3195,14 +3207,14 @@ def test_julian(self, julian_dates): # gh-11276, gh-11745 # for origin as julian - result = 
Series(to_datetime(julian_dates, unit="D", origin="julian")) + result = Series(to_datetime(julian_dates, input_unit="D", origin="julian")) expected = Series( - to_datetime(julian_dates - Timestamp(0).to_julian_date(), unit="D") + to_datetime(julian_dates - Timestamp(0).to_julian_date(), input_unit="D") ) tm.assert_series_equal(result, expected) def test_unix(self): - result = Series(to_datetime([0, 1, 2], unit="D", origin="unix")) + result = Series(to_datetime([0, 1, 2], input_unit="D", origin="unix")) expected = Series( [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")], dtype="M8[ns]", @@ -3210,27 +3222,27 @@ def test_unix(self): tm.assert_series_equal(result, expected) def test_julian_round_trip(self): - result = to_datetime(2456658, origin="julian", unit="D") + result = to_datetime(2456658, origin="julian", input_unit="D") assert result.to_julian_date() == 2456658 # out-of-bounds msg = "1 is Out of Bounds for origin='julian'" with pytest.raises(ValueError, match=msg): - to_datetime(1, origin="julian", unit="D") + to_datetime(1, origin="julian", input_unit="D") def test_invalid_unit(self, units, julian_dates): # checking for invalid combination of origin='julian' and unit != D if units != "D": msg = "unit must be 'D' for origin='julian'" with pytest.raises(ValueError, match=msg): - to_datetime(julian_dates, unit=units, origin="julian") + to_datetime(julian_dates, input_unit=units, origin="julian") @pytest.mark.parametrize("unit", ["ns", "D"]) def test_invalid_origin(self, unit): # need to have a numeric specified - msg = "it must be numeric with a unit specified" + msg = "it must be numeric with a input_unit specified" with pytest.raises(ValueError, match=msg): - to_datetime("2005-01-01", origin="1960-01-01", unit=unit) + to_datetime("2005-01-01", origin="1960-01-01", input_unit=unit) @pytest.mark.parametrize( "epochs", @@ -3245,10 +3257,10 @@ def test_epoch(self, units, epochs): epoch_1960 = Timestamp(1960, 1, 1) units_from_epochs = 
np.arange(5, dtype=np.int64) expected = Series( - [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] + [pd.Timedelta(x, input_unit=units) + epoch_1960 for x in units_from_epochs] ) - result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs)) + result = Series(to_datetime(units_from_epochs, input_unit=units, origin=epochs)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -3269,12 +3281,14 @@ def test_invalid_origins(self, origin, exc, units): ] ) with pytest.raises(exc, match=msg): - to_datetime(list(range(5)), unit=units, origin=origin) + to_datetime(list(range(5)), input_unit=units, origin=origin) def test_invalid_origins_tzinfo(self): # GH16842 with pytest.raises(ValueError, match="must be tz-naive"): - to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=timezone.utc)) + to_datetime( + 1, input_unit="D", origin=datetime(2000, 1, 1, tzinfo=timezone.utc) + ) def test_incorrect_value_exception(self): # GH47495 @@ -3315,15 +3329,15 @@ def test_processing_order(self, arg, origin, expected_str): # make sure we handle out-of-bounds *before* # constructing the dates - result = to_datetime(arg, unit="D", origin=origin) + result = to_datetime(arg, input_unit="D", origin=origin) expected = Timestamp(expected_str) assert result == expected - result = to_datetime(200 * 365, unit="D", origin="1870-01-01") + result = to_datetime(200 * 365, input_unit="D", origin="1870-01-01") expected = Timestamp("2069-11-13 00:00:00") assert result == expected - result = to_datetime(300 * 365, unit="D", origin="1870-01-01") + result = to_datetime(300 * 365, input_unit="D", origin="1870-01-01") expected = Timestamp("2169-10-20 00:00:00") assert result == expected @@ -3339,7 +3353,7 @@ def test_processing_order(self, arg, origin, expected_str): def test_arg_tz_ns_unit(self, offset, utc, exp): # GH 25546 arg = "2019-01-01T00:00:00.000" + offset - result = to_datetime([arg], unit="ns", utc=utc) + result = to_datetime([arg], 
input_unit="ns", utc=utc) expected = to_datetime([exp]).as_unit("ns") tm.assert_index_equal(result, expected) @@ -3389,7 +3403,7 @@ def test_nullable_integer_to_datetime(): ser = Series([1, 2, None, 2**61, None], dtype="Int64") ser_copy = ser.copy() - res = to_datetime(ser, unit="ns") + res = to_datetime(ser, input_unit="ns") expected = Series( [ @@ -3439,12 +3453,12 @@ def test_empty_string_datetime(errors, args, format): def test_empty_string_datetime_coerce__unit(): # GH13044 # coerce empty string to pd.NaT - result = to_datetime([1, ""], unit="s", errors="coerce") + result = to_datetime([1, ""], input_unit="s", errors="coerce") expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") tm.assert_index_equal(expected, result) # verify that no exception is raised even when errors='raise' is set - result = to_datetime([1, ""], unit="s", errors="raise") + result = to_datetime([1, ""], input_unit="s", errors="raise") tm.assert_index_equal(expected, result) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index fcbc91d4c632f..fa7016c1ef4bc 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -370,7 +370,7 @@ def test_timedelta(transform_assert_equal): @pytest.mark.parametrize( "scalar", [ - pd.Timedelta(1, "D"), + pd.Timedelta(1, input_unit="D"), pd.Timestamp("2017-01-01T12"), pd.Timestamp("2017-01-01T12", tz="US/Pacific"), ], diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 9d5866ef97017..4f9bb47b0aa1b 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -31,6 +31,15 @@ def test_to_timedelta_none(self): # GH#23055 assert to_timedelta(None) is pd.NaT + def test_to_timedelta_unit_deprecated(self): + msg = "The 'unit' keyword is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + to_timedelta([1], unit="s") + + msg2 = "Specify only 
'input_unit', not 'unit'" + with pytest.raises(ValueError, match=msg2): + to_timedelta([1], unit="s", input_unit="s") + def test_to_timedelta_dt64_raises(self): # Passing datetime64-dtype data to TimedeltaIndex is no longer # supported GH#29794 @@ -76,7 +85,7 @@ def test_to_timedelta_units(self): result = TimedeltaIndex( [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")] ) - expected = to_timedelta([0, 10], unit="s") + expected = to_timedelta([0, 10], input_unit="s") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -92,7 +101,7 @@ def test_to_timedelta_units(self): def test_to_timedelta_units_dtypes(self, dtype, unit): # arrays of various dtypes arr = np.array([1] * 5, dtype=dtype) - result = to_timedelta(arr, unit=unit) + result = to_timedelta(arr, input_unit=unit) exp_dtype = "m8[ns]" if dtype == "int64" else "m8[s]" expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5, dtype=exp_dtype) tm.assert_index_equal(result, expected) @@ -132,7 +141,7 @@ def test_to_timedelta_invalid_unit(self, arg): # these will error msg = "invalid unit abbreviation: foo" with pytest.raises(ValueError, match=msg): - to_timedelta(arg, unit="foo") + to_timedelta(arg, input_unit="foo") def test_to_timedelta_time(self): # time not supported ATM @@ -234,14 +243,14 @@ def test_to_timedelta_on_missing_values_list(self, val): def test_to_timedelta_float(self): # https://github.com/pandas-dev/pandas/issues/25077 arr = np.arange(0, 1, 1e-6)[-10:] - result = to_timedelta(arr, unit="s") + result = to_timedelta(arr, input_unit="s") expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64") tm.assert_numpy_array_equal(result.asi8, expected_asi8) def test_to_timedelta_coerce_strings_unit(self): arr = np.array([1, 2, "error"], dtype=object) - result = to_timedelta(arr, unit="ns", errors="coerce") - expected = to_timedelta([1, 2, pd.NaT], unit="ns") + result = to_timedelta(arr, input_unit="ns", errors="coerce") + expected = to_timedelta([1, 2, pd.NaT], 
input_unit="ns") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -250,7 +259,7 @@ def test_to_timedelta_coerce_strings_unit(self): def test_to_timedelta_nullable_int64_dtype(self, expected_val, result_val): # GH 35574 expected = Series([timedelta(days=1), expected_val], dtype="m8[ns]") - result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days") + result = to_timedelta(Series([1, result_val], dtype="Int64"), input_unit="days") tm.assert_series_equal(result, expected) @@ -292,11 +301,11 @@ def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype): # GH#48796 ser = Series([1, pd.NA], dtype=any_numeric_ea_dtype) result = to_timedelta(ser) - expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT]) + expected = Series([pd.Timedelta(1, input_unit="ns"), pd.NaT]) tm.assert_series_equal(result, expected) def test_to_timedelta_fraction(self): - result = to_timedelta(1.0 / 3, unit="h") + result = to_timedelta(1.0 / 3, input_unit="h") expected = pd.Timedelta("0 days 00:19:59.999999998") assert result == expected diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py index 1b488dc9a47d4..9f0a33be149ea 100644 --- a/pandas/tests/tseries/offsets/test_business_hour.py +++ b/pandas/tests/tseries/offsets/test_business_hour.py @@ -957,7 +957,7 @@ def test_apply_nanoseconds(self): def test_bday_ignores_timedeltas(self, unit, td_unit): # GH#55608 idx = date_range("2010/02/01", "2010/02/10", freq="12h", unit=unit) - td = Timedelta(3, unit="h").as_unit(td_unit) + td = Timedelta(3, input_unit="h").as_unit(td_unit) off = BDay(offset=td) t1 = idx + off @@ -996,7 +996,7 @@ def test_bday_ignores_timedeltas(self, unit, td_unit): def test_add_bday_offset_nanos(self): # GH#55608 idx = date_range("2010/02/01", "2010/02/10", freq="12h", unit="ns") - off = BDay(offset=Timedelta(3, unit="ns")) + off = BDay(offset=Timedelta(3, input_unit="ns")) result = idx + off expected = DatetimeIndex([x + off for 
x in idx]) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c4e6733b9a08d..e77561bf6cf4d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -297,7 +297,7 @@ def fields(self) -> np.ndarray: # structured array of fields @cache_readonly def rep_stamp(self) -> Timestamp: - return Timestamp(self.i8values[0], unit=self.index.unit) + return Timestamp(self.i8values[0], input_unit=self.index.unit) def month_position_check(self) -> str | None: return month_position_check(self.fields, self.index.dayofweek)