From aac7af74a0010bd132c6884b493b91bc92f06745 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Sat, 8 Nov 2025 15:29:32 +0800 Subject: [PATCH 1/5] feat: handle DST transitions in timezone-aware resampling --- pandas/core/resample.py | 103 ++++++--- pandas/tests/resample/test_dst_handling.py | 240 +++++++++++++++++++++ 2 files changed, 309 insertions(+), 34 deletions(-) create mode 100644 pandas/tests/resample/test_dst_handling.py diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f84bedda8d00c..53ae4f1c419b4 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2576,42 +2576,80 @@ def _get_time_bins(self, ax: DatetimeIndex): ) if len(ax) == 0: - binner = labels = DatetimeIndex( - data=[], freq=self.freq, name=ax.name, dtype=ax.dtype - ) - return binner, [], labels + empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name, dtype=ax.dtype) + return empty, [], empty - first, last = _get_timestamp_range_edges( - ax.min(), - ax.max(), - self.freq, - unit=ax.unit, - closed=self.closed, - origin=self.origin, - offset=self.offset, - ) - # GH #12037 - # use first/last directly instead of call replace() on them - # because replace() will swallow the nanosecond part - # thus last bin maybe slightly before the end if the end contains - # nanosecond part and lead to `Values falls after last bin` error - # GH 25758: If DST lands at midnight (e.g. 
'America/Havana'), user feedback - # has noted that ambiguous=True provides the most sensible result - binner = labels = date_range( - freq=self.freq, - start=first, - end=last, - tz=ax.tz, - name=ax.name, - ambiguous=True, - nonexistent="shift_forward", - unit=ax.unit, - ) + if ax.tz is not None: + try: + first, last = _get_timestamp_range_edges( + ax.min(), + ax.max(), + self.freq, + unit=ax.unit, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + binner = labels = date_range( + freq=self.freq, + start=first, + end=last, + tz=ax.tz, + name=ax.name, + ambiguous=True, + nonexistent="shift_forward", + unit=ax.unit, + ) + except Exception as e: + if "nonexistent" not in str(e).lower(): + raise + + ax_utc = ax.tz_convert("UTC") + + first_utc, last_utc = _get_timestamp_range_edges( + ax_utc.min(), + ax_utc.max(), + self.freq, + unit=ax.unit, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + + binner_utc = date_range( + start=first_utc, + end=last_utc, + freq=self.freq, + tz="UTC", + name=ax.name, + unit=ax.unit, + ) + + binner = labels = binner_utc.tz_convert(ax.tz) + else: + first, last = _get_timestamp_range_edges( + ax.min(), + ax.max(), + self.freq, + unit=ax.unit, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + binner = labels = date_range( + freq=self.freq, + start=first, + end=last, + tz=ax.tz, + name=ax.name, + ambiguous=True, + nonexistent="shift_forward", + unit=ax.unit, + ) ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) - # general version, knowing nothing about relative frequencies bins = lib.generate_bins_dt64( ax_values, bin_edges, self.closed, hasnans=ax.hasnans ) @@ -2627,9 +2665,6 @@ def _get_time_bins(self, ax: DatetimeIndex): binner = binner.insert(0, NaT) labels = labels.insert(0, NaT) - # if we end up with more labels than bins - # adjust the labels - # GH4076 if len(bins) < len(labels): labels = labels[: len(bins)] diff --git 
a/pandas/tests/resample/test_dst_handling.py b/pandas/tests/resample/test_dst_handling.py new file mode 100644 index 0000000000000..bc7d0100efab5 --- /dev/null +++ b/pandas/tests/resample/test_dst_handling.py @@ -0,0 +1,240 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + DatetimeIndex, +) + + +class TestResampleDSTAfricaCairo: + """DST transition tests for Africa/Cairo timezone.""" + + def test_resample_across_dst_transition(self): + df = DataFrame( + {"value": [1, 2]}, + index=DatetimeIndex( + [ + "2024-04-26 01:00:00", + "2024-04-27 00:00:00", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample("D").mean() + + assert len(result) == 2 + assert isinstance(result.index, DatetimeIndex) + assert result.index.tz is not None + assert not result.isna().any().any() + + def test_resample_before_dst_boundary(self): + df = DataFrame( + {"value": [76.0, 42.0]}, + index=DatetimeIndex( + [ + "2024-04-24 00:00:00", + "2024-04-25 00:00:00", + ] + ).tz_localize("Africa/Cairo"), + ) + + result = df.resample("D").mean() + + assert len(result) == 2 + assert isinstance(result.index, DatetimeIndex) + assert "Africa/Cairo" in str(result.index.tz) + assert result.iloc[0, 0] == 76.0 + assert result.iloc[1, 0] == 42.0 + + @pytest.mark.parametrize("freq", ["2h", "6h", "12h"]) + def test_resample_various_freq(self, freq): + df = DataFrame( + {"value": [1, 2, 3, 4, 5]}, + index=DatetimeIndex( + [ + "2024-04-25 22:00:00", + "2024-04-25 23:00:00", + "2024-04-26 01:00:00", + "2024-04-26 02:00:00", + "2024-04-26 03:00:00", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample(freq).mean() + + assert isinstance(result, DataFrame) + assert len(result) > 0 + assert not result.isna().all().any() + + def test_resample_closed_label_combinations(self): + df = DataFrame( + {"value": [1, 2]}, + index=DatetimeIndex( + [ + "2024-04-26 01:00:00", + "2024-04-27 00:00:00", + ] + 
).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + for closed in ["left", "right"]: + for label in ["left", "right"]: + result = df.resample("D", closed=closed, label=label).mean() + assert len(result) >= 1 + assert not result.isna().all().any() + + def test_resample_nonexistent_times(self): + timestamps = [ + "2024-04-25 23:00:00", + "2024-04-26 00:30:00", + "2024-04-26 01:00:00", + ] + + df = DataFrame( + {"value": [1, 2, 3]}, + index=DatetimeIndex(timestamps).tz_localize( + "Africa/Cairo", nonexistent="shift_forward" + ), + ) + + result = df.resample("h").mean() + + assert len(result) > 0 + assert isinstance(result, DataFrame) + + def test_resample_empty_dataframe(self): + df = DataFrame({"value": []}, index=DatetimeIndex([], tz="Africa/Cairo")) + + result = df.resample("D").mean() + + assert len(result) == 0 + assert isinstance(result.index, DatetimeIndex) + + def test_resample_single_point(self): + df = DataFrame( + {"value": [42.0]}, + index=DatetimeIndex(["2024-04-26 12:00:00"]).tz_localize( + "Africa/Cairo", nonexistent="shift_forward" + ), + ) + + result = df.resample("D").mean() + + assert len(result) == 1 + assert result.iloc[0, 0] == 42.0 + + +class TestResampleDSTMultipleTimezones: + """DST handling across multiple timezones.""" + + def test_resample_multiple_timezones(self): + timezones = [ + ("Africa/Cairo", "2024-04-26 01:00:00", "2024-04-27 00:00:00"), + ("Europe/London", "2024-03-31 01:00:00", "2024-04-01 00:00:00"), + ("America/New_York", "2024-03-10 01:00:00", "2024-03-11 00:00:00"), + ] + + for tz, start, end in timezones: + df = DataFrame( + {"value": [1, 2]}, + index=DatetimeIndex([start, end]).tz_localize( + tz, nonexistent="shift_forward", ambiguous=True + ), + ) + + result = df.resample("D").mean() + + assert len(result) >= 1 + assert isinstance(result.index, DatetimeIndex) + assert result.index.tz is not None + + +class TestResampleDSTEdgeCases: + """Edge cases around DST transitions.""" + + def 
test_resample_multiple_dst_days(self): + df = DataFrame( + {"value": [1, 2, 3, 4]}, + index=DatetimeIndex( + [ + "2024-04-25 23:00:00", + "2024-04-26 01:00:00", + "2024-04-27 00:00:00", + "2024-04-28 00:00:00", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample("D").mean() + + assert len(result) >= 3 + + def test_resample_microsecond_precision(self): + df = DataFrame( + {"value": [1.1, 2.2]}, + index=DatetimeIndex( + [ + "2024-04-26 01:00:00.123456", + "2024-04-27 00:00:00.654321", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample("D").mean() + + assert len(result) == 2 + + def test_resample_with_na_values(self): + df = DataFrame( + {"value": [1.0, np.nan, 3.0]}, + index=DatetimeIndex( + [ + "2024-04-25 23:00:00", + "2024-04-26 01:00:00", + "2024-04-26 02:00:00", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample("h").mean() + + assert len(result) > 0 + assert isinstance(result, DataFrame) + + +class TestResampleDSTOriginalIssues: + """Tests reproducing the originally reported issues.""" + + def test_original_issue_1(self): + df = DataFrame( + {"value": [1, 2]}, + index=DatetimeIndex( + [ + "2024-04-26 01:00:00", + "2024-04-27 00:00:00", + ] + ).tz_localize("Africa/Cairo", nonexistent="shift_forward"), + ) + + result = df.resample("D").mean() + + assert len(result) > 0 + assert not result.isna().any().any() + + def test_original_issue_2(self): + df = DataFrame( + {"value": [76.0, 42.0]}, + index=DatetimeIndex( + [ + "2024-04-24 00:00:00", + "2024-04-25 00:00:00", + ] + ).tz_localize("Africa/Cairo"), + ) + + result = df.resample("D").mean() + + assert len(result) > 0 + assert not result.isna().any().any() From 013c4939fc66066cc4aad891ab5a484a1001537c Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 11 Nov 2025 11:15:42 +0800 Subject: [PATCH 2/5] Code rewriting --- 
pandas/core/resample.py | 80 ++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 50 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 53ae4f1c419b4..38a06d2a6349d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2571,7 +2571,7 @@ def _get_grouper( def _get_time_bins(self, ax: DatetimeIndex): if not isinstance(ax, DatetimeIndex): raise TypeError( - "axis must be a DatetimeIndex, but got " + "axis must be a DatetimeIndex, but got " f"an instance of {type(ax).__name__}" ) @@ -2579,55 +2579,7 @@ def _get_time_bins(self, ax: DatetimeIndex): empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name, dtype=ax.dtype) return empty, [], empty - if ax.tz is not None: - try: - first, last = _get_timestamp_range_edges( - ax.min(), - ax.max(), - self.freq, - unit=ax.unit, - closed=self.closed, - origin=self.origin, - offset=self.offset, - ) - binner = labels = date_range( - freq=self.freq, - start=first, - end=last, - tz=ax.tz, - name=ax.name, - ambiguous=True, - nonexistent="shift_forward", - unit=ax.unit, - ) - except Exception as e: - if "nonexistent" not in str(e).lower(): - raise - - ax_utc = ax.tz_convert("UTC") - - first_utc, last_utc = _get_timestamp_range_edges( - ax_utc.min(), - ax_utc.max(), - self.freq, - unit=ax.unit, - closed=self.closed, - origin=self.origin, - offset=self.offset, - ) - - binner_utc = date_range( - start=first_utc, - end=last_utc, - freq=self.freq, - tz="UTC", - name=ax.name, - unit=ax.unit, - ) - - binner = labels = binner_utc.tz_convert(ax.tz) - - else: + try: first, last = _get_timestamp_range_edges( ax.min(), ax.max(), @@ -2647,9 +2599,34 @@ def _get_time_bins(self, ax: DatetimeIndex): nonexistent="shift_forward", unit=ax.unit, ) + except Exception: + # Fallback to UTC calculation for timezone-aware data + # to handle DST transition + # 62601 + ax_utc = ax.tz_convert("UTC") + first_utc, last_utc = _get_timestamp_range_edges( + ax_utc.min(), + ax_utc.max(), + 
self.freq, + unit=ax.unit, + closed=self.closed, + origin=self.origin, + offset=self.offset, + ) + binner_utc = date_range( + freq=self.freq, + start=first_utc, + end=last_utc, + tz="UTC", + name=ax.name, + unit=ax.unit, + ) + binner = labels = binner_utc.tz_convert(ax.tz) + ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) + # general version, knowing nothing about relative frequencies bins = lib.generate_bins_dt64( ax_values, bin_edges, self.closed, hasnans=ax.hasnans ) @@ -2665,6 +2642,9 @@ def _get_time_bins(self, ax: DatetimeIndex): binner = binner.insert(0, NaT) labels = labels.insert(0, NaT) + # if we end up with more labels than bins + # adjust the labels + # GH4076 if len(bins) < len(labels): labels = labels[: len(bins)] From 3deef38b3ecf73a0c3225c6e652d8d7385307a20 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 11 Nov 2025 12:34:28 +0800 Subject: [PATCH 3/5] fix the unreasonable change of none time zone --- pandas/core/resample.py | 52 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d660a7f197420..7c8d5f1e6e583 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2573,49 +2573,45 @@ def _get_time_bins(self, ax: DatetimeIndex): empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name, dtype=ax.dtype) return empty, [], empty - try: + def _calculate_bins_in_timezone(ax_to_use, tz): + """Calculate time bins in specified timezone""" first, last = _get_timestamp_range_edges( - ax.min(), - ax.max(), + ax_to_use.min(), + ax_to_use.max(), self.freq, unit=ax.unit, closed=self.closed, origin=self.origin, offset=self.offset, ) - binner = labels = date_range( + return date_range( freq=self.freq, start=first, end=last, - tz=ax.tz, + tz=tz, name=ax.name, ambiguous=True, nonexistent="shift_forward", unit=ax.unit, ) - except Exception: - # Fallback to UTC 
calculation for timezone-aware data - # to handle DST transition - # 62601 - ax_utc = ax.tz_convert("UTC") - first_utc, last_utc = _get_timestamp_range_edges( - ax_utc.min(), - ax_utc.max(), - self.freq, - unit=ax.unit, - closed=self.closed, - origin=self.origin, - offset=self.offset, - ) - binner_utc = date_range( - freq=self.freq, - start=first_utc, - end=last_utc, - tz="UTC", - name=ax.name, - unit=ax.unit, - ) - binner = labels = binner_utc.tz_convert(ax.tz) + + if ax.tz is not None: + try: + # normal way + binner = labels = _calculate_bins_in_timezone(ax, ax.tz) + except Exception as e: + if "nonexistent" in str(e).lower() or "ambiguous" in str(e).lower(): + # Fallback to UTC calculation for timezone-aware data + # to handle DST transitions + # 62601 + ax_utc = ax.tz_convert("UTC") + binner_utc = _calculate_bins_in_timezone(ax_utc, "UTC") + binner = labels = binner_utc.tz_convert(ax.tz) + else: + raise + else: + # no time zone + binner = labels = _calculate_bins_in_timezone(ax, None) ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) From 7886dd5dda68cde66582ae578ecaa4dae9d9dabb Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 11 Nov 2025 20:23:45 +0800 Subject: [PATCH 4/5] REF: Replace @appender|@Substitution with hardcoded docstring in strftime --- pandas/core/arrays/datetimelike.py | 293 ++++++++++++++++++++++++++++- 1 file changed, 283 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2f7330d1e81fe..eef720389063e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -81,8 +81,6 @@ PerformanceWarning, ) from pandas.util._decorators import ( - Appender, - Substitution, cache_readonly, ) from pandas.util._exceptions import find_stack_level @@ -1779,10 +1777,6 @@ class DatelikeOps(DatetimeLikeArrayMixin): Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. 
""" - @Substitution( - URL="https://docs.python.org/3/library/datetime.html" - "#strftime-and-strptime-behavior" - ) def strftime(self, date_format: str) -> npt.NDArray[np.object_]: """ Convert to Index using specified date_format. @@ -1790,7 +1784,7 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: Return an Index of formatted strings specified by date_format, which supports the same string format as the python standard library. Details of the string format can be found in `python string format - doc <%(URL)s>`__. + doc `__. Formats supported by the C `strftime` API but not by the python string format doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be @@ -2263,31 +2257,310 @@ def _round(self, freq, mode, ambiguous, nonexistent): result = result.view(self._ndarray.dtype) return self._simple_new(result, dtype=self.dtype) - @Appender((_round_doc + _round_example).format(op="round")) def round( self, freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: + """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 's' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise a ValueError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', + 'NaT', timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. 
+ + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a ValueError if there are + nonexistent times. + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + See Also + -------- + DatetimeIndex.floor : Perform floor operation on the data + to the specified `freq`. + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + + Notes + ----- + If the timestamps have a timezone, {op}ing will take place relative to the + local ("wall") time and re-localized to the same timezone. When {op}ing + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range("1/1/2018 11:59:00", periods=3, freq="min") + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='min') + >>> rng.round("h") + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.round("h") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. 
+ + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2h", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2h", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + """ return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) - @Appender((_round_doc + _floor_example).format(op="floor")) def floor( self, freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: + """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 's' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise a ValueError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a ValueError if there are + nonexistent times. 
+ + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + See Also + -------- + DatetimeIndex.floor : Perform floor operation on the data + to the specified `freq`. + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + + Notes + ----- + If the timestamps have a timezone, {op}ing will take place relative to the + local ("wall") time and re-localized to the same timezone. When {op}ing + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range("1/1/2018 11:59:00", periods=3, freq="min") + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='min') + >>> rng.floor("h") + DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.floor("h") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. 
+ + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.floor("2h", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + + >>> rng_tz.floor("2h", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + """ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) - @Appender((_round_doc + _ceil_example).format(op="ceil")) def ceil( self, freq, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: + """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 's' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise a ValueError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', + timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a ValueError if there are + nonexistent times. 
+ + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + See Also + -------- + DatetimeIndex.floor : Perform floor operation on the data + to the specified `freq`. + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + + Notes + ----- + If the timestamps have a timezone, {op}ing will take place relative to the + local ("wall") time and re-localized to the same timezone. When {op}ing + near daylight savings time, use ``nonexistent`` and ``ambiguous`` to + control the re-localization behavior. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range("1/1/2018 11:59:00", periods=3, freq="min") + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='min') + >>> rng.ceil("h") + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 13:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.ceil("h") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 13:00:00 + dtype: datetime64[ns] + + When rounding near a daylight savings time transition, use ``ambiguous`` or + ``nonexistent`` to control how the timestamp should be re-localized. 
+ + >>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam") + + >>> rng_tz.ceil("h", ambiguous=False) + DatetimeIndex(['2021-10-31 02:00:00+01:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + + >>> rng_tz.ceil("h", ambiguous=True) + DatetimeIndex(['2021-10-31 02:00:00+02:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) + """ return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) # -------------------------------------------------------------- From d7d8388def8d8785aa8917a5f59a4ab31e87f3a7 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 11 Nov 2025 21:01:12 +0800 Subject: [PATCH 5/5] replace the {op} --- pandas/core/arrays/datetimelike.py | 60 +++++++++++++++++------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index eef720389063e..50397b97422f6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2264,12 +2264,12 @@ def round( nonexistent: TimeNonexistent = "raise", ) -> Self: """ - Perform {op} operation on the data to the specified `freq`. + Perform round operation on the data to the specified `freq`. Parameters ---------- freq : str or Offset - The frequency level to {op} the index to. Must be a fixed + The frequency level to round the index to. Must be a fixed frequency like 's' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. @@ -2285,8 +2285,8 @@ def round( - 'raise' will raise a ValueError if there are ambiguous times. - nonexistent : 'shift_forward', 'shift_backward', - 'NaT', timedelta, default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
@@ -2312,13 +2312,13 @@ def round( See Also -------- DatetimeIndex.floor : Perform floor operation on the data - to the specified `freq`. + to the specified `freq`. DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. Notes ----- - If the timestamps have a timezone, {op}ing will take place relative to the - local ("wall") time and re-localized to the same timezone. When {op}ing + If the timestamps have a timezone, rounding will take place relative to the + local ("wall") time and re-localized to the same timezone. When rounding near daylight savings time, use ``nonexistent`` and ``ambiguous`` to control the re-localization behavior. @@ -2327,18 +2327,20 @@ def round( **DatetimeIndex** >>> rng = pd.date_range("1/1/2018 11:59:00", periods=3, freq="min") + >>> rng DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', '2018-01-01 12:01:00'], dtype='datetime64[ns]', freq='min') - >>> rng.round("h") - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) + + >>> rng.round("h") + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) **Series** - >>> pd.Series(rng).dt.round("h") + >>> pd.Series(rng).dt.round("h") 0 2018-01-01 12:00:00 1 2018-01-01 12:00:00 2 2018-01-01 12:00:00 @@ -2349,12 +2351,12 @@ def round( >>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam") - >>> rng_tz.floor("2h", ambiguous=False) - DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + >>> rng_tz.round("2h", ambiguous=False) + DatetimeIndex(['2021-10-31 04:00:00+01:00'], + dtype='datetime64[s, Europe/Amsterdam]', freq=None) - >>> rng_tz.floor("2h", ambiguous=True) - DatetimeIndex(['2021-10-31 02:00:00+02:00'], + >>> rng_tz.round("2h", ambiguous=True) + DatetimeIndex(['2021-10-31 04:00:00+02:00'], dtype='datetime64[s, Europe/Amsterdam]', freq=None) """ return 
self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) @@ -2366,12 +2368,12 @@ def floor( nonexistent: TimeNonexistent = "raise", ) -> Self: """ - Perform {op} operation on the data to the specified `freq`. + Perform floor operation on the data to the specified `freq`. Parameters ---------- freq : str or Offset - The frequency level to {op} the index to. Must be a fixed + The frequency level to floor the index to. Must be a fixed frequency like 's' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. @@ -2413,14 +2415,16 @@ def floor( See Also -------- - DatetimeIndex.floor : Perform floor operation on the data + DatetimeIndex.ceil : Perform ceil operation on the data + to the specified `freq`. + DatetimeIndex.round : Perform round operation on the data to the specified `freq`. DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. Notes ----- - If the timestamps have a timezone, {op}ing will take place relative to the - local ("wall") time and re-localized to the same timezone. When {op}ing + If the timestamps have a timezone, flooring will take place relative to the + local ("wall") time and re-localized to the same timezone. When flooring near daylight savings time, use ``nonexistent`` and ``ambiguous`` to control the re-localization behavior. @@ -2429,10 +2433,12 @@ def floor( **DatetimeIndex** >>> rng = pd.date_range("1/1/2018 11:59:00", periods=3, freq="min") + >>> rng DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', '2018-01-01 12:01:00'], dtype='datetime64[ns]', freq='min') + >>> rng.floor("h") DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', '2018-01-01 12:00:00'], @@ -2468,12 +2474,12 @@ def ceil( nonexistent: TimeNonexistent = "raise", ) -> Self: """ - Perform {op} operation on the data to the specified `freq`. + Perform ceil operation on the data to the specified `freq`. Parameters ---------- freq : str or Offset - The frequency level to {op} the index to. 
Must be a fixed + The frequency level to ceil the index to. Must be a fixed frequency like 's' (second) not 'ME' (month end). See :ref:`frequency aliases ` for a list of possible `freq` values. @@ -2517,12 +2523,14 @@ def ceil( -------- DatetimeIndex.floor : Perform floor operation on the data to the specified `freq`. + DatetimeIndex.round : Perform round operation on the data + to the specified `freq`. DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. Notes ----- - If the timestamps have a timezone, {op}ing will take place relative to the - local ("wall") time and re-localized to the same timezone. When {op}ing + If the timestamps have a timezone, ceiling will take place relative to the + local ("wall") time and re-localized to the same timezone. When ceiling near daylight savings time, use ``nonexistent`` and ``ambiguous`` to control the re-localization behavior.