From d91d44f43990aac9ecb45726c1d49adbb143687f Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Tue, 25 Nov 2025 22:55:41 +0800 Subject: [PATCH 01/10] removed @DOC --- pandas/core/indexes/datetimelike.py | 346 +++++++++++++++++++++++++- pandas/core/indexes/datetimes.py | 373 +++++++++++++++++++++++++++- 2 files changed, 704 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58529c5597b6e..4f43e5696cc58 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -92,8 +92,51 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC): _can_hold_strings = False _data: DatetimeArray | TimedeltaArray | PeriodArray - @doc(DatetimeLikeArrayMixin.mean) def mean(self, *, skipna: bool = True, axis: int | None = 0): + """ + Return the mean value of the Array. + + Parameters + ---------- + skipna : bool, default True + Whether to ignore any NaT elements. + axis : int, optional, default 0 + Axis for the function to be applied on. + + Returns + ------- + scalar + Timestamp or Timedelta. + + See Also + -------- + numpy.ndarray.mean : Returns the average of array elements along a given axis. + Series.mean : Return the mean value in a Series. + + Notes + ----- + mean is only defined for Datetime and Timedelta dtypes, not for Period. 
+ + Examples + -------- + For :class:`pandas.DatetimeIndex`: + + >>> idx = pd.date_range("2001-01-01 00:00", periods=3) + >>> idx + DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.mean() + Timestamp('2001-01-02 00:00:00') + + For :class:`pandas.TimedeltaIndex`: + + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx + TimedeltaIndex(['1 days', '2 days', '3 days'], + dtype='timedelta64[ns]', freq=None) + >>> tdelta_idx.mean() + Timedelta('2 days 00:00:00') + """ return self._data.mean(skipna=skipna, axis=axis) @property @@ -136,8 +179,37 @@ def asi8(self) -> npt.NDArray[np.int64]: return self._data.asi8 @property - @doc(DatetimeLikeArrayMixin.freqstr) def freqstr(self) -> str: + """ + Return the frequency object as a string if it's set, otherwise None. + + See Also + -------- + DatetimeIndex.inferred_freq : Returns a string representing a frequency + generated by infer_freq. + + Examples + -------- + For DatetimeIndex: + + >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D") + >>> idx.freqstr + 'D' + + The frequency can be inferred if there are more than 2 points: + + >>> idx = pd.DatetimeIndex( + ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" + ... ) + >>> idx.freqstr + '2D' + + For PeriodIndex: + + >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M") + >>> idx.freqstr + 'M' + """ from pandas import PeriodIndex if self._data.freqstr is not None and isinstance( @@ -155,6 +227,9 @@ def _resolution_obj(self) -> Resolution: ... 
@cache_readonly @doc(DatetimeLikeArrayMixin.resolution) def resolution(self) -> str: + """ + Returns day, hour, minute, second, millisecond or microsecond + """ return self._data.resolution # ------------------------------------------------------------------------ @@ -199,8 +274,41 @@ def equals(self, other: Any) -> bool: return np.array_equal(self.asi8, other.asi8) - @Appender(Index.__contains__.__doc__) def __contains__(self, key: Any) -> bool: + """ + Return a boolean indicating whether the provided key is in the index. + + Parameters + ---------- + key : label + The key to check if it is present in the index. + + Returns + ------- + bool + Whether the key search is in the index. + + Raises + ------ + TypeError + If the key is not hashable. + + See Also + -------- + Index.isin : Returns an ndarray of boolean dtype indicating whether the + list-like key is in the index. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Index([1, 2, 3, 4], dtype='int64') + + >>> 2 in idx + True + >>> 6 in idx + False + """ hash(key) try: self.get_loc(key) @@ -243,8 +351,19 @@ def _format_attrs(self): attrs.append(("freq", freq)) return attrs - @Appender(Index._summary.__doc__) def _summary(self, name=None) -> str: + """ + Return a summarized representation. + + Parameters + ---------- + name : str + name to use in the summary representation + + Returns + ------- + String with a summarized representation of the index + """ result = super()._summary(name=name) if self.freq: result += f"\nFreq: {self.freqstr}" @@ -405,8 +524,10 @@ def shift(self, periods: int = 1, freq=None) -> Self: # -------------------------------------------------------------------- - @doc(Index._maybe_cast_listlike_indexer) def _maybe_cast_listlike_indexer(self, keyarr): + """ + Analogue to maybe_cast_indexer for get_indexer instead of get_loc. 
+ """ try: res = self._data._validate_listlike(keyarr, allow_object=True) except (ValueError, TypeError): @@ -497,8 +618,33 @@ def values(self) -> np.ndarray: data.flags.writeable = False return data - @doc(DatetimeIndexOpsMixin.shift) def shift(self, periods: int = 1, freq=None) -> Self: + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or string, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. + Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.DatetimeIndex + Shifted index. + + See Also + -------- + Index.shift : Shift values of Index. + PeriodIndex.shift : Shift values of PeriodIndex. + """ if freq is not None and freq != self.freq: if isinstance(freq, str): freq = to_offset(freq) @@ -524,8 +670,34 @@ def shift(self, periods: int = 1, freq=None) -> Self: return type(self)._simple_new(result, name=self.name) @cache_readonly - @doc(DatetimeLikeArrayMixin.inferred_freq) def inferred_freq(self) -> str | None: + """ + Tries to return a string representing a frequency generated by infer_freq. + + Returns None if it can't autodetect the frequency. + + See Also + -------- + DatetimeIndex.freqstr : Return the frequency object as a string if it's set, + otherwise None. 
+ + Examples + -------- + For DatetimeIndex: + + >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]) + >>> idx.inferred_freq + '2D' + + For TimedeltaIndex: + + >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) + >>> tdelta_idx + TimedeltaIndex(['0 days', '10 days', '20 days'], + dtype='timedelta64[ns]', freq=None) + >>> tdelta_idx.inferred_freq + '10D' + """ return self._data.inferred_freq # -------------------------------------------------------------------- @@ -816,14 +988,124 @@ def _get_insert_freq(self, loc: int, item): freq = self.freq return freq - @doc(NDArrayBackedExtensionIndex.delete) def delete(self, loc) -> Self: + """ + Make new Index with passed location(-s) deleted. + + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). + + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete(1) + Index(['a', 'c'], dtype='str') + + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete([0, 2]) + Index(['b'], dtype='str') + + + Make new Index with passed location(-s) deleted. + + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). 
+ + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete(1) + Index(['a', 'c'], dtype='str') + + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete([0, 2]) + Index(['b'], dtype='str') + """ result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result - @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): + """ + Make new Index inserting new item at location. + + Follows Python numpy.insert semantics for negative values. + + Parameters + ---------- + loc : int + The integer location where the new item will be inserted. + item : object + The new item to be inserted into the Index. + + Returns + ------- + Index + Returns a new Index object resulting from inserting the specified item at + the specified location within the original Index. + + See Also + -------- + Index.append : Append a collection of Indexes together. + + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.insert(1, "x") + Index(['a', 'x', 'b', 'c'], dtype='str') + + + Make new Index inserting new item at location. + + Follows Python numpy.insert semantics for negative values. + + Parameters + ---------- + loc : int + The integer location where the new item will be inserted. + item : object + The new item to be inserted into the Index. + + Returns + ------- + Index + Returns a new Index object resulting from inserting the specified item at + the specified location within the original Index. + + See Also + -------- + Index.append : Append a collection of Indexes together. + + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.insert(1, "x") + Index(['a', 'x', 'b', 'c'], dtype='str') + """ result = super().insert(loc, item) if isinstance(result, type(self)): # i.e. 
parent class method did not cast @@ -833,7 +1115,6 @@ def insert(self, loc: int, item): # -------------------------------------------------------------------- # NDArray-Like Methods - @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take( self, indices, @@ -842,6 +1123,51 @@ def take( fill_value=None, **kwargs, ) -> Self: + """ + Return a new Index of the values selected by the indices. + + For internal compatibility with numpy arrays. + + Parameters + ---------- + indices : array-like + Indices to be taken. + axis : int, optional + The axis over which to select values, always 0. + allow_fill : bool, default True + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + negative values raise a ``ValueError``. + + fill_value : scalar, default None + If allow_fill=True and fill_value is not None, indices specified by + -1 are regarded as NA. If Index doesn't hold NA, raise ValueError. + **kwargs + Required for compatibility with numpy. + + Returns + ------- + Index + An index formed of elements at the given indices. Will be the same + type as self, except for RangeIndex. + + See Also + -------- + numpy.ndarray.take: Return an array formed from the + elements of a at the given indices. 
+ + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx.take([2, 2, 1, 2]) + Index(['c', 'c', 'b', 'c'], dtype='str') + """ nv.validate_take((), kwargs) indices = np.asarray(indices, dtype=np.intp) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d26a9313865cd..faaab44687cc4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -271,28 +271,321 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: # -------------------------------------------------------------------- # methods that dispatch to DatetimeArray and wrap result - @doc(DatetimeArray.strftime) def strftime(self, date_format) -> Index: + """ + Convert to Index using specified date_format. + + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format + doc <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`__. + + Formats supported by the C `strftime` API but not by the python string format + doc (such as `"%R"`, `"%r"`) are not officially supported and should be + preferably replaced with their supported equivalents (such as `"%H:%M"`, + `"%I:%M:%S %p"`). + + Note that `PeriodIndex` supports additional directives, detailed in + `Period.strftime`. + + Parameters + ---------- + date_format : str + Date format string (e.g. "%Y-%m-%d"). + + Returns + ------- + Index + Index of formatted strings. + + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + Timestamp.strftime : Format a single Timestamp. + Period.strftime : Format a single Period. 
+ + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), periods=3, freq="s") + >>> rng.strftime("%B %d, %Y, %r") + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='str') + """ arr = self._data.strftime(date_format) return Index(arr, name=self.name, dtype=arr.dtype) - @doc(DatetimeArray.tz_convert) def tz_convert(self, tz) -> Self: + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + + Parameters + ---------- + tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None + Time zone for time. Corresponding timestamps would be converted + to this time zone of the Datetime Array/Index. A `tz` of None will + convert to UTC and remove the timezone information. + + Returns + ------- + Array or Index + Datetime Array/Index with target `tz`. + + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. + + See Also + -------- + DatetimeIndex.tz : A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex. + + Examples + -------- + With the `tz` parameter, we can change the DatetimeIndex + to other time zones: + + >>> dti = pd.date_range( + ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" + ... 
) + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='h') + + >>> dti.tz_convert(None) + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='h') + """ arr = self._data.tz_convert(tz) return type(self)._simple_new(arr, name=self.name, refs=self._references) - @doc(DatetimeArray.tz_localize) def tz_localize( self, tz, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: + """ + Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object + and makes this time zone aware. It does not move the time to another + time zone. + + This method can also be used to do the inverse -- to create a time + zone unaware object from an aware object. To that end, pass `tz=None`. + + Parameters + ---------- + tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None + Time zone to convert timestamps to. Passing ``None`` will + remove the time zone information preserving local time. + ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise a ValueError if there are ambiguous + times. 
+ + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a ValueError if there are + nonexistent times. + + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + + See Also + -------- + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another. + + Examples + -------- + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... 
'2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[s, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[s, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'], dtype="M8[ns]")) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) - @doc(DatetimeArray.to_period) def to_period(self, freq=None) -> PeriodIndex: + """ + Cast to PeriodArray/PeriodIndex at a particular frequency. + + Converts DatetimeArray/Index to PeriodArray/PeriodIndex. 
+ + Parameters + ---------- + freq : str or Period, optional + One of pandas' :ref:`period aliases <timeseries.period_aliases>` + or a Period object. Will be inferred by default. + + Returns + ------- + PeriodArray/PeriodIndex + Immutable ndarray holding ordinal values at a particular frequency. + + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + + See Also + -------- + PeriodIndex: Immutable ndarray holding ordinal values. + DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"y": [1, 2, 3]}, + ... index=pd.to_datetime( + ... [ + ... "2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00", + ... ] + ... ), + ... ) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]') + + Infer the daily frequency + + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]') + """ from pandas.core.indexes.api import PeriodIndex arr = self._data.to_period(freq) @@ -300,11 +593,64 @@ def to_period(self, freq=None) -> PeriodIndex: @doc(DatetimeArray.to_julian_date) def to_julian_date(self) -> Index: + """ + Convert Timestamp to a Julian Date. + + This method returns the number of days as a float since noon January 1, 4713 BC. + + https://en.wikipedia.org/wiki/Julian_day + + Returns + ------- + ndarray or Index + Float values that represent each date in Julian Calendar. + + See Also + -------- + Timestamp.to_julian_date : Equivalent method on ``Timestamp`` objects. 
+ + Examples + -------- + >>> idx = pd.DatetimeIndex(["2028-08-12 00:54", "2028-08-12 02:06"]) + >>> idx.to_julian_date() + Index([2461995.5375, 2461995.5875], dtype='float64') + """ arr = self._data.to_julian_date() return Index._simple_new(arr, name=self.name) @doc(DatetimeArray.isocalendar) def isocalendar(self) -> DataFrame: + """ + Calculate year, week, and day according to the ISO 8601 standard. + + Returns + ------- + DataFrame + With columns year, week and day. + + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. + datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + + Examples + -------- + >>> idx = pd.date_range(start="2019-12-29", freq="D", periods=4) + >>> idx.isocalendar() + year week day + 2019-12-29 2019 52 7 + 2019-12-30 2020 1 1 + 2019-12-31 2020 1 2 + 2020-01-01 2020 1 3 + >>> idx.isocalendar().week + 2019-12-29 52 + 2019-12-30 1 + 2019-12-31 1 + 2020-01-01 1 + Freq: D, Name: week, dtype: UInt32 + """ df = self._data.isocalendar() return df.set_index(self) @@ -641,8 +987,25 @@ def get_loc(self, key): except KeyError as err: raise KeyError(orig_key) from err - @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ # GH#42855 handle date here instead of get_slice_bound if isinstance(label, dt.date) and not isinstance(label, dt.datetime): # Pandas supports slicing with dates, treated as datetimes at midnight. 
From 23ff40ee4a1bb26460fefe08cc995bcdb0133b04 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 26 Nov 2025 14:48:45 +0800 Subject: [PATCH 02/10] fix line too long --- pandas/core/indexes/datetimelike.py | 286 ++++++++++++++++++++++-- pandas/core/indexes/datetimes.py | 334 +++++++++++++++++++++++++++- 2 files changed, 597 insertions(+), 23 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 58529c5597b6e..04a982ae38368 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -36,9 +36,7 @@ NullFrequencyError, ) from pandas.util._decorators import ( - Appender, cache_readonly, - doc, ) from pandas.core.dtypes.common import ( @@ -57,12 +55,10 @@ PeriodArray, TimedeltaArray, ) -from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, - _index_shared_docs, ) from pandas.core.indexes.extension import NDArrayBackedExtensionIndex from pandas.core.indexes.range import RangeIndex @@ -92,8 +88,43 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC): _can_hold_strings = False _data: DatetimeArray | TimedeltaArray | PeriodArray - @doc(DatetimeLikeArrayMixin.mean) def mean(self, *, skipna: bool = True, axis: int | None = 0): + """ + Return the mean value of the Array. + Parameters + ---------- + skipna : bool, default True + Whether to ignore any NaT elements. + axis : int, optional, default 0 + Axis for the function to be applied on. + Returns + ------- + scalar + Timestamp or Timedelta. + See Also + -------- + numpy.ndarray.mean : Returns the average of array elements along a given axis. + Series.mean : Return the mean value in a Series. + Notes + ----- + mean is only defined for Datetime and Timedelta dtypes, not for Period. 
+ Examples + -------- + For :class:`pandas.DatetimeIndex`: + >>> idx = pd.date_range("2001-01-01 00:00", periods=3) + >>> idx + DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.mean() + Timestamp('2001-01-02 00:00:00') + For :class:`pandas.TimedeltaIndex`: + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx + TimedeltaIndex(['1 days', '2 days', '3 days'], + dtype='timedelta64[ns]', freq=None) + >>> tdelta_idx.mean() + Timedelta('2 days 00:00:00') + """ return self._data.mean(skipna=skipna, axis=axis) @property @@ -136,8 +167,30 @@ def asi8(self) -> npt.NDArray[np.int64]: return self._data.asi8 @property - @doc(DatetimeLikeArrayMixin.freqstr) def freqstr(self) -> str: + """ + Return the frequency object as a string if it's set, otherwise None. + See Also + -------- + DatetimeIndex.inferred_freq : Returns a string representing a frequency + generated by infer_freq. + Examples + -------- + For DatetimeIndex: + >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D") + >>> idx.freqstr + 'D' + The frequency can be inferred if there are more than 2 points: + >>> idx = pd.DatetimeIndex( + ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" + ... ) + >>> idx.freqstr + '2D' + For PeriodIndex: + >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M") + >>> idx.freqstr + 'M' + """ from pandas import PeriodIndex if self._data.freqstr is not None and isinstance( @@ -153,8 +206,10 @@ def freqstr(self) -> str: def _resolution_obj(self) -> Resolution: ... 
@cache_readonly - @doc(DatetimeLikeArrayMixin.resolution) def resolution(self) -> str: + """ + Returns day, hour, minute, second, millisecond or microsecond + """ return self._data.resolution # ------------------------------------------------------------------------ @@ -199,8 +254,35 @@ def equals(self, other: Any) -> bool: return np.array_equal(self.asi8, other.asi8) - @Appender(Index.__contains__.__doc__) def __contains__(self, key: Any) -> bool: + """ + Return a boolean indicating whether the provided key is in the index. + Parameters + ---------- + key : label + The key to check if it is present in the index. + Returns + ------- + bool + Whether the key search is in the index. + Raises + ------ + TypeError + If the key is not hashable. + See Also + -------- + Index.isin : Returns an ndarray of boolean dtype indicating whether the + list-like key is in the index. + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Index([1, 2, 3, 4], dtype='int64') + >>> 2 in idx + True + >>> 6 in idx + False + """ hash(key) try: self.get_loc(key) @@ -243,8 +325,17 @@ def _format_attrs(self): attrs.append(("freq", freq)) return attrs - @Appender(Index._summary.__doc__) def _summary(self, name=None) -> str: + """ + Return a summarized representation. + Parameters + ---------- + name : str + name to use in the summary representation + Returns + ------- + String with a summarized representation of the index + """ result = super()._summary(name=name) if self.freq: result += f"\nFreq: {self.freqstr}" @@ -405,8 +496,10 @@ def shift(self, periods: int = 1, freq=None) -> Self: # -------------------------------------------------------------------- - @doc(Index._maybe_cast_listlike_indexer) def _maybe_cast_listlike_indexer(self, keyarr): + """ + Analogue to maybe_cast_indexer for get_indexer instead of get_loc. 
+ """ try: res = self._data._validate_listlike(keyarr, allow_object=True) except (ValueError, TypeError): @@ -497,8 +590,29 @@ def values(self) -> np.ndarray: data.flags.writeable = False return data - @doc(DatetimeIndexOpsMixin.shift) def shift(self, periods: int = 1, freq=None) -> Self: + """ + Shift index by desired number of time frequency increments. + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + freq : pandas.DateOffset, pandas.Timedelta or string, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. + Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + Returns + ------- + pandas.DatetimeIndex + Shifted index. + See Also + -------- + Index.shift : Shift values of Index. + PeriodIndex.shift : Shift values of PeriodIndex. + """ if freq is not None and freq != self.freq: if isinstance(freq, str): freq = to_offset(freq) @@ -524,8 +638,28 @@ def shift(self, periods: int = 1, freq=None) -> Self: return type(self)._simple_new(result, name=self.name) @cache_readonly - @doc(DatetimeLikeArrayMixin.inferred_freq) def inferred_freq(self) -> str | None: + """ + Tries to return a string representing a frequency generated by infer_freq. + Returns None if it can't autodetect the frequency. + See Also + -------- + DatetimeIndex.freqstr : Return the frequency object as a string if it's set, + otherwise None. 
+ Examples + -------- + For DatetimeIndex: + >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]) + >>> idx.inferred_freq + '2D' + For TimedeltaIndex: + >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) + >>> tdelta_idx + TimedeltaIndex(['0 days', '10 days', '20 days'], + dtype='timedelta64[ns]', freq=None) + >>> tdelta_idx.inferred_freq + '10D' + """ return self._data.inferred_freq # -------------------------------------------------------------------- @@ -816,14 +950,100 @@ def _get_insert_freq(self, loc: int, item): freq = self.freq return freq - @doc(NDArrayBackedExtensionIndex.delete) def delete(self, loc) -> Self: + """ + Make new Index with passed location(-s) deleted. + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete(1) + Index(['a', 'c'], dtype='str') + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete([0, 2]) + Index(['b'], dtype='str') + Make new Index with passed location(-s) deleted. + Parameters + ---------- + loc : int or list of int + Location of item(-s) which will be deleted. + Use a list of locations to delete more than one value at the same time. + Returns + ------- + Index + Will be same type as self, except for RangeIndex. + See Also + -------- + numpy.delete : Delete any rows and column from NumPy array (ndarray). 
+ Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete(1) + Index(['a', 'c'], dtype='str') + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.delete([0, 2]) + Index(['b'], dtype='str') + """ result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result - @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): + """ + Make new Index inserting new item at location. + Follows Python numpy.insert semantics for negative values. + Parameters + ---------- + loc : int + The integer location where the new item will be inserted. + item : object + The new item to be inserted into the Index. + Returns + ------- + Index + Returns a new Index object resulting from inserting the specified item at + the specified location within the original Index. + See Also + -------- + Index.append : Append a collection of Indexes together. + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.insert(1, "x") + Index(['a', 'x', 'b', 'c'], dtype='str') + Make new Index inserting new item at location. + Follows Python numpy.insert semantics for negative values. + Parameters + ---------- + loc : int + The integer location where the new item will be inserted. + item : object + The new item to be inserted into the Index. + Returns + ------- + Index + Returns a new Index object resulting from inserting the specified item at + the specified location within the original Index. + See Also + -------- + Index.append : Append a collection of Indexes together. + Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.insert(1, "x") + Index(['a', 'x', 'b', 'c'], dtype='str') + """ result = super().insert(loc, item) if isinstance(result, type(self)): # i.e. 
parent class method did not cast @@ -833,7 +1053,6 @@ def insert(self, loc: int, item): # -------------------------------------------------------------------- # NDArray-Like Methods - @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take( self, indices, @@ -842,6 +1061,43 @@ def take( fill_value=None, **kwargs, ) -> Self: + """ + Return a new Index of the values selected by the indices. + For internal compatibility with numpy arrays. + Parameters + ---------- + indices : array-like + Indices to be taken. + axis : int, optional + The axis over which to select values, always 0. + allow_fill : bool, default True + How to handle negative values in `indices`. + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + fill_value : scalar, default None + If allow_fill=True and fill_value is not None, indices specified by + -1 are regarded as NA. If Index doesn't hold NA, raise ValueError. + **kwargs + Required for compatibility with numpy. + Returns + ------- + Index + An index formed of elements at the given indices. Will be the same + type as self, except for RangeIndex. + See Also + -------- + numpy.ndarray.take: Return an array formed from the + elements of a at the given indices. 
+ Examples + -------- + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.take([2, 2, 1, 2]) + Index(['c', 'c', 'b', 'c'], dtype='str') + """ nv.validate_take((), kwargs) indices = np.asarray(indices, dtype=np.intp) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d26a9313865cd..fa89b44f5e050 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -29,7 +29,6 @@ from pandas.errors import Pandas4Warning from pandas.util._decorators import ( cache_readonly, - doc, set_module, ) from pandas.util._exceptions import find_stack_level @@ -271,40 +270,345 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: # -------------------------------------------------------------------- # methods that dispatch to DatetimeArray and wrap result - @doc(DatetimeArray.strftime) def strftime(self, date_format) -> Index: + """ + Convert to Index using specified date_format. + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format + doc `__. + Formats supported by the C `strftime` API but not by the python string format + doc (such as `"%R"`, `"%r"`) are not officially supported and should be + preferably replaced with their supported equivalents (such as `"%H:%M"`, + `"%I:%M:%S %p"`). + Note that `PeriodIndex` support additional directives, detailed in + `Period.strftime`. + Parameters + ---------- + date_format : str + Date format string (e.g. "%Y-%m-%d"). + Returns + ------- + ndarray[object] + NumPy ndarray of formatted strings. + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + Timestamp.strftime : Format a single Timestamp. 
+ Period.strftime : Format a single Period. + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), periods=3, freq="s") + >>> rng.strftime("%B %d, %Y, %r") + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='str') + """ arr = self._data.strftime(date_format) return Index(arr, name=self.name, dtype=arr.dtype) - @doc(DatetimeArray.tz_convert) def tz_convert(self, tz) -> Self: + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + Parameters + ---------- + tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, + datetime.tzinfo or None + Time zone for time. Corresponding timestamps would be converted + to this time zone of the Datetime Array/Index. A `tz` of None will + convert to UTC and remove the timezone information. + Returns + ------- + Array or Index + Datetme Array/Index with target `tz`. + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. + See Also + -------- + DatetimeIndex.tz : A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex. + Examples + -------- + With the `tz` parameter, we can change the DatetimeIndex + to other time zones: + >>> dti = pd.date_range( + ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" + ... ) + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='h') + >>> dti.tz_convert("US/Central") + DatetimeIndex(['2014-08-01 02:00:00-05:00', + '2014-08-01 03:00:00-05:00', + '2014-08-01 04:00:00-05:00'], + dtype='datetime64[ns, US/Central]', freq='h') + With the ``tz=None``, we can remove the timezone (after converting + to UTC if necessary): + >>> dti = pd.date_range( + ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" + ... 
) + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='h') + >>> dti.tz_convert(None) + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='h') + """ arr = self._data.tz_convert(tz) return type(self)._simple_new(arr, name=self.name, refs=self._references) - @doc(DatetimeArray.tz_localize) def tz_localize( self, tz, ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", ) -> Self: + """ + Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + This method takes a time zone (tz) naive Datetime Array/Index object + and makes this time zone aware. It does not move the time to another + time zone. + This method can also be used to do the inverse -- to create a time + zone unaware object from an aware object. To that end, pass `tz=None`. + Parameters + ---------- + tz : str, zoneinfo.ZoneInfo,, pytz.timezone, dateutil.tz.tzfile, + datetime.tzinfo or None + Time zone to convert timestamps to. Passing ``None`` will + remove the time zone information preserving local time. + ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise a ValueError if there are ambiguous + times. 
+ nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, + default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a ValueError if there are + nonexistent times. + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + See Also + -------- + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another. + Examples + -------- + >>> tz_naive = pd.date_range("2018-03-01 09:00", periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + Localize DatetimeIndex in US/Eastern time zone: + >>> tz_aware = tz_naive.tz_localize(tz="US/Eastern") + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + >>> s = pd.to_datetime( + ... pd.Series( + ... [ + ... "2018-10-28 01:30:00", + ... "2018-10-28 02:00:00", + ... "2018-10-28 02:30:00", + ... "2018-10-28 02:00:00", + ... "2018-10-28 02:30:00", + ... "2018-10-28 03:00:00", + ... 
"2018-10-28 03:30:00", + ... ] + ... ) + ... ) + >>> s.dt.tz_localize("CET", ambiguous="infer") + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[s, CET] + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + >>> s = pd.to_datetime( + ... pd.Series( + ... [ + ... "2018-10-28 01:20:00", + ... "2018-10-28 02:36:00", + ... "2018-10-28 03:46:00", + ... ] + ... ) + ... ) + >>> s.dt.tz_localize("CET", ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[s, CET] + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + >>> s = pd.to_datetime( + ... pd.Series( + ... ["2015-03-29 02:30:00", "2015-03-29 03:30:00"], dtype="M8[ns]" + ... ) + ... ) + >>> s.dt.tz_localize("Europe/Warsaw", nonexistent="shift_forward") + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize("Europe/Warsaw", nonexistent="shift_backward") + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta("1h")) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) - @doc(DatetimeArray.to_period) def to_period(self, freq=None) -> PeriodIndex: + """ + Cast to PeriodArray/PeriodIndex at a particular frequency. + Converts DatetimeArray/Index to PeriodArray/PeriodIndex. 
+ Parameters + ---------- + freq : str or Period, optional + One of pandas' :ref:`period aliases ` + or a Period object. Will be inferred by default. + Returns + ------- + PeriodArray/PeriodIndex + Immutable ndarray holding ordinal values at a particular frequency. + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + See Also + -------- + PeriodIndex: Immutable ndarray holding ordinal values. + DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. + Examples + -------- + >>> df = pd.DataFrame( + ... {"y": [1, 2, 3]}, + ... index=pd.to_datetime( + ... [ + ... "2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00", + ... ] + ... ), + ... ) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]') + Infer the daily frequency + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]') + """ from pandas.core.indexes.api import PeriodIndex arr = self._data.to_period(freq) return PeriodIndex._simple_new(arr, name=self.name) - @doc(DatetimeArray.to_julian_date) def to_julian_date(self) -> Index: + """ + Convert TimeStamp to a Julian Date. + This method returns the number of days as a float since noon January 1, 4713 BC. + https://en.wikipedia.org/wiki/Julian_day + Returns + ------- + ndarray or Index + Float values that represent each date in Julian Calendar. + See Also + -------- + Timestamp.to_julian_date : Equivalent method on ``Timestamp`` objects. 
+ Examples + -------- + >>> idx = pd.DatetimeIndex(["2028-08-12 00:54", "2028-08-12 02:06"]) + >>> idx.to_julian_date() + Index([2461995.5375, 2461995.5875], dtype='float64') + """ arr = self._data.to_julian_date() return Index._simple_new(arr, name=self.name) - @doc(DatetimeArray.isocalendar) def isocalendar(self) -> DataFrame: + """ + Calculate year, week, and day according to the ISO 8601 standard. + Returns + ------- + DataFrame + With columns year, week and day. + See Also + -------- + Timestamp.isocalendar : Function return a 3-tuple containing ISO year, + week number, and weekday for the given Timestamp object. + datetime.date.isocalendar : Return a named tuple object with + three components: year, week and weekday. + Examples + -------- + >>> idx = pd.date_range(start="2019-12-29", freq="D", periods=4) + >>> idx.isocalendar() + year week day + 2019-12-29 2019 52 7 + 2019-12-30 2020 1 1 + 2019-12-31 2020 1 2 + 2020-01-01 2020 1 3 + >>> idx.isocalendar().week + 2019-12-29 52 + 2019-12-30 1 + 2019-12-31 1 + 2020-01-01 1 + Freq: D, Name: week, dtype: UInt32 + """ df = self._data.isocalendar() return df.set_index(self) @@ -641,8 +945,22 @@ def get_loc(self, key): except KeyError as err: raise KeyError(orig_key) from err - @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + Parameters + ---------- + label : object + side : {'left', 'right'} + Returns + ------- + label : object + Notes + ----- + Value of `side` parameter should be validated in caller. + """ # GH#42855 handle date here instead of get_slice_bound if isinstance(label, dt.date) and not isinstance(label, dt.datetime): # Pandas supports slicing with dates, treated as datetimes at midnight. 
From 8f5d26a5eabe6a897c2c234ff16cec67380a3b93 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 26 Nov 2025 17:02:48 +0800 Subject: [PATCH 03/10] fix Block quote ends without a blank line --- pandas/core/indexes/datetimelike.py | 46 +++--- pandas/core/indexes/datetimes.py | 211 ++++++++++++++++------------ 2 files changed, 153 insertions(+), 104 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 04a982ae38368..6a843a2a8f096 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -91,33 +91,41 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC): def mean(self, *, skipna: bool = True, axis: int | None = 0): """ Return the mean value of the Array. + Parameters ---------- skipna : bool, default True Whether to ignore any NaT elements. axis : int, optional, default 0 Axis for the function to be applied on. + Returns ------- scalar Timestamp or Timedelta. + See Also -------- numpy.ndarray.mean : Returns the average of array elements along a given axis. Series.mean : Return the mean value in a Series. + Notes ----- mean is only defined for Datetime and Timedelta dtypes, not for Period. + Examples -------- For :class:`pandas.DatetimeIndex`: + >>> idx = pd.date_range("2001-01-01 00:00", periods=3) >>> idx DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], dtype='datetime64[ns]', freq='D') >>> idx.mean() Timestamp('2001-01-02 00:00:00') + For :class:`pandas.TimedeltaIndex`: + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], @@ -130,30 +138,34 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): @property def freq(self) -> BaseOffset | None: """ - Return the frequency object if it is set, otherwise None. - - To learn more about the frequency strings, please see - :ref:`this link`. 
+ Return the frequency object as a string if it's set, otherwise None. See Also -------- - DatetimeIndex.freq : Return the frequency object if it is set, otherwise None. - PeriodIndex.freq : Return the frequency object if it is set, otherwise None. + DatetimeIndex.inferred_freq : Returns a string representing a frequency + generated by infer_freq. Examples -------- - >>> datetimeindex = pd.date_range( - ... "2022-02-22 02:22:22", periods=10, tz="America/Chicago", freq="h" + For DatetimeIndex: + + >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D") + >>> idx.freqstr + 'D' + + The frequency can be inferred if there are more than 2 points: + + >>> idx = pd.DatetimeIndex( + ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" ... ) - >>> datetimeindex - DatetimeIndex(['2022-02-22 02:22:22-06:00', '2022-02-22 03:22:22-06:00', - '2022-02-22 04:22:22-06:00', '2022-02-22 05:22:22-06:00', - '2022-02-22 06:22:22-06:00', '2022-02-22 07:22:22-06:00', - '2022-02-22 08:22:22-06:00', '2022-02-22 09:22:22-06:00', - '2022-02-22 10:22:22-06:00', '2022-02-22 11:22:22-06:00'], - dtype='datetime64[ns, America/Chicago]', freq='h') - >>> datetimeindex.freq - + >>> idx.freqstr + '2D' + + For PeriodIndex: + + >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M") + >>> idx.freqstr + 'M' """ return self._data.freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index fa89b44f5e050..cb88f88a6e968 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -273,24 +273,29 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: def strftime(self, date_format) -> Index: """ Convert to Index using specified date_format. + Return an Index of formatted strings specified by date_format, which supports the same string format as the python standard library. Details of the string format can be found in `python string format doc `__. 
+ Formats supported by the C `strftime` API but not by the python string format doc (such as `"%R"`, `"%r"`) are not officially supported and should be preferably replaced with their supported equivalents (such as `"%H:%M"`, `"%I:%M:%S %p"`). Note that `PeriodIndex` support additional directives, detailed in `Period.strftime`. + Parameters ---------- date_format : str Date format string (e.g. "%Y-%m-%d"). + Returns ------- ndarray[object] NumPy ndarray of formatted strings. + See Also -------- to_datetime : Convert the given argument to datetime. @@ -299,13 +304,14 @@ def strftime(self, date_format) -> Index: DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. Timestamp.strftime : Format a single Timestamp. Period.strftime : Format a single Period. + Examples -------- >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), periods=3, freq="s") >>> rng.strftime("%B %d, %Y, %r") Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', 'March 10, 2018, 09:00:02 AM'], - dtype='str') + dtype='str') """ arr = self._data.strftime(date_format) return Index(arr, name=self.name, dtype=arr.dtype) @@ -313,59 +319,70 @@ def strftime(self, date_format) -> Index: def tz_convert(self, tz) -> Self: """ Convert tz-aware Datetime Array/Index from one time zone to another. + Parameters ---------- - tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, - datetime.tzinfo or None + tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None Time zone for time. Corresponding timestamps would be converted to this time zone of the Datetime Array/Index. A `tz` of None will convert to UTC and remove the timezone information. + Returns ------- Array or Index Datetme Array/Index with target `tz`. + Raises ------ TypeError If Datetime Array/Index is tz-naive. + See Also -------- DatetimeIndex.tz : A timezone that has a variable offset from UTC. 
DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a given time zone, or remove timezone from a tz-aware DatetimeIndex. + Examples -------- With the `tz` parameter, we can change the DatetimeIndex to other time zones: + >>> dti = pd.date_range( ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" ... ) + >>> dti DatetimeIndex(['2014-08-01 09:00:00+02:00', '2014-08-01 10:00:00+02:00', '2014-08-01 11:00:00+02:00'], dtype='datetime64[ns, Europe/Berlin]', freq='h') + >>> dti.tz_convert("US/Central") DatetimeIndex(['2014-08-01 02:00:00-05:00', '2014-08-01 03:00:00-05:00', '2014-08-01 04:00:00-05:00'], dtype='datetime64[ns, US/Central]', freq='h') + With the ``tz=None``, we can remove the timezone (after converting to UTC if necessary): + >>> dti = pd.date_range( ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" ... ) + >>> dti DatetimeIndex(['2014-08-01 09:00:00+02:00', '2014-08-01 10:00:00+02:00', '2014-08-01 11:00:00+02:00'], dtype='datetime64[ns, Europe/Berlin]', freq='h') + >>> dti.tz_convert(None) DatetimeIndex(['2014-08-01 07:00:00', '2014-08-01 08:00:00', '2014-08-01 09:00:00'], dtype='datetime64[ns]', freq='h') - """ + """ # noqa: E501 arr = self._data.tz_convert(tz) return type(self)._simple_new(arr, name=self.name, refs=self._references) @@ -377,15 +394,17 @@ def tz_localize( ) -> Self: """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. + This method takes a time zone (tz) naive Datetime Array/Index object and makes this time zone aware. It does not move the time to another time zone. + This method can also be used to do the inverse -- to create a time zone unaware object from an aware object. To that end, pass `tz=None`. + Parameters ---------- - tz : str, zoneinfo.ZoneInfo,, pytz.timezone, dateutil.tz.tzfile, - datetime.tzinfo or None + tz : str, zoneinfo.ZoneInfo,, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None Time zone to convert timestamps to. 
Passing ``None`` will remove the time zone information preserving local time. ambiguous : 'infer', 'NaT', bool array, default 'raise' @@ -395,6 +414,7 @@ def tz_localize( 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter dictates how ambiguous times should be handled. + - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False signifies a @@ -403,10 +423,12 @@ def tz_localize( - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise a ValueError if there are ambiguous times. - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, - default 'raise' + + nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ +default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. + - 'shift_forward' will shift the nonexistent time forward to the closest existing time - 'shift_backward' will shift the nonexistent time backward to the @@ -415,124 +437,131 @@ def tz_localize( - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise a ValueError if there are nonexistent times. + Returns ------- Same type as self Array/Index converted to the specified time zone. + Raises ------ TypeError If the Datetime Array/Index is tz-aware and tz is not None. + See Also -------- DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from one time zone to another. 
+ Examples -------- - >>> tz_naive = pd.date_range("2018-03-01 09:00", periods=3) - >>> tz_naive - DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', - '2018-03-03 09:00:00'], - dtype='datetime64[ns]', freq='D') - Localize DatetimeIndex in US/Eastern time zone: - >>> tz_aware = tz_naive.tz_localize(tz="US/Eastern") - >>> tz_aware - DatetimeIndex(['2018-03-01 09:00:00-05:00', - '2018-03-02 09:00:00-05:00', - '2018-03-03 09:00:00-05:00'], - dtype='datetime64[ns, US/Eastern]', freq=None) - With the ``tz=None``, we can remove the time zone information - while keeping the local time (not converted to UTC): - >>> tz_aware.tz_localize(None) - DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', - '2018-03-03 09:00:00'], - dtype='datetime64[ns]', freq=None) - Be careful with DST changes. When there is sequential data, pandas can - infer the DST time: - >>> s = pd.to_datetime( - ... pd.Series( - ... [ - ... "2018-10-28 01:30:00", - ... "2018-10-28 02:00:00", - ... "2018-10-28 02:30:00", - ... "2018-10-28 02:00:00", - ... "2018-10-28 02:30:00", - ... "2018-10-28 03:00:00", - ... "2018-10-28 03:30:00", - ... ] - ... ) - ... ) - >>> s.dt.tz_localize("CET", ambiguous="infer") - 0 2018-10-28 01:30:00+02:00 - 1 2018-10-28 02:00:00+02:00 - 2 2018-10-28 02:30:00+02:00 - 3 2018-10-28 02:00:00+01:00 - 4 2018-10-28 02:30:00+01:00 - 5 2018-10-28 03:00:00+01:00 - 6 2018-10-28 03:30:00+01:00 - dtype: datetime64[s, CET] - In some cases, inferring the DST is impossible. In such cases, you can - pass an ndarray to the ambiguous parameter to set the DST explicitly - >>> s = pd.to_datetime( - ... pd.Series( - ... [ - ... "2018-10-28 01:20:00", - ... "2018-10-28 02:36:00", - ... "2018-10-28 03:46:00", - ... ] - ... ) - ... 
) - >>> s.dt.tz_localize("CET", ambiguous=np.array([True, True, False])) - 0 2018-10-28 01:20:00+02:00 - 1 2018-10-28 02:36:00+02:00 - 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[s, CET] - If the DST transition causes nonexistent times, you can shift these - dates forward or backwards with a timedelta object or `'shift_forward'` - or `'shift_backwards'`. - >>> s = pd.to_datetime( - ... pd.Series( - ... ["2015-03-29 02:30:00", "2015-03-29 03:30:00"], dtype="M8[ns]" - ... ) - ... ) - >>> s.dt.tz_localize("Europe/Warsaw", nonexistent="shift_forward") - 0 2015-03-29 03:00:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - >>> s.dt.tz_localize("Europe/Warsaw", nonexistent="shift_backward") - 0 2015-03-29 01:59:59.999999999+01:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - >>> s.dt.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta("1h")) - 0 2015-03-29 03:30:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - """ + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... 
'2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[s, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[s, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'], dtype="M8[ns]")) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ # noqa: E501 arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) def to_period(self, freq=None) -> PeriodIndex: """ Cast to PeriodArray/PeriodIndex at a particular frequency. + Converts DatetimeArray/Index to PeriodArray/PeriodIndex. 
+ Parameters ---------- freq : str or Period, optional One of pandas' :ref:`period aliases ` or a Period object. Will be inferred by default. + Returns ------- PeriodArray/PeriodIndex Immutable ndarray holding ordinal values at a particular frequency. + Raises ------ ValueError When converting a DatetimeArray/Index with non-regular values, so that a frequency cannot be inferred. + See Also -------- PeriodIndex: Immutable ndarray holding ordinal values. DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. + Examples -------- >>> df = pd.DataFrame( @@ -548,7 +577,9 @@ def to_period(self, freq=None) -> PeriodIndex: >>> df.index.to_period("M") PeriodIndex(['2000-03', '2000-05', '2000-08'], dtype='period[M]') + Infer the daily frequency + >>> idx = pd.date_range("2017-01-01", periods=2) >>> idx.to_period() PeriodIndex(['2017-01-01', '2017-01-02'], @@ -562,15 +593,20 @@ def to_period(self, freq=None) -> PeriodIndex: def to_julian_date(self) -> Index: """ Convert TimeStamp to a Julian Date. + This method returns the number of days as a float since noon January 1, 4713 BC. + https://en.wikipedia.org/wiki/Julian_day + Returns ------- ndarray or Index Float values that represent each date in Julian Calendar. + See Also -------- Timestamp.to_julian_date : Equivalent method on ``Timestamp`` objects. + Examples -------- >>> idx = pd.DatetimeIndex(["2028-08-12 00:54", "2028-08-12 02:06"]) @@ -593,6 +629,7 @@ def isocalendar(self) -> DataFrame: week number, and weekday for the given Timestamp object. datetime.date.isocalendar : Return a named tuple object with three components: year, week and weekday. 
+ Examples -------- >>> idx = pd.date_range(start="2019-12-29", freq="D", periods=4) From 344a81111f2b6f4ef63e4297971a0e54d72f145b Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 26 Nov 2025 23:20:27 +0800 Subject: [PATCH 04/10] fix ERROR: Unexpected indentation --- pandas/core/indexes/datetimelike.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6a843a2a8f096..e29c2187a798f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -182,23 +182,30 @@ def asi8(self) -> npt.NDArray[np.int64]: def freqstr(self) -> str: """ Return the frequency object as a string if it's set, otherwise None. + See Also -------- DatetimeIndex.inferred_freq : Returns a string representing a frequency generated by infer_freq. + Examples -------- For DatetimeIndex: + >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D") >>> idx.freqstr 'D' + The frequency can be inferred if there are more than 2 points: + >>> idx = pd.DatetimeIndex( ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" ... ) >>> idx.freqstr '2D' + For PeriodIndex: + >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M") >>> idx.freqstr 'M' @@ -269,22 +276,27 @@ def equals(self, other: Any) -> bool: def __contains__(self, key: Any) -> bool: """ Return a boolean indicating whether the provided key is in the index. + Parameters ---------- key : label The key to check if it is present in the index. + Returns ------- bool Whether the key search is in the index. + Raises ------ TypeError If the key is not hashable. + See Also -------- Index.isin : Returns an ndarray of boolean dtype indicating whether the list-like key is in the index. + Examples -------- >>> idx = pd.Index([1, 2, 3, 4]) @@ -340,10 +352,12 @@ def _format_attrs(self): def _summary(self, name=None) -> str: """ Return a summarized representation. 
+ Parameters ---------- name : str name to use in the summary representation + Returns ------- String with a summarized representation of the index @@ -607,6 +621,7 @@ def shift(self, periods: int = 1, freq=None) -> Self: Shift index by desired number of time frequency increments. This method is for shifting the values of datetime-like indexes by a specified time increment a given number of times. + Parameters ---------- periods : int, default 1 @@ -616,10 +631,12 @@ def shift(self, periods: int = 1, freq=None) -> Self: Frequency increment to shift by. If None, the index is shifted by its own `freq` attribute. Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + Returns ------- pandas.DatetimeIndex Shifted index. + See Also -------- Index.shift : Shift values of Index. @@ -654,10 +671,12 @@ def inferred_freq(self) -> str | None: """ Tries to return a string representing a frequency generated by infer_freq. Returns None if it can't autodetect the frequency. + See Also -------- DatetimeIndex.freqstr : Return the frequency object as a string if it's set, otherwise None. + Examples -------- For DatetimeIndex: From b34333031462a89ec6a67473eab4bad34880086e Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Wed, 26 Nov 2025 23:52:53 +0800 Subject: [PATCH 05/10] fix unexpected docstring --- pandas/core/indexes/datetimelike.py | 50 +++++------------------------ 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e29c2187a798f..7285b872577f0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -984,39 +984,22 @@ def _get_insert_freq(self, loc: int, item): def delete(self, loc) -> Self: """ Make new Index with passed location(-s) deleted. + Parameters ---------- loc : int or list of int Location of item(-s) which will be deleted. 
Use a list of locations to delete more than one value at the same time. + Returns ------- Index Will be same type as self, except for RangeIndex. + See Also -------- numpy.delete : Delete any rows and column from NumPy array (ndarray). - Examples - -------- - >>> idx = pd.Index(["a", "b", "c"]) - >>> idx.delete(1) - Index(['a', 'c'], dtype='str') - >>> idx = pd.Index(["a", "b", "c"]) - >>> idx.delete([0, 2]) - Index(['b'], dtype='str') - Make new Index with passed location(-s) deleted. - Parameters - ---------- - loc : int or list of int - Location of item(-s) which will be deleted. - Use a list of locations to delete more than one value at the same time. - Returns - ------- - Index - Will be same type as self, except for RangeIndex. - See Also - -------- - numpy.delete : Delete any rows and column from NumPy array (ndarray). + Examples -------- >>> idx = pd.Index(["a", "b", "c"]) @@ -1032,43 +1015,26 @@ def delete(self, loc) -> Self: def insert(self, loc: int, item): """ - Make new Index inserting new item at location. - Follows Python numpy.insert semantics for negative values. - Parameters - ---------- - loc : int - The integer location where the new item will be inserted. - item : object - The new item to be inserted into the Index. - Returns - ------- - Index - Returns a new Index object resulting from inserting the specified item at - the specified location within the original Index. - See Also - -------- - Index.append : Append a collection of Indexes together. - Examples - -------- - >>> idx = pd.Index(["a", "b", "c"]) - >>> idx.insert(1, "x") - Index(['a', 'x', 'b', 'c'], dtype='str') Make new Index inserting new item at location. Follows Python numpy.insert semantics for negative values. + Parameters ---------- loc : int The integer location where the new item will be inserted. item : object The new item to be inserted into the Index. 
+ Returns ------- Index Returns a new Index object resulting from inserting the specified item at the specified location within the original Index. + See Also -------- Index.append : Append a collection of Indexes together. + Examples -------- >>> idx = pd.Index(["a", "b", "c"]) From bd4127bd8a0ea817748b8a1f2edc299b3ca70cb3 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 00:19:17 +0800 Subject: [PATCH 06/10] fix summary is not in 1 line --- pandas/core/arrays/datetimelike.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 95e03eae2133c..ca335c9c62102 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -908,7 +908,6 @@ def freqstr(self) -> str | None: def inferred_freq(self) -> str | None: """ Tries to return a string representing a frequency generated by infer_freq. - Returns None if it can't autodetect the frequency. See Also From 649b0512d59707dc0ed56699acbab31411c19704 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 01:15:53 +0800 Subject: [PATCH 07/10] Try to fix Summary should fit in a single line --- pandas/core/arrays/datetimelike.py | 1 + pandas/core/indexes/datetimelike.py | 56 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ca335c9c62102..95e03eae2133c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -908,6 +908,7 @@ def freqstr(self) -> str | None: def inferred_freq(self) -> str | None: """ Tries to return a string representing a frequency generated by infer_freq. + Returns None if it can't autodetect the frequency. 
See Also diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7285b872577f0..248b0d45f2d9c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -138,34 +138,30 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): @property def freq(self) -> BaseOffset | None: """ - Return the frequency object as a string if it's set, otherwise None. + Return the frequency object if it is set, otherwise None. + + To learn more about the frequency strings, please see + :ref:`this link`. See Also -------- - DatetimeIndex.inferred_freq : Returns a string representing a frequency - generated by infer_freq. + DatetimeIndex.freq : Return the frequency object if it is set, otherwise None. + PeriodIndex.freq : Return the frequency object if it is set, otherwise None. Examples -------- - For DatetimeIndex: - - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D") - >>> idx.freqstr - 'D' - - The frequency can be inferred if there are more than 2 points: - - >>> idx = pd.DatetimeIndex( - ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" + >>> datetimeindex = pd.date_range( + ... "2022-02-22 02:22:22", periods=10, tz="America/Chicago", freq="h" ... 
) - >>> idx.freqstr - '2D' - - For PeriodIndex: - - >>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M") - >>> idx.freqstr - 'M' + >>> datetimeindex + DatetimeIndex(['2022-02-22 02:22:22-06:00', '2022-02-22 03:22:22-06:00', + '2022-02-22 04:22:22-06:00', '2022-02-22 05:22:22-06:00', + '2022-02-22 06:22:22-06:00', '2022-02-22 07:22:22-06:00', + '2022-02-22 08:22:22-06:00', '2022-02-22 09:22:22-06:00', + '2022-02-22 10:22:22-06:00', '2022-02-22 11:22:22-06:00'], + dtype='datetime64[ns, America/Chicago]', freq='h') + >>> datetimeindex.freq + """ return self._data.freq @@ -669,21 +665,29 @@ def shift(self, periods: int = 1, freq=None) -> Self: @cache_readonly def inferred_freq(self) -> str | None: """ - Tries to return a string representing a frequency generated by infer_freq. - Returns None if it can't autodetect the frequency. + Return the inferred frequency of the index. + + Returns + ------- + str or None + A string representing a frequency generated by ``infer_freq``. + Returns ``None`` if the frequency cannot be inferred. See Also -------- DatetimeIndex.freqstr : Return the frequency object as a string if it's set, - otherwise None. + otherwise ``None``. 
Examples -------- - For DatetimeIndex: + For ``DatetimeIndex``: + >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"]) >>> idx.inferred_freq '2D' - For TimedeltaIndex: + + For ``TimedeltaIndex``: + >>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) >>> tdelta_idx TimedeltaIndex(['0 days', '10 days', '20 days'], From 956c3f4dfb5cd53e0af05cbcfcb8951260557af1 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 12:57:03 +0800 Subject: [PATCH 08/10] Supplement the required spaces --- pandas/core/indexes/datetimelike.py | 8 +- pandas/core/indexes/datetimes.py | 157 ++++++++++++++-------------- 2 files changed, 86 insertions(+), 79 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 248b0d45f2d9c..35c680ece2996 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1065,12 +1065,13 @@ def take( """ Return a new Index of the values selected by the indices. For internal compatibility with numpy arrays. + Parameters ---------- indices : array-like Indices to be taken. - axis : int, optional - The axis over which to select values, always 0. + axis : {0 or 'index'}, optional + The axis over which to select values, always 0 or 'index'. allow_fill : bool, default True How to handle negative values in `indices`. * False: negative values in `indices` indicate positional indices @@ -1084,15 +1085,18 @@ def take( -1 are regarded as NA. If Index doesn't hold NA, raise ValueError. **kwargs Required for compatibility with numpy. + Returns ------- Index An index formed of elements at the given indices. Will be the same type as self, except for RangeIndex. + See Also -------- numpy.ndarray.take: Return an array formed from the elements of a at the given indices. 
+ Examples -------- >>> idx = pd.Index(["a", "b", "c"]) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cb88f88a6e968..351a49fa30d1a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -425,7 +425,7 @@ def tz_localize( times. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. @@ -455,82 +455,82 @@ def tz_localize( Examples -------- - >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) - >>> tz_naive - DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', - '2018-03-03 09:00:00'], - dtype='datetime64[ns]', freq='D') - - Localize DatetimeIndex in US/Eastern time zone: - - >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') - >>> tz_aware - DatetimeIndex(['2018-03-01 09:00:00-05:00', - '2018-03-02 09:00:00-05:00', - '2018-03-03 09:00:00-05:00'], - dtype='datetime64[ns, US/Eastern]', freq=None) - - With the ``tz=None``, we can remove the time zone information - while keeping the local time (not converted to UTC): - - >>> tz_aware.tz_localize(None) - DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', - '2018-03-03 09:00:00'], - dtype='datetime64[ns]', freq=None) - - Be careful with DST changes. When there is sequential data, pandas can - infer the DST time: - - >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 03:00:00', - ... '2018-10-28 03:30:00'])) - >>> s.dt.tz_localize('CET', ambiguous='infer') - 0 2018-10-28 01:30:00+02:00 - 1 2018-10-28 02:00:00+02:00 - 2 2018-10-28 02:30:00+02:00 - 3 2018-10-28 02:00:00+01:00 - 4 2018-10-28 02:30:00+01:00 - 5 2018-10-28 03:00:00+01:00 - 6 2018-10-28 03:30:00+01:00 - dtype: datetime64[s, CET] - - In some cases, inferring the DST is impossible. 
In such cases, you can - pass an ndarray to the ambiguous parameter to set the DST explicitly - - >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', - ... '2018-10-28 02:36:00', - ... '2018-10-28 03:46:00'])) - >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) - 0 2018-10-28 01:20:00+02:00 - 1 2018-10-28 02:36:00+02:00 - 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[s, CET] - - If the DST transition causes nonexistent times, you can shift these - dates forward or backwards with a timedelta object or `'shift_forward'` - or `'shift_backwards'`. - - >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', - ... '2015-03-29 03:30:00'], dtype="M8[ns]")) - >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') - 0 2015-03-29 03:00:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - - >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') - 0 2015-03-29 01:59:59.999999999+01:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - - >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) - 0 2015-03-29 03:30:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] - """ # noqa: E501 + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq=None) + + Be 
careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[s, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[s, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... 
'2015-03-29 03:30:00'], dtype="M8[ns]")) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + """ # noqa: E501 arr = self._data.tz_localize(tz, ambiguous, nonexistent) return type(self)._simple_new(arr, name=self.name) @@ -987,13 +987,16 @@ def _maybe_cast_slice_bound(self, label, side: str): This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing strings containing formatted datetimes. + Parameters ---------- label : object side : {'left', 'right'} + Returns ------- label : object + Notes ----- Value of `side` parameter should be validated in caller. 
From 38b71b0eef9ae7e0c48ff494faeb8d44e6abbcff Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Sat, 29 Nov 2025 15:12:32 +0800 Subject: [PATCH 09/10] Fix the accuracy --- pandas/core/indexes/datetimelike.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 35c680ece2996..6cbb5cf027d31 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -120,7 +120,7 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): >>> idx = pd.date_range("2001-01-01 00:00", periods=3) >>> idx DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], - dtype='datetime64[ns]', freq='D') + dtype='datetime64[us]', freq='D') >>> idx.mean() Timestamp('2001-01-02 00:00:00') @@ -129,7 +129,7 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], - dtype='timedelta64[ns]', freq=None) + dtype='timedelta64[us]', freq=None) >>> tdelta_idx.mean() Timedelta('2 days 00:00:00') """ From afd8a601e9264dfebba423a158e9b69bb3d63e86 Mon Sep 17 00:00:00 2001 From: cloudboat <15851404+cloudboat111@user.noreply.gitee.com> Date: Sat, 29 Nov 2025 16:11:23 +0800 Subject: [PATCH 10/10] Fix accuracy in specific place --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 298bf6a8e8d44..01b1016007951 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -129,7 +129,7 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], - dtype='timedelta64[us]', freq=None) + dtype='timedelta64[ns]', freq=None) >>> 
tdelta_idx.mean() Timedelta('2 days 00:00:00') """