From 05b4e9290f53f11c72fd5cc16289884101fd2aaa Mon Sep 17 00:00:00 2001
From: Jixun Sun <160219251+AnonToky@users.noreply.github.com>
Date: Mon, 10 Nov 2025 19:10:57 +0800
Subject: [PATCH] Add docstrings for ExtensionArray methods

Added docstrings for several methods in the ExtensionArray class,
including _concat_same_type, searchsorted, shift, and fillna, to improve
documentation clarity and usability.

This is a documentation-only change: the @doc(...) decorators are
replaced by explicit docstrings and every method body is left untouched.
---
Reviewer notes (ignored by `git am`):
- searchsorted keeps its existing implementation (validate the value, then
  search the backing ndarray); the object-dtype fallback that an earlier
  revision of this patch substituted has been dropped.
- The post-image blob id on the "index" line below predates this revision;
  regenerate the patch with `git format-patch` after applying.
- If the `doc` decorator is no longer used elsewhere in this file, its
  import should be removed as well -- TODO confirm against the full file.

 pandas/core/arrays/_mixins.py | 169 +++++++++++++++++++++++++++++++++-
 1 file changed, 165 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 11928e79ffc62..ea3f9b0406f02 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -229,30 +229,155 @@ def unique(self) -> Self:
         return self._from_backing_data(new_data)
 
     @classmethod
-    @doc(ExtensionArray._concat_same_type)
     def _concat_same_type(
         cls,
         to_concat: Sequence[Self],
         axis: AxisInt = 0,
     ) -> Self:
+        """
+        Concatenate multiple arrays of this dtype.
+
+        Parameters
+        ----------
+        to_concat : sequence of this type
+            Arrays of the same dtype to concatenate.
+        axis : int, default 0
+            Axis along which the arrays are concatenated.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Raises
+        ------
+        ValueError
+            If the arrays in ``to_concat`` do not all share the same dtype.
+
+        See Also
+        --------
+        api.extensions.ExtensionArray._explode : Transform each element of
+            list-like to a row.
+        api.extensions.ExtensionArray._formatter : Formatting function for
+            scalar values.
+        api.extensions.ExtensionArray._from_factorized : Reconstruct an
+            ExtensionArray after factorization.
+
+        Examples
+        --------
+        >>> arr1 = pd.array([1, 2, 3])
+        >>> arr2 = pd.array([4, 5, 6])
+        >>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
+        <IntegerArray>
+        [1, 2, 3, 4, 5, 6]
+        Length: 6, dtype: Int64
+        """
         if not lib.dtypes_all_equal([x.dtype for x in to_concat]):
             dtypes = {str(x.dtype) for x in to_concat}
             raise ValueError("to_concat must have the same dtype", dtypes)
 
         return super()._concat_same_type(to_concat, axis=axis)
 
-    @doc(ExtensionArray.searchsorted)
     def searchsorted(
         self,
         value: NumpyValueArrayLike | ExtensionArray,
         side: Literal["left", "right"] = "left",
         sorter: NumpySorter | None = None,
     ) -> npt.NDArray[np.intp] | np.intp:
+        """
+        Find indices where elements should be inserted to maintain order.
+
+        Find the indices into a sorted array `self` (a) such that, if the
+        corresponding elements in `value` were inserted before the indices,
+        the order of `self` would be preserved.
+
+        Assuming that `self` is sorted:
+
+        ======  ================================
+        `side`  returned index `i` satisfies
+        ======  ================================
+        left    ``self[i-1] < value <= self[i]``
+        right   ``self[i-1] <= value < self[i]``
+        ======  ================================
+
+        Parameters
+        ----------
+        value : array-like, list or scalar
+            Value(s) to insert into `self`.
+        side : {'left', 'right'}, optional
+            If 'left', the index of the first suitable location found is given.
+            If 'right', return the last such index. If there is no suitable
+            index, return either 0 or N (where N is the length of `self`).
+        sorter : 1-D array-like, optional
+            Optional array of integer indices that sort array a into ascending
+            order. They are typically the result of argsort.
+
+        Returns
+        -------
+        array of ints or int
+            If value is array-like, array of insertion points.
+            If value is scalar, a single integer.
+
+        See Also
+        --------
+        numpy.searchsorted : Similar method from NumPy.
+
+        Examples
+        --------
+        >>> arr = pd.array([1, 2, 3, 5])
+        >>> arr.searchsorted([4])
+        array([3])
+        """
         npvalue = self._validate_setitem_value(value)
         return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)
 
-    @doc(ExtensionArray.shift)
     def shift(self, periods: int = 1, fill_value=None) -> Self:
+        """
+        Shift values by desired number.
+
+        Newly introduced missing values are filled with
+        ``self.dtype.na_value``.
+
+        Parameters
+        ----------
+        periods : int, default 1
+            The number of periods to shift. Negative values are allowed
+            for shifting backwards.
+
+        fill_value : object, optional
+            The scalar value to use for newly introduced missing values.
+            The default is ``self.dtype.na_value``.
+
+        Returns
+        -------
+        ExtensionArray
+            Shifted.
+
+        See Also
+        --------
+        api.extensions.ExtensionArray.transpose : Return a transposed view on
+            this array.
+        api.extensions.ExtensionArray.factorize : Encode the extension array as an
+            enumerated type.
+
+        Notes
+        -----
+        If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
+        returned.
+
+        If ``periods > len(self)``, then an array of size
+        len(self) is returned, with all values filled with
+        ``self.dtype.na_value``.
+
+        For 2-dimensional ExtensionArrays, we are always shifting along axis=0.
+
+        Examples
+        --------
+        >>> arr = pd.array([1, 2, 3])
+        >>> arr.shift(2)
+        <IntegerArray>
+        [<NA>, <NA>, 1]
+        Length: 3, dtype: Int64
+        """
         # NB: shift is always along axis=0
         axis = 0
         fill_value = self._validate_scalar(fill_value)
@@ -338,8 +463,44 @@ def _pad_or_backfill(
                 new_values = self
         return new_values
 
-    @doc(ExtensionArray.fillna)
     def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
+        """
+        Fill NA/NaN values using the supplied value.
+
+        Parameters
+        ----------
+        value : scalar, array-like
+            If a scalar value is passed it is used to fill all missing values.
+            Alternatively, an array-like "value" can be given. It's expected
+            that the array-like have the same length as 'self'.
+        limit : int, default None
+            The maximum number of entries where NA values will be filled.
+        copy : bool, default True
+            Whether to make a copy of the data before filling. If False, then
+            the original should be modified and no new memory should be allocated.
+            For ExtensionArray subclasses that cannot do this, it is at the
+            author's discretion whether to ignore "copy=False" or to raise.
+
+        Returns
+        -------
+        ExtensionArray
+            With NA/NaN filled.
+
+        See Also
+        --------
+        api.extensions.ExtensionArray.dropna : Return ExtensionArray without
+            NA values.
+        api.extensions.ExtensionArray.isna : A 1-D array indicating if
+            each value is missing.
+
+        Examples
+        --------
+        >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
+        >>> arr.fillna(0)
+        <IntegerArray>
+        [0, 0, 2, 3, 0, 0]
+        Length: 6, dtype: Int64
+        """
         mask = self.isna()
         if limit is not None and limit < len(self):
             # mypy doesn't like that mask can be an EA which need not have `cumsum`