Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 167 additions & 6 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,30 +229,155 @@ def unique(self) -> Self:
return self._from_backing_data(new_data)

@classmethod
def _concat_same_type(
    cls,
    to_concat: Sequence[Self],
    axis: AxisInt = 0,
) -> Self:
    """
    Concatenate multiple arrays of this dtype.

    Parameters
    ----------
    to_concat : sequence of this type
        Arrays of the same dtype to concatenate.
    axis : int, default 0
        Axis along which to concatenate. It is forwarded unchanged to the
        parent class implementation.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    ValueError
        If ``lib.dtypes_all_equal`` reports that the arrays in
        ``to_concat`` do not all share one dtype. The set of offending
        dtype names is attached as the second exception argument.

    See Also
    --------
    api.extensions.ExtensionArray._explode : Transform each element of
        list-like to a row.
    api.extensions.ExtensionArray._formatter : Formatting function for
        scalar values.
    api.extensions.ExtensionArray._from_factorized : Reconstruct an
        ExtensionArray after factorization.

    Examples
    --------
    >>> arr1 = pd.array([1, 2, 3])
    >>> arr2 = pd.array([4, 5, 6])
    >>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
    <IntegerArray>
    [1, 2, 3, 4, 5, 6]
    Length: 6, dtype: Int64
    """
    # Reject mixed dtypes up front; the parent implementation is only
    # reached once every input agrees on its dtype.
    if not lib.dtypes_all_equal([x.dtype for x in to_concat]):
        # Collect the distinct dtype names purely for the error report.
        dtypes = {str(x.dtype) for x in to_concat}
        raise ValueError("to_concat must have the same dtype", dtypes)

    return super()._concat_same_type(to_concat, axis=axis)

def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `self` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Assuming that `self` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``self[i-1] < value <= self[i]``
    right   ``self[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    value : array-like, list or scalar
        Value(s) to insert into `self`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index. If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3, 5])
    >>> arr.searchsorted([4])
    array([3])
    """
    # ``value`` is passed through ``_validate_setitem_value`` first
    # (presumably validating/converting it to something comparable with the
    # backing data -- confirm against that helper), then the search is
    # delegated to the backing ndarray's own ``searchsorted``.
    npvalue = self._validate_setitem_value(value)
    return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter)

@doc(ExtensionArray.shift)
def shift(self, periods: int = 1, fill_value=None) -> Self:
"""
Shift values by desired number.

Newly introduced missing values are filled with
``self.dtype.na_value``.

Parameters
----------
periods : int, default 1
The number of periods to shift. Negative values are allowed
for shifting backwards.

fill_value : object, optional
The scalar value to use for newly introduced missing values.
The default is ``self.dtype.na_value``.

Returns
-------
ExtensionArray
Shifted.

See Also
--------
api.extensions.ExtensionArray.transpose : Return a transposed view on
this array.
api.extensions.ExtensionArray.factorize : Encode the extension array as an
enumerated type.

Notes
-----
If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
returned.

If ``periods > len(self)``, then an array of size
len(self) is returned, with all values filled with
``self.dtype.na_value``.

For 2-dimensional ExtensionArrays, we are always shifting along axis=0.

Examples
--------
>>> arr = pd.array([1, 2, 3])
>>> arr.shift(2)
<IntegerArray>
[<NA>, <NA>, 1]
Length: 3, dtype: Int64
"""
# NB: shift is always along axis=0
axis = 0
fill_value = self._validate_scalar(fill_value)
Expand Down Expand Up @@ -338,8 +463,44 @@ def _pad_or_backfill(
new_values = self
return new_values

@doc(ExtensionArray.fillna)
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
"""
Fill NA/NaN values using the specified method.

Parameters
----------
value : scalar, array-like
If a scalar value is passed it is used to fill all missing values.
Alternatively, an array-like "value" can be given. It's expected
that the array-like have the same length as 'self'.
limit : int, default None
The maximum number of entries where NA values will be filled.
copy : bool, default True
Whether to make a copy of the data before filling. If False, then
the original should be modified and no new memory should be allocated.
For ExtensionArray subclasses that cannot do this, it is at the
author's discretion whether to ignore "copy=False" or to raise.

Returns
-------
ExtensionArray
With NA/NaN filled.

See Also
--------
api.extensions.ExtensionArray.dropna : Return ExtensionArray without
NA values.
api.extensions.ExtensionArray.isna : A 1-D array indicating if
each value is missing.

Examples
--------
>>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
>>> arr.fillna(0)
<IntegerArray>
[0, 0, 2, 3, 0, 0]
Length: 6, dtype: Int64
"""
mask = self.isna()
if limit is not None and limit < len(self):
# mypy doesn't like that mask can be an EA which need not have `cumsum`
Expand Down
Loading