
Commit 13e69ae

Merge branch 'main' into depr-tuple-arith
2 parents: f245a45 + 1efb8c3

24 files changed: +2751 additions, -1544 deletions


.github/workflows/codeql.yml

Lines changed: 3 additions & 3 deletions
@@ -28,8 +28,8 @@ jobs:

    steps:
      - uses: actions/checkout@v5
-      - uses: github/codeql-action/init@v3
+      - uses: github/codeql-action/init@v4
        with:
          languages: ${{ matrix.language }}
-      - uses: github/codeql-action/autobuild@v3
-      - uses: github/codeql-action/analyze@v3
+      - uses: github/codeql-action/autobuild@v4
+      - uses: github/codeql-action/analyze@v4

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ jobs:
        run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

      - name: Build wheels
-        uses: pypa/cibuildwheel@v3.2.0
+        uses: pypa/cibuildwheel@v3.2.1
        with:
          package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
        env:

ci/deps/actions-311.yaml

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ dependencies:
  - psycopg2>=2.9.10
  - pyarrow>=13.0.0
  - pyiceberg>=0.8.1
+  - pydantic<2.12.0  # TMP pin to avoid pyiceberg/pydantic issues
  - pymysql>=1.1.1
  - pyreadstat>=1.2.8
  - pytables>=3.10.1

ci/deps/actions-312.yaml

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ dependencies:
  - psycopg2>=2.9.10
  - pyarrow>=13.0.0
  - pyiceberg>=0.8.1
+  - pydantic<2.12.0  # TMP pin to avoid pyiceberg/pydantic issues
  - pymysql>=1.1.1
  - pyreadstat>=1.2.8
  - pytables>=3.10.1

pandas/_testing/_warnings.py

Lines changed: 3 additions & 1 deletion
@@ -110,7 +110,9 @@ class for all warnings. To raise multiple types of exceptions,
            if isinstance(match, tuple)
            else (match,) * len(expected_warning)
        )
-        for warning_type, warning_match in zip(expected_warning, match):
+        for warning_type, warning_match in zip(
+            expected_warning, match, strict=True
+        ):
            _assert_caught_expected_warnings(
                caught_warnings=w,
                expected_warning=warning_type,

pandas/_testing/asserters.py

Lines changed: 2 additions & 2 deletions
@@ -675,7 +675,7 @@ def _raise(left, right, err_msg) -> NoReturn:
            )

            diff = 0
-            for left_arr, right_arr in zip(left, right):
+            for left_arr, right_arr in zip(left, right, strict=True):
                # count up differences
                if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
                    diff += 1
@@ -1447,7 +1447,7 @@ def assert_copy(iter1, iter2, **eql_kwargs) -> None:
    the same object. (Does not check that items
    in sequences are also not the same object)
    """
-    for elem1, elem2 in zip(iter1, iter2):
+    for elem1, elem2 in zip(iter1, iter2, strict=True):
        assert_almost_equal(elem1, elem2, **eql_kwargs)
        msg = (
            f"Expected object {type(elem1)!r} and object {type(elem2)!r} to be "

pandas/core/frame.py

Lines changed: 141 additions & 34 deletions
@@ -9358,21 +9358,140 @@ def update(

    # ----------------------------------------------------------------------
    # Data reshaping
-    @Appender(
-        dedent(
-            """
+    @deprecate_nonkeyword_arguments(
+        Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
+    )
+    def groupby(
+        self,
+        by=None,
+        level: IndexLabel | None = None,
+        as_index: bool = True,
+        sort: bool = True,
+        group_keys: bool = True,
+        observed: bool = True,
+        dropna: bool = True,
+    ) -> DataFrameGroupBy:
+        """
+        Group DataFrame using a mapper or by a Series of columns.
+
+        A groupby operation involves some combination of splitting the
+        object, applying a function, and combining the results. This can be
+        used to group large amounts of data and compute operations on these
+        groups.
+
+        Parameters
+        ----------
+        by : mapping, function, label, pd.Grouper or list of such
+            Used to determine the groups for the groupby.
+            If ``by`` is a function, it's called on each value of the object's
+            index. If a dict or Series is passed, the Series or dict VALUES
+            will be used to determine the groups (the Series' values are first
+            aligned; see ``.align()`` method). If a list or ndarray of length
+            equal to the selected axis is passed (see the `groupby user guide
+            <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
+            the values are used as-is to determine the groups. A label or list
+            of labels may be passed to group by the columns in ``self``.
+            Notice that a tuple is interpreted as a (single) key.
+        level : int, level name, or sequence of such, default None
+            If the axis is a MultiIndex (hierarchical), group by a particular
+            level or levels. Do not specify both ``by`` and ``level``.
+        as_index : bool, default True
+            Return object with group labels as the
+            index. Only relevant for DataFrame input. as_index=False is
+            effectively "SQL-style" grouped output. This argument has no effect
+            on filtrations (see the `filtrations in the user guide
+            <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
+            such as ``head()``, ``tail()``, ``nth()`` and in transformations
+            (see the `transformations in the user guide
+            <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
+        sort : bool, default True
+            Sort group keys. Get better performance by turning this off.
+            Note this does not influence the order of observations within each
+            group. Groupby preserves the order of rows within each group. If False,
+            the groups will appear in the same order as they did in the original
+            DataFrame.
+            This argument has no effect on filtrations (see the `filtrations
+            in the user guide
+            <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
+            such as ``head()``, ``tail()``, ``nth()`` and in transformations
+            (see the `transformations in the user guide
+            <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
+
+            .. versionchanged:: 2.0.0
+
+                Specifying ``sort=False`` with an ordered categorical grouper will no
+                longer sort the values.
+
+        group_keys : bool, default True
+            When calling apply and the ``by`` argument produces a like-indexed
+            (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
+            index to identify pieces. By default group keys are not included
+            when the result's index (and column) labels match the inputs, and
+            are included otherwise.
+
+            .. versionchanged:: 1.5.0
+
+               Warns that ``group_keys`` will no longer be ignored when the
+               result from ``apply`` is a like-indexed Series or DataFrame.
+               Specify ``group_keys`` explicitly to include the group keys or
+               not.
+
+            .. versionchanged:: 2.0.0
+
+               ``group_keys`` now defaults to ``True``.
+
+        observed : bool, default True
+            This only applies if any of the groupers are Categoricals.
+            If True: only show observed values for categorical groupers.
+            If False: show all values for categorical groupers.
+
+            .. versionchanged:: 3.0.0
+
+                The default value is now ``True``.
+
+        dropna : bool, default True
+            If True, and if group keys contain NA values, NA values together
+            with row/column will be dropped.
+            If False, NA values will also be treated as the key in groups.
+
+        Returns
+        -------
+        pandas.api.typing.DataFrameGroupBy
+            Returns a groupby object that contains information about the groups.
+
+        See Also
+        --------
+        resample : Convenience method for frequency conversion and resampling
+            of time series.
+
+        Notes
+        -----
+        See the `user guide
+        <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
+        detailed usage and examples, including splitting an object into groups,
+        iterating through groups, selecting a group, aggregation, and more.
+
+        The implementation of groupby is hash-based, meaning in particular that
+        objects that compare as equal will be considered to be in the same group.
+        An exception to this is that pandas has special handling of NA values:
+        any NA values will be collapsed to a single group, regardless of how
+        they compare. See the user guide linked above for more details.
+
        Examples
        --------
-        >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
-        ...                               'Parrot', 'Parrot'],
-        ...                    'Max Speed': [380., 370., 24., 26.]})
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
+        ...         "Max Speed": [380.0, 370.0, 24.0, 26.0],
+        ...     }
+        ... )
        >>> df
           Animal  Max Speed
        0  Falcon      380.0
        1  Falcon      370.0
        2  Parrot       24.0
        3  Parrot       26.0
-        >>> df.groupby(['Animal']).mean()
+        >>> df.groupby(["Animal"]).mean()
                Max Speed
        Animal
        Falcon      375.0
@@ -9383,11 +9502,12 @@ def update(
        We can groupby different levels of a hierarchical index
        using the `level` parameter:

-        >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
-        ...           ['Captive', 'Wild', 'Captive', 'Wild']]
-        >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
-        >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
-        ...                   index=index)
+        >>> arrays = [
+        ...     ["Falcon", "Falcon", "Parrot", "Parrot"],
+        ...     ["Captive", "Wild", "Captive", "Wild"],
+        ... ]
+        >>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
+        >>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
        >>> df
                        Max Speed
        Animal Type
@@ -9425,7 +9545,7 @@ def update(
        2.0  2  5
        NaN  1  4

-        >>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
+        >>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
        >>> df = pd.DataFrame(arr, columns=["a", "b", "c"])

        >>> df.groupby(by="a").sum()
@@ -9444,40 +9564,27 @@ def update(
        When using ``.apply()``, use ``group_keys`` to include or exclude the
        group keys. The ``group_keys`` argument defaults to ``True`` (include).

-        >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
-        ...                               'Parrot', 'Parrot'],
-        ...                    'Max Speed': [380., 370., 24., 26.]})
-        >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
+        ...         "Max Speed": [380.0, 370.0, 24.0, 26.0],
+        ...     }
+        ... )
+        >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
                  Max Speed
        Animal
        Falcon 0      380.0
               1      370.0
        Parrot 2       24.0
               3       26.0

-        >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
+        >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
           Max Speed
        0      380.0
        1      370.0
        2       24.0
        3       26.0
        """
-        )
-    )
-    @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
-    @deprecate_nonkeyword_arguments(
-        Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
-    )
-    def groupby(
-        self,
-        by=None,
-        level: IndexLabel | None = None,
-        as_index: bool = True,
-        sort: bool = True,
-        group_keys: bool = True,
-        observed: bool = True,
-        dropna: bool = True,
-    ) -> DataFrameGroupBy:
        from pandas.core.groupby.generic import DataFrameGroupBy

        if level is None and by is None:
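
With the docstring now inline on the method, the one remaining decorator is easier to read in isolation: `deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby")` keeps only `by` and `level` positional. A rough sketch of the intended effect, assuming the decorator emits the warning at call time as it does for other deprecated pandas signatures (the warning class and any message text here are assumptions, not taken from this diff):

import warnings

import pandas as pd

df = pd.DataFrame({"Animal": ["Falcon", "Parrot"], "Max Speed": [380.0, 24.0]})

# Keyword usage is unaffected.
df.groupby("Animal", as_index=False).mean()

# Passing a later argument positionally (here as_index) should trigger the
# deprecation warning configured by the decorator above.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    df.groupby("Animal", None, False).mean()
print([type(w.message).__name__ for w in caught])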

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
@@ -7627,8 +7627,8 @@ def replace(
                    # Operate column-wise
                    if self.ndim == 1:
                        raise ValueError(
-                            "Series.replace cannot use dict-like to_replace "
-                            "and non-None value"
+                            "Series.replace cannot specify both a dict-like "
+                            "'to_replace' and a 'value'"
                        )
                    mapping = {
                        col: (to_rep, value) for col, to_rep in to_replace.items()
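
The reworded message belongs to the Series path where a dict-like `to_replace` is combined with an explicit `value`, which is rejected. A small reproduction of that code path (the printed text should match the '+' lines above):

import pandas as pd

ser = pd.Series([1, 2, 3])

# A dict-like to_replace already carries the replacement values, so also
# passing `value` is ambiguous and raises for Series.
try:
    ser.replace({1: 10}, value=99)
except ValueError as exc:
    print(exc)  # Series.replace cannot specify both a dict-like 'to_replace' and a 'value'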

pandas/core/indexes/interval.py

Lines changed: 2 additions & 2 deletions
@@ -1056,8 +1056,8 @@ def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

-        other_tups = set(zip(other.left, other.right))
-        for i, tup in enumerate(zip(self.left, self.right)):
+        other_tups = set(zip(other.left, other.right, strict=True))
+        for i, tup in enumerate(zip(self.left, self.right, strict=True)):
            if tup in other_tups:
                mask[i] = True
