Skip to content

Commit c770006

Browse files
authored
Merge branch 'main' into replace_ensureclean_test_sas
2 parents aaa4c1c + 066a4f7 commit c770006

33 files changed

+2800
-1562
lines changed

.github/workflows/codeql.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ jobs:
2828

2929
steps:
3030
- uses: actions/checkout@v5
31-
- uses: github/codeql-action/init@v3
31+
- uses: github/codeql-action/init@v4
3232
with:
3333
languages: ${{ matrix.language }}
34-
- uses: github/codeql-action/autobuild@v3
35-
- uses: github/codeql-action/analyze@v3
34+
- uses: github/codeql-action/autobuild@v4
35+
- uses: github/codeql-action/analyze@v4

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ jobs:
162162
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
163163

164164
- name: Build wheels
165-
uses: pypa/cibuildwheel@v3.2.0
165+
uses: pypa/cibuildwheel@v3.2.1
166166
with:
167167
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
168168
env:

ci/deps/actions-311.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dependencies:
4444
- psycopg2>=2.9.10
4545
- pyarrow>=13.0.0
4646
- pyiceberg>=0.8.1
47+
- pydantic<2.12.0 # TMP pin to avoid pyiceberg/pydantic issues
4748
- pymysql>=1.1.1
4849
- pyreadstat>=1.2.8
4950
- pytables>=3.10.1

ci/deps/actions-312.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dependencies:
4444
- psycopg2>=2.9.10
4545
- pyarrow>=13.0.0
4646
- pyiceberg>=0.8.1
47+
- pydantic<2.12.0 # TMP pin to avoid pyiceberg/pydantic issues
4748
- pymysql>=1.1.1
4849
- pyreadstat>=1.2.8
4950
- pytables>=3.10.1

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ Other Deprecations
716716
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
717717
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
718718
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
719+
- Deprecated option ``future.no_silent_downcasting``, as it is no longer used. In a future version, accessing this option will raise an error (:issue:`59502`)
719720
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
720721

721722
.. ---------------------------------------------------------------------------
@@ -1016,8 +1017,8 @@ Strings
10161017
Interval
10171018
^^^^^^^^
10181019
- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`)
1020+
- Bug in :class:`Index`, :class:`Series`, and :class:`DataFrame` constructors casting a sequence of :class:`Interval` subclass objects to :class:`Interval` (:issue:`46945`)
10191021
- Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`)
1020-
-
10211022

10221023
Indexing
10231024
^^^^^^^^

pandas/_libs/lib.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2255,7 +2255,8 @@ cpdef bint is_interval_array(ndarray values):
22552255
for i in range(n):
22562256
val = values[i]
22572257

2258-
if isinstance(val, Interval):
2258+
if type(val) is Interval:
2259+
# GH#46945 catch Interval exactly, excluding subclasses
22592260
if closed is None:
22602261
closed = val.closed
22612262
numeric = (

pandas/_testing/_warnings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ class for all warnings. To raise multiple types of exceptions,
110110
if isinstance(match, tuple)
111111
else (match,) * len(expected_warning)
112112
)
113-
for warning_type, warning_match in zip(expected_warning, match):
113+
for warning_type, warning_match in zip(
114+
expected_warning, match, strict=True
115+
):
114116
_assert_caught_expected_warnings(
115117
caught_warnings=w,
116118
expected_warning=warning_type,

pandas/_testing/asserters.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ def _raise(left, right, err_msg) -> NoReturn:
675675
)
676676

677677
diff = 0
678-
for left_arr, right_arr in zip(left, right):
678+
for left_arr, right_arr in zip(left, right, strict=True):
679679
# count up differences
680680
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
681681
diff += 1
@@ -1447,7 +1447,7 @@ def assert_copy(iter1, iter2, **eql_kwargs) -> None:
14471447
the same object. (Does not check that items
14481448
in sequences are also not the same object)
14491449
"""
1450-
for elem1, elem2 in zip(iter1, iter2):
1450+
for elem1, elem2 in zip(iter1, iter2, strict=True):
14511451
assert_almost_equal(elem1, elem2, **eql_kwargs)
14521452
msg = (
14531453
f"Expected object {type(elem1)!r} and object {type(elem2)!r} to be "

pandas/core/config_init.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
is_text,
2929
)
3030

31+
from pandas.errors import Pandas4Warning
32+
3133
# compute
3234

3335
use_bottleneck_doc = """
@@ -899,10 +901,10 @@ def register_converter_cb(key: str) -> None:
899901
cf.register_option(
900902
"no_silent_downcasting",
901903
False,
902-
"Whether to opt-in to the future behavior which will *not* silently "
903-
"downcast results from Series and DataFrame `where`, `mask`, and `clip` "
904-
"methods. "
905-
"Silent downcasting will be removed in pandas 3.0 "
906-
"(at which point this option will be deprecated).",
904+
"This option is deprecated and will be removed in a future version. "
905+
"It has no effect.",
907906
validator=is_one_of_factory([True, False]),
908907
)
908+
909+
# GH#59502
910+
cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)

pandas/core/frame.py

Lines changed: 141 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -9346,21 +9346,140 @@ def update(
93469346

93479347
# ----------------------------------------------------------------------
93489348
# Data reshaping
9349-
@Appender(
9350-
dedent(
9351-
"""
9349+
@deprecate_nonkeyword_arguments(
9350+
Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
9351+
)
9352+
def groupby(
9353+
self,
9354+
by=None,
9355+
level: IndexLabel | None = None,
9356+
as_index: bool = True,
9357+
sort: bool = True,
9358+
group_keys: bool = True,
9359+
observed: bool = True,
9360+
dropna: bool = True,
9361+
) -> DataFrameGroupBy:
9362+
"""
9363+
Group DataFrame using a mapper or by a Series of columns.
9364+
9365+
A groupby operation involves some combination of splitting the
9366+
object, applying a function, and combining the results. This can be
9367+
used to group large amounts of data and compute operations on these
9368+
groups.
9369+
9370+
Parameters
9371+
----------
9372+
by : mapping, function, label, pd.Grouper or list of such
9373+
Used to determine the groups for the groupby.
9374+
If ``by`` is a function, it's called on each value of the object's
9375+
index. If a dict or Series is passed, the Series or dict VALUES
9376+
will be used to determine the groups (the Series' values are first
9377+
aligned; see ``.align()`` method). If a list or ndarray of length
9378+
equal to the selected axis is passed (see the `groupby user guide
9379+
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
9380+
the values are used as-is to determine the groups. A label or list
9381+
of labels may be passed to group by the columns in ``self``.
9382+
Notice that a tuple is interpreted as a (single) key.
9383+
level : int, level name, or sequence of such, default None
9384+
If the axis is a MultiIndex (hierarchical), group by a particular
9385+
level or levels. Do not specify both ``by`` and ``level``.
9386+
as_index : bool, default True
9387+
Return object with group labels as the
9388+
index. Only relevant for DataFrame input. as_index=False is
9389+
effectively "SQL-style" grouped output. This argument has no effect
9390+
on filtrations (see the `filtrations in the user guide
9391+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9392+
such as ``head()``, ``tail()``, ``nth()`` and in transformations
9393+
(see the `transformations in the user guide
9394+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9395+
sort : bool, default True
9396+
Sort group keys. Get better performance by turning this off.
9397+
Note this does not influence the order of observations within each
9398+
group. Groupby preserves the order of rows within each group. If False,
9399+
the groups will appear in the same order as they did in the original
9400+
DataFrame.
9401+
This argument has no effect on filtrations (see the `filtrations
9402+
in the user guide
9403+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9404+
such as ``head()``, ``tail()``, ``nth()`` and in transformations
9405+
(see the `transformations in the user guide
9406+
<https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9407+
9408+
.. versionchanged:: 2.0.0
9409+
9410+
Specifying ``sort=False`` with an ordered categorical grouper will no
9411+
longer sort the values.
9412+
9413+
group_keys : bool, default True
9414+
When calling apply and the ``by`` argument produces a like-indexed
9415+
(i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
9416+
index to identify pieces. By default group keys are not included
9417+
when the result's index (and column) labels match the inputs, and
9418+
are included otherwise.
9419+
9420+
.. versionchanged:: 1.5.0
9421+
9422+
Warns that ``group_keys`` will no longer be ignored when the
9423+
result from ``apply`` is a like-indexed Series or DataFrame.
9424+
Specify ``group_keys`` explicitly to include the group keys or
9425+
not.
9426+
9427+
.. versionchanged:: 2.0.0
9428+
9429+
``group_keys`` now defaults to ``True``.
9430+
9431+
observed : bool, default True
9432+
This only applies if any of the groupers are Categoricals.
9433+
If True: only show observed values for categorical groupers.
9434+
If False: show all values for categorical groupers.
9435+
9436+
.. versionchanged:: 3.0.0
9437+
9438+
The default value is now ``True``.
9439+
9440+
dropna : bool, default True
9441+
If True, and if group keys contain NA values, NA values together
9442+
with row/column will be dropped.
9443+
If False, NA values will also be treated as a key in groups.
9444+
9445+
Returns
9446+
-------
9447+
pandas.api.typing.DataFrameGroupBy
9448+
Returns a groupby object that contains information about the groups.
9449+
9450+
See Also
9451+
--------
9452+
resample : Convenience method for frequency conversion and resampling
9453+
of time series.
9454+
9455+
Notes
9456+
-----
9457+
See the `user guide
9458+
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__ for more
9459+
detailed usage and examples, including splitting an object into groups,
9460+
iterating through groups, selecting a group, aggregation, and more.
9461+
9462+
The implementation of groupby is hash-based, meaning in particular that
9463+
objects that compare as equal will be considered to be in the same group.
9464+
An exception to this is that pandas has special handling of NA values:
9465+
any NA values will be collapsed to a single group, regardless of how
9466+
they compare. See the user guide linked above for more details.
9467+
93529468
Examples
93539469
--------
9354-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9355-
... 'Parrot', 'Parrot'],
9356-
... 'Max Speed': [380., 370., 24., 26.]})
9470+
>>> df = pd.DataFrame(
9471+
... {
9472+
... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9473+
... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9474+
... }
9475+
... )
93579476
>>> df
93589477
Animal Max Speed
93599478
0 Falcon 380.0
93609479
1 Falcon 370.0
93619480
2 Parrot 24.0
93629481
3 Parrot 26.0
9363-
>>> df.groupby(['Animal']).mean()
9482+
>>> df.groupby(["Animal"]).mean()
93649483
Max Speed
93659484
Animal
93669485
Falcon 375.0
@@ -9371,11 +9490,12 @@ def update(
93719490
We can groupby different levels of a hierarchical index
93729491
using the `level` parameter:
93739492
9374-
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
9375-
... ['Captive', 'Wild', 'Captive', 'Wild']]
9376-
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
9377-
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
9378-
... index=index)
9493+
>>> arrays = [
9494+
... ["Falcon", "Falcon", "Parrot", "Parrot"],
9495+
... ["Captive", "Wild", "Captive", "Wild"],
9496+
... ]
9497+
>>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
9498+
>>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
93799499
>>> df
93809500
Max Speed
93819501
Animal Type
@@ -9413,7 +9533,7 @@ def update(
94139533
2.0 2 5
94149534
NaN 1 4
94159535
9416-
>>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
9536+
>>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
94179537
>>> df = pd.DataFrame(arr, columns=["a", "b", "c"])
94189538
94199539
>>> df.groupby(by="a").sum()
@@ -9432,40 +9552,27 @@ def update(
94329552
When using ``.apply()``, use ``group_keys`` to include or exclude the
94339553
group keys. The ``group_keys`` argument defaults to ``True`` (include).
94349554
9435-
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9436-
... 'Parrot', 'Parrot'],
9437-
... 'Max Speed': [380., 370., 24., 26.]})
9438-
>>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
9555+
>>> df = pd.DataFrame(
9556+
... {
9557+
... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9558+
... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9559+
... }
9560+
... )
9561+
>>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
94399562
Max Speed
94409563
Animal
94419564
Falcon 0 380.0
94429565
1 370.0
94439566
Parrot 2 24.0
94449567
3 26.0
94459568
9446-
>>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
9569+
>>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
94479570
Max Speed
94489571
0 380.0
94499572
1 370.0
94509573
2 24.0
94519574
3 26.0
94529575
"""
9453-
)
9454-
)
9455-
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
9456-
@deprecate_nonkeyword_arguments(
9457-
Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby"
9458-
)
9459-
def groupby(
9460-
self,
9461-
by=None,
9462-
level: IndexLabel | None = None,
9463-
as_index: bool = True,
9464-
sort: bool = True,
9465-
group_keys: bool = True,
9466-
observed: bool = True,
9467-
dropna: bool = True,
9468-
) -> DataFrameGroupBy:
94699576
from pandas.core.groupby.generic import DataFrameGroupBy
94709577

94719578
if level is None and by is None:

0 commit comments

Comments
 (0)