@@ -9346,21 +9346,140 @@ def update(
93469346
93479347 # ----------------------------------------------------------------------
93489348 # Data reshaping
9349- @Appender (
9350- dedent (
9351- """
9349+ @deprecate_nonkeyword_arguments (
9350+ Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9351+ )
9352+ def groupby (
9353+ self ,
9354+ by = None ,
9355+ level : IndexLabel | None = None ,
9356+ as_index : bool = True ,
9357+ sort : bool = True ,
9358+ group_keys : bool = True ,
9359+ observed : bool = True ,
9360+ dropna : bool = True ,
9361+ ) -> DataFrameGroupBy :
9362+ """
9363+ Group DataFrame using a mapper or by a Series of columns.
9364+
9365+ A groupby operation involves some combination of splitting the
9366+ object, applying a function, and combining the results. This can be
9367+ used to group large amounts of data and compute operations on these
9368+ groups.
9369+
9370+ Parameters
9371+ ----------
9372+ by : mapping, function, label, pd.Grouper or list of such
9373+ Used to determine the groups for the groupby.
9374+ If ``by`` is a function, it's called on each value of the object's
9375+ index. If a dict or Series is passed, the Series or dict VALUES
9376+ will be used to determine the groups (the Series' values are first
9377+ aligned; see ``.align()`` method). If a list or ndarray of length
9378+ equal to the selected axis is passed (see the `groupby user guide
9379+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
9380+ the values are used as-is to determine the groups. A label or list
9381+ of labels may be passed to group by the columns in ``self``.
9382+ Notice that a tuple is interpreted as a (single) key.
9383+ level : int, level name, or sequence of such, default None
9384+ If the axis is a MultiIndex (hierarchical), group by a particular
9385+ level or levels. Do not specify both ``by`` and ``level``.
9386+ as_index : bool, default True
9387+ Return object with group labels as the
9388+ index. Only relevant for DataFrame input. as_index=False is
9389+ effectively "SQL-style" grouped output. This argument has no effect
9390+ on filtrations (see the `filtrations in the user guide
9391+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9392+ such as ``head()``, ``tail()``, ``nth()``, or on transformations
9393+ (see the `transformations in the user guide
9394+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9395+ sort : bool, default True
9396+ Sort group keys. Get better performance by turning this off.
9397+ Note this does not influence the order of observations within each
9398+ group. Groupby preserves the order of rows within each group. If False,
9399+ the groups will appear in the same order as they did in the original
9400+ DataFrame.
9401+ This argument has no effect on filtrations (see the `filtrations
9402+ in the user guide
9403+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9404+ such as ``head()``, ``tail()``, ``nth()``, or on transformations
9405+ (see the `transformations in the user guide
9406+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9407+
9408+ .. versionchanged:: 2.0.0
9409+
9410+ Specifying ``sort=False`` with an ordered categorical grouper will no
9411+ longer sort the values.
9412+
9413+ group_keys : bool, default True
9414+ When calling apply and the ``by`` argument produces a like-indexed
9415+ (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
9416+ index to identify pieces. By default group keys are not included
9417+ when the result's index (and column) labels match the inputs, and
9418+ are included otherwise.
9419+
9420+ .. versionchanged:: 1.5.0
9421+
9422+ Warns that ``group_keys`` will no longer be ignored when the
9423+ result from ``apply`` is a like-indexed Series or DataFrame.
9424+ Specify ``group_keys`` explicitly to include the group keys or
9425+ not.
9426+
9427+ .. versionchanged:: 2.0.0
9428+
9429+ ``group_keys`` now defaults to ``True``.
9430+
9431+ observed : bool, default True
9432+ This only applies if any of the groupers are Categoricals.
9433+ If True: only show observed values for categorical groupers.
9434+ If False: show all values for categorical groupers.
9435+
9436+ .. versionchanged:: 3.0.0
9437+
9438+ The default value is now ``True``.
9439+
9440+ dropna : bool, default True
9441+ If True, and if group keys contain NA values, the NA values together
9442+ with their corresponding row/column will be dropped.
9443+ If False, NA values will also be treated as the key in groups.
9444+
9445+ Returns
9446+ -------
9447+ pandas.api.typing.DataFrameGroupBy
9448+ Returns a groupby object that contains information about the groups.
9449+
9450+ See Also
9451+ --------
9452+ resample : Convenience method for frequency conversion and resampling
9453+ of time series.
9454+
9455+ Notes
9456+ -----
9457+ See the `user guide
9458+ <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
9459+ detailed usage and examples, including splitting an object into groups,
9460+ iterating through groups, selecting a group, aggregation, and more.
9461+
9462+ The implementation of groupby is hash-based, meaning in particular that
9463+ objects that compare as equal will be considered to be in the same group.
9464+ An exception to this is that pandas has special handling of NA values:
9465+ any NA values will be collapsed to a single group, regardless of how
9466+ they compare. See the user guide linked above for more details.
9467+
93529468 Examples
93539469 --------
9354- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9355- ... 'Parrot', 'Parrot'],
9356- ... 'Max Speed': [380., 370., 24., 26.]})
9470+ >>> df = pd.DataFrame(
9471+ ... {
9472+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9473+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9474+ ... }
9475+ ... )
93579476 >>> df
93589477 Animal Max Speed
93599478 0 Falcon 380.0
93609479 1 Falcon 370.0
93619480 2 Parrot 24.0
93629481 3 Parrot 26.0
9363- >>> df.groupby(['Animal']).mean()
9482+ >>> df.groupby(["Animal"]).mean()
93649483 Max Speed
93659484 Animal
93669485 Falcon 375.0
@@ -9371,11 +9490,12 @@ def update(
93719490 We can groupby different levels of a hierarchical index
93729491 using the `level` parameter:
93739492
9374- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
9375- ... ['Captive', 'Wild', 'Captive', 'Wild']]
9376- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
9377- >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
9378- ... index=index)
9493+ >>> arrays = [
9494+ ... ["Falcon", "Falcon", "Parrot", "Parrot"],
9495+ ... ["Captive", "Wild", "Captive", "Wild"],
9496+ ... ]
9497+ >>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
9498+ >>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
93799499 >>> df
93809500 Max Speed
93819501 Animal Type
@@ -9413,7 +9533,7 @@ def update(
94139533 2.0 2 5
94149534 NaN 1 4
94159535
9416- >>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
9536+ >>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
94179537 >>> df = pd.DataFrame(arr, columns=["a", "b", "c"])
94189538
94199539 >>> df.groupby(by="a").sum()
@@ -9432,40 +9552,27 @@ def update(
94329552 When using ``.apply()``, use ``group_keys`` to include or exclude the
94339553 group keys. The ``group_keys`` argument defaults to ``True`` (include).
94349554
9435- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9436- ... 'Parrot', 'Parrot'],
9437- ... 'Max Speed': [380., 370., 24., 26.]})
9438- >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
9555+ >>> df = pd.DataFrame(
9556+ ... {
9557+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9558+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9559+ ... }
9560+ ... )
9561+ >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
94399562 Max Speed
94409563 Animal
94419564 Falcon 0 380.0
94429565 1 370.0
94439566 Parrot 2 24.0
94449567 3 26.0
94459568
9446- >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
9569+ >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
94479570 Max Speed
94489571 0 380.0
94499572 1 370.0
94509573 2 24.0
94519574 3 26.0
94529575 """
9453- )
9454- )
9455- @Appender (_shared_docs ["groupby" ] % _shared_doc_kwargs )
9456- @deprecate_nonkeyword_arguments (
9457- Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9458- )
9459- def groupby (
9460- self ,
9461- by = None ,
9462- level : IndexLabel | None = None ,
9463- as_index : bool = True ,
9464- sort : bool = True ,
9465- group_keys : bool = True ,
9466- observed : bool = True ,
9467- dropna : bool = True ,
9468- ) -> DataFrameGroupBy :
94699576 from pandas .core .groupby .generic import DataFrameGroupBy
94709577
94719578 if level is None and by is None :
0 commit comments