@@ -9358,21 +9358,140 @@ def update(
93589358
93599359 # ----------------------------------------------------------------------
93609360 # Data reshaping
9361- @Appender (
9362- dedent (
9363- """
9361+ @deprecate_nonkeyword_arguments (
9362+ Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9363+ )
9364+ def groupby (
9365+ self ,
9366+ by = None ,
9367+ level : IndexLabel | None = None ,
9368+ as_index : bool = True ,
9369+ sort : bool = True ,
9370+ group_keys : bool = True ,
9371+ observed : bool = True ,
9372+ dropna : bool = True ,
9373+ ) -> DataFrameGroupBy :
9374+ """
9375+ Group DataFrame using a mapper or by a Series of columns.
9376+
9377+ A groupby operation involves some combination of splitting the
9378+ object, applying a function, and combining the results. This can be
9379+ used to group large amounts of data and compute operations on these
9380+ groups.
9381+
9382+ Parameters
9383+ ----------
9384+ by : mapping, function, label, pd.Grouper or list of such
9385+ Used to determine the groups for the groupby.
9386+ If ``by`` is a function, it's called on each value of the object's
9387+ index. If a dict or Series is passed, the Series or dict VALUES
9388+ will be used to determine the groups (the Series' values are first
9389+ aligned; see ``.align()`` method). If a list or ndarray of length
9390+ equal to the selected axis is passed (see the `groupby user guide
9391+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
9392+ the values are used as-is to determine the groups. A label or list
9393+ of labels may be passed to group by the columns in ``self``.
9394+ Notice that a tuple is interpreted as a (single) key.
9395+ level : int, level name, or sequence of such, default None
9396+ If the axis is a MultiIndex (hierarchical), group by a particular
9397+ level or levels. Do not specify both ``by`` and ``level``.
9398+ as_index : bool, default True
9399+ Return object with group labels as the
9400+ index. Only relevant for DataFrame input. as_index=False is
9401+ effectively "SQL-style" grouped output. This argument has no effect
9402+ on filtrations (see the `filtrations in the user guide
9403+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9404+ such as ``head()``, ``tail()``, ``nth()`` and in transformations
9405+ (see the `transformations in the user guide
9406+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9407+ sort : bool, default True
9408+ Sort group keys. Get better performance by turning this off.
9409+ Note this does not influence the order of observations within each
9410+ group. Groupby preserves the order of rows within each group. If False,
9411+ the groups will appear in the same order as they did in the original
9412+ DataFrame.
9413+ This argument has no effect on filtrations (see the `filtrations
9414+ in the user guide
9415+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_),
9416+ such as ``head()``, ``tail()``, ``nth()`` and in transformations
9417+ (see the `transformations in the user guide
9418+ <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_).
9419+
9420+ .. versionchanged:: 2.0.0
9421+
9422+ Specifying ``sort=False`` with an ordered categorical grouper will no
9423+ longer sort the values.
9424+
9425+ group_keys : bool, default True
9426+ When calling apply and the ``by`` argument produces a like-indexed
9427+ (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
9428+ index to identify pieces. By default group keys are not included
9429+ when the result's index (and column) labels match the inputs, and
9430+ are included otherwise.
9431+
9432+ .. versionchanged:: 1.5.0
9433+
9434+ Warns that ``group_keys`` will no longer be ignored when the
9435+ result from ``apply`` is a like-indexed Series or DataFrame.
9436+ Specify ``group_keys`` explicitly to include the group keys or
9437+ not.
9438+
9439+ .. versionchanged:: 2.0.0
9440+
9441+ ``group_keys`` now defaults to ``True``.
9442+
9443+ observed : bool, default True
9444+ This only applies if any of the groupers are Categoricals.
9445+ If True: only show observed values for categorical groupers.
9446+ If False: show all values for categorical groupers.
9447+
9448+ .. versionchanged:: 3.0.0
9449+
9450+ The default value is now ``True``.
9451+
9452+ dropna : bool, default True
9453+ If True, and if group keys contain NA values, those NA values together
9454+ with their rows are dropped from the result.
9455+ If False, NA values are also treated as a key in groups.
9456+
9457+ Returns
9458+ -------
9459+ pandas.api.typing.DataFrameGroupBy
9460+ Returns a groupby object that contains information about the groups.
9461+
9462+ See Also
9463+ --------
9464+ resample : Convenience method for frequency conversion and resampling
9465+ of time series.
9466+
9467+ Notes
9468+ -----
9469+ See the `user guide
9470+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__ for more
9471+ detailed usage and examples, including splitting an object into groups,
9472+ iterating through groups, selecting a group, aggregation, and more.
9473+
9474+ The implementation of groupby is hash-based, meaning in particular that
9475+ objects that compare as equal will be considered to be in the same group.
9476+ An exception to this is that pandas has special handling of NA values:
9477+ any NA values will be collapsed to a single group, regardless of how
9478+ they compare. See the user guide linked above for more details.
9479+
93649480 Examples
93659481 --------
9366- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9367- ... 'Parrot', 'Parrot'],
9368- ... 'Max Speed': [380., 370., 24., 26.]})
9482+ >>> df = pd.DataFrame(
9483+ ... {
9484+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9485+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9486+ ... }
9487+ ... )
93699488 >>> df
93709489 Animal Max Speed
93719490 0 Falcon 380.0
93729491 1 Falcon 370.0
93739492 2 Parrot 24.0
93749493 3 Parrot 26.0
9375- >>> df.groupby(['Animal']).mean()
9494+ >>> df.groupby(["Animal"]).mean()
93769495 Max Speed
93779496 Animal
93789497 Falcon 375.0
@@ -9383,11 +9502,12 @@ def update(
93839502 We can groupby different levels of a hierarchical index
93849503 using the `level` parameter:
93859504
9386- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
9387- ... ['Captive', 'Wild', 'Captive', 'Wild']]
9388- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
9389- >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
9390- ... index=index)
9505+ >>> arrays = [
9506+ ... ["Falcon", "Falcon", "Parrot", "Parrot"],
9507+ ... ["Captive", "Wild", "Captive", "Wild"],
9508+ ... ]
9509+ >>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
9510+ >>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
93919511 >>> df
93929512 Max Speed
93939513 Animal Type
@@ -9425,7 +9545,7 @@ def update(
94259545 2.0 2 5
94269546 NaN 1 4
94279547
9428- >>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
9548+ >>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]]
94299549 >>> df = pd.DataFrame(arr, columns=["a", "b", "c"])
94309550
94319551 >>> df.groupby(by="a").sum()
@@ -9444,40 +9564,27 @@ def update(
94449564 When using ``.apply()``, use ``group_keys`` to include or exclude the
94459565 group keys. The ``group_keys`` argument defaults to ``True`` (include).
94469566
9447- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
9448- ... 'Parrot', 'Parrot'],
9449- ... 'Max Speed': [380., 370., 24., 26.]})
9450- >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)
9567+ >>> df = pd.DataFrame(
9568+ ... {
9569+ ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
9570+ ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
9571+ ... }
9572+ ... )
9573+ >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
94519574 Max Speed
94529575 Animal
94539576 Falcon 0 380.0
94549577 1 370.0
94559578 Parrot 2 24.0
94569579 3 26.0
94579580
9458- >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)
9581+ >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
94599582 Max Speed
94609583 0 380.0
94619584 1 370.0
94629585 2 24.0
94639586 3 26.0
94649587 """
9465- )
9466- )
9467- @Appender (_shared_docs ["groupby" ] % _shared_doc_kwargs )
9468- @deprecate_nonkeyword_arguments (
9469- Pandas4Warning , allowed_args = ["self" , "by" , "level" ], name = "groupby"
9470- )
9471- def groupby (
9472- self ,
9473- by = None ,
9474- level : IndexLabel | None = None ,
9475- as_index : bool = True ,
9476- sort : bool = True ,
9477- group_keys : bool = True ,
9478- observed : bool = True ,
9479- dropna : bool = True ,
9480- ) -> DataFrameGroupBy :
94819588 from pandas .core .groupby .generic import DataFrameGroupBy
94829589
94839590 if level is None and by is None :
0 commit comments