From 0e66668cd6dbd1bb62af8f174e0e2e1c92192b67 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 01/41] code impl and examples --- pandas/core/groupby/generic.py | 51 +++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..cb85121079ead 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,7 +16,6 @@ TYPE_CHECKING, Any, Literal, - NamedTuple, TypeAlias, TypeVar, cast, @@ -113,11 +112,32 @@ @set_module("pandas") -class NamedAgg(NamedTuple): +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. - Subclass of typing.NamedTuple. + Subclass of tuple that wraps an aggregation function. Parameters ---------- @@ -126,6 +146,10 @@ class NamedAgg(NamedTuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. See Also -------- @@ -141,6 +165,25 @@ class NamedAgg(NamedTuple): key 1 -1 10.5 2 1 12.0 + + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between + key + 1 1 + 2 1 + + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw + key + 1 1 + 2 1 """ column: Hashable From f390c79425bca9dd4b409ee006459081baa9b834 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:04:27 -0400 Subject: [PATCH 02/41] unit tests --- .../tests/groupby/aggregate/test_aggregate.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c968587c469d1..5fb3666b4cdb3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -866,6 +866,57 @@ def test_agg_namedtuple(self): expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) tm.assert_frame_equal(result, expected) + def n_between(self, ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + def test_namedagg_args(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between=pd.NamedAgg("B", self.n_between, 0, 1) + ) + expected = DataFrame({"count_between": [1, 1]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(result, expected) + + def test_namedagg_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_kw=pd.NamedAgg("B", self.n_between, 0, 1, inclusive="both") + ) + expected = DataFrame( + {"count_between_kw": [1, 1]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_namedagg_args_and_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_mix=pd.NamedAgg( + "B", self.n_between, 0, 1, inclusive="neither" + ) + ) + expected = DataFrame( + {"count_between_mix": [0, 0]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_multiple_named_agg_with_args_and_kwargs(self): + df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]}) + + result = df.groupby("A").agg( + n_between01=pd.NamedAgg("B", self.n_between, 0, 1), + n_between13=pd.NamedAgg("B", self.n_between, 1, 3), + n_between02=pd.NamedAgg("B", self.n_between, 0, 2), + ) + expected = df.groupby("A").agg( + n_between01=("B", lambda x: x.between(0, 1).sum()), + n_between13=("B", lambda x: x.between(0, 3).sum()), + n_between02=("B", lambda x: x.between(0, 2).sum()), + ) + tm.assert_frame_equal(result, expected) + def test_mangled(self): df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) From bd8b13151c9de2608039e47f60c2ee35ac86f2ab Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:13:31 -0400 Subject: [PATCH 03/41] add typehint --- pandas/core/groupby/generic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cb85121079ead..a4a5be6c459b7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,6 +16,7 @@ TYPE_CHECKING, Any, Literal, + Self, TypeAlias, TypeVar, cast, @@ -115,7 +116,13 @@ class NamedAgg(tuple): __slots__ = () - def __new__(cls, column, aggfunc, *args, **kwargs): + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: if ( callable(aggfunc) and not getattr(aggfunc, "_is_wrapped", False) @@ -123,7 +130,7 @@ def __new__(cls, column, aggfunc, *args, **kwargs): ): original_func = aggfunc - def wrapped(*call_args, **call_kwargs): + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: series = call_args[0] final_args = call_args[1:] + args final_kwargs = {**kwargs, **call_kwargs} From 65489604486b715d694cdd6c68a0b334177d2bb2 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:16:30 -0400 Subject: [PATCH 04/41] whats new --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 380915b3494a3..676e872ae5f67 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,6 +156,8 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ +- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs`` + to calls of ``aggfunc`` (:issue:`58283`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. From 3d48574a1eea0732f37f502ba762c9a1162d2594 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:58:38 -0400 Subject: [PATCH 05/41] mypy --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a4a5be6c459b7..4e178cc878dff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -136,7 +136,7 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: final_kwargs = {**kwargs, **call_kwargs} return original_func(series, *final_args, **final_kwargs) - wrapped._is_wrapped = True + wrapped._is_wrapped = True # type: ignore[attr-defined] aggfunc = wrapped return super().__new__(cls, (column, aggfunc)) From 54aa3982e6cff2424e597304878911f60cec00a9 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:00:36 -0400 Subject: [PATCH 06/41] mypy --- pandas/core/groupby/generic.py | 52 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4e178cc878dff..37dd3cb59b527 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -114,32 +114,6 @@ @set_module("pandas") class NamedAgg(tuple): - __slots__ = () - - def __new__( - cls, - column: Hashable, - aggfunc: Callable[..., Any] | str, - *args: Any, - **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) - """ Helper for column specific aggregation with with flexible argument passing and control over output column names. @@ -196,6 +170,32 @@ def n_between(ser, low, high, **kwargs): column: Hashable aggfunc: AggScalar + __slots__ = () + + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True # type: ignore[attr-defined] + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + @set_module("pandas.api.typing") class SeriesGroupBy(GroupBy[Series]): From 5ff65d6f8251732e9aaaed2a35d25432dd0f0c18 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:26:05 -0400 Subject: [PATCH 07/41] doc string validation --- pandas/core/groupby/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 37dd3cb59b527..43941b0d4cd54 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,8 +115,7 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for column specific aggregation with with flexible argument passing and - control over output column names. + Helper for defining named aggregations in groupby operations. Subclass of tuple that wraps an aggregation function. From d999923331dcb8d8eeef5795578deda6e32d1eb2 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:52:21 -0400 Subject: [PATCH 08/41] doc --- pandas/core/groupby/generic.py | 37 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 43941b0d4cd54..ea0c4bab68688 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,9 +115,10 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in groupby operations. + Helper for defining named aggregations in ``DataFrame.groupby().agg``. - Subclass of tuple that wraps an aggregation function. + Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit + output names. Parameters ---------- @@ -126,10 +127,8 @@ class NamedAgg(tuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. - *args : tuple, optional - Positional arguments to pass to `aggfunc` when it is called. - **kwargs : dict, optional - Keyword arguments to pass to `aggfunc` when it is called. + *args, **kwargs : + Optional positional and keyword arguments passed to ``aggfunc``. See Also -------- @@ -137,30 +136,28 @@ class NamedAgg(tuple): Examples -------- - >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], "b": [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) - >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) - result_a result_1 + >>> agg_b = pd.NamedAgg(column="b", aggfunc=lambda x: x.mean()) + >>> df.groupby("key").agg(result_a=agg_a, result_b=agg_b) + result_a result_b key 1 -1 10.5 2 1 12.0 - def n_between(ser, low, high, **kwargs): - return ser.between(low, high, **kwargs).sum() + >>> def n_between(ser, low, high, **kwargs): + ... return ser.between(low, high, **kwargs).sum() - Using positional arguments - agg_between = pd.NamedAgg("a", n_between, 0, 1) - df.groupby("key").agg(count_between=agg_between) - count_between + >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) + >>> df.groupby("key").agg(count_between=agg_between) + count_between key 1 1 2 1 - Using both positional and keyword arguments - agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") - df.groupby("key").agg(count_between_kw=agg_between_kw) - count_between_kw + >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + >>> df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw key 1 1 2 1 From 25fe854d5862a8e2448d1e5eff1e9796345e313d Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:54:22 -0400 Subject: [PATCH 09/41] doc --- pandas/core/groupby/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea0c4bab68688..c4a8049a307ac 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,10 +115,9 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in ``DataFrame.groupby().agg``. + Helper for column specific aggregation with control over output column names. - Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit - output names. + Subclass of tuple. Parameters ---------- From 173b7fb9353d9fdf852b1403b08a69baa62852df Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 17:06:46 -0400 Subject: [PATCH 10/41] reverting an example that was changed accidentally --- pandas/core/groupby/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c4a8049a307ac..6923fb0e1780e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -135,11 +135,11 @@ class NamedAgg(tuple): Examples -------- - >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], "b": [10, 11, 12]}) + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_b = pd.NamedAgg(column="b", aggfunc=lambda x: x.mean()) - >>> df.groupby("key").agg(result_a=agg_a, result_b=agg_b) - result_a result_b + >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) + >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) + result_a result_1 key 1 -1 10.5 2 1 12.0 From 02d4bf37afda1d99b58adf7f023eeabdde256c75 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 19:54:17 -0400 Subject: [PATCH 11/41] review comments --- pandas/core/apply.py | 25 ++++++++++++++--- pandas/core/groupby/generic.py | 49 +++++++++++++++++----------------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 468f24a07cb4a..917b2274de785 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1745,7 +1745,13 @@ def reconstruct_func( >>> reconstruct_func("min") (False, 'min', None, None) """ - relabeling = func is None and is_multi_agg_with_relabel(**kwargs) + from pandas.core.groupby.generic import NamedAgg + + relabeling = func is None and ( + is_multi_agg_with_relabel(**kwargs) + or any(isinstance(v, NamedAgg) for v in kwargs.values()) + ) + columns: tuple[str, ...] | None = None order: npt.NDArray[np.intp] | None = None @@ -1766,9 +1772,20 @@ def reconstruct_func( # "Callable[..., Any] | str | list[Callable[..., Any] | str] | # MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] | # str]] | None") - func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment] - kwargs - ) + converted_kwargs = {} + for key, val in kwargs.items(): + if isinstance(val, NamedAgg): + aggfunc = val.aggfunc + if getattr(val, "args", ()) or getattr(val, "kwargs", {}): + a = getattr(val, "args", ()) + kw = getattr(val, "kwargs", {}) + aggfunc = lambda x, func=aggfunc, a=a, kw=kw: func(x, *a, **kw) + converted_kwargs[key] = (val.column, aggfunc) + else: + converted_kwargs[key] = val + + func, columns, order = normalize_keyword_aggregation(converted_kwargs) + assert func is not None return relabeling, func, columns, order diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6923fb0e1780e..cf1d56f071a2d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -10,13 +10,13 @@ from collections import abc from collections.abc import Callable +import dataclasses from functools import partial from textwrap import dedent from typing import ( TYPE_CHECKING, Any, Literal, - Self, TypeAlias, TypeVar, cast, @@ -113,12 +113,11 @@ @set_module("pandas") -class NamedAgg(tuple): +@dataclasses.dataclass +class NamedAgg: """ Helper for column specific aggregation with control over output column names. - Subclass of tuple. - Parameters ---------- column : Hashable @@ -164,32 +163,32 @@ class NamedAgg(tuple): column: Hashable aggfunc: AggScalar + args: tuple[Any, ...] = dataclasses.field(default_factory=tuple) + kwargs: dict[str, Any] = dataclasses.field(default_factory=dict) - __slots__ = () - - def __new__( - cls, + def __init__( + self, column: Hashable, aggfunc: Callable[..., Any] | str, *args: Any, **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) + ) -> None: + self.column = column + self.aggfunc = aggfunc + self.args = args + self.kwargs = kwargs + + def __getitem__(self, key: int) -> Any: + """Provide backward-compatible tuple-style access.""" + if key == 0: + return self.column + elif key == 1: + return self.aggfunc + elif key == 2: + return self.args + elif key == 3: + return self.kwargs + raise IndexError("index out of range") @set_module("pandas.api.typing") From 3842aa5ef641c0b9a3aa637dd1f402682b2800ed Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 12/41] code impl and examples --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cf1d56f071a2d..27e45a7d580c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -3448,4 +3448,4 @@ def _wrap_transform_general_frame( elif isinstance(res, DataFrame) and not res.index.is_(group.index): return res._align_frame(group)[0] else: - return res + return res \ No newline at end of file From bf31b35ecdff3cd9ab1f0c0dce9e1521e66a13ed Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 13/41] code impl and examples --- pandas/core/groupby/generic.py | 56 +++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e45a7d580c8..10093358a0dff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -113,11 +113,41 @@ @set_module("pandas") +<<<<<<< HEAD @dataclasses.dataclass class NamedAgg: +======= +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + +>>>>>>> 4e8ccf6958 (code impl and examples) """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. + +<<<<<<< HEAD +======= + Subclass of tuple that wraps an aggregation function. +>>>>>>> 4e8ccf6958 (code impl and examples) Parameters ---------- column : Hashable @@ -125,8 +155,15 @@ class NamedAgg: aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. +<<<<<<< HEAD *args, **kwargs : Optional positional and keyword arguments passed to ``aggfunc``. +======= + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. +>>>>>>> 4e8ccf6958 (code impl and examples) See Also -------- @@ -143,19 +180,36 @@ class NamedAgg: 1 -1 10.5 2 1 12.0 +<<<<<<< HEAD >>> def n_between(ser, low, high, **kwargs): ... return ser.between(low, high, **kwargs).sum() >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) >>> df.groupby("key").agg(count_between=agg_between) count_between +======= + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between +>>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 +<<<<<<< HEAD >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") >>> df.groupby("key").agg(count_between_kw=agg_between_kw) count_between_kw +======= + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw +>>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 From bef8a80c00aed787701d61361459cbf035522d87 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:13:31 -0400 Subject: [PATCH 14/41] add typehint --- pandas/core/groupby/generic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 10093358a0dff..3cce973ca382d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -17,6 +17,7 @@ TYPE_CHECKING, Any, Literal, + Self, TypeAlias, TypeVar, cast, @@ -120,7 +121,13 @@ class NamedAgg: class NamedAgg(tuple): __slots__ = () - def __new__(cls, column, aggfunc, *args, **kwargs): + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: if ( callable(aggfunc) and not getattr(aggfunc, "_is_wrapped", False) @@ -128,7 +135,7 @@ def __new__(cls, column, aggfunc, *args, **kwargs): ): original_func = aggfunc - def wrapped(*call_args, **call_kwargs): + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: series = call_args[0] final_args = call_args[1:] + args final_kwargs = {**kwargs, **call_kwargs} From 16337af5692c37eb560005a35d867de9905cda52 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:58:38 -0400 Subject: [PATCH 15/41] mypy --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3cce973ca382d..0ba7a8a5254c3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -141,7 +141,7 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: final_kwargs = {**kwargs, **call_kwargs} return original_func(series, *final_args, **final_kwargs) - wrapped._is_wrapped = True + wrapped._is_wrapped = True # type: ignore[attr-defined] aggfunc = wrapped return super().__new__(cls, (column, aggfunc)) From d960dd51e6be7e42d2d10d596639bb9849822bc1 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:00:36 -0400 Subject: [PATCH 16/41] mypy --- pandas/core/groupby/generic.py | 63 +--------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0ba7a8a5254c3..27e45a7d580c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -17,7 +17,6 @@ TYPE_CHECKING, Any, Literal, - Self, TypeAlias, TypeVar, cast, @@ -114,47 +113,11 @@ @set_module("pandas") -<<<<<<< HEAD @dataclasses.dataclass class NamedAgg: -======= -class NamedAgg(tuple): - __slots__ = () - - def __new__( - cls, - column: Hashable, - aggfunc: Callable[..., Any] | str, - *args: Any, - **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) - ->>>>>>> 4e8ccf6958 (code impl and examples) """ - Helper for column specific aggregation with with flexible argument passing and - control over output column names. - -<<<<<<< HEAD -======= - Subclass of tuple that wraps an aggregation function. + Helper for column specific aggregation with control over output column names. ->>>>>>> 4e8ccf6958 (code impl and examples) Parameters ---------- column : Hashable @@ -162,15 +125,8 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. -<<<<<<< HEAD *args, **kwargs : Optional positional and keyword arguments passed to ``aggfunc``. -======= - *args : tuple, optional - Positional arguments to pass to `aggfunc` when it is called. - **kwargs : dict, optional - Keyword arguments to pass to `aggfunc` when it is called. ->>>>>>> 4e8ccf6958 (code impl and examples) See Also -------- @@ -187,36 +143,19 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: 1 -1 10.5 2 1 12.0 -<<<<<<< HEAD >>> def n_between(ser, low, high, **kwargs): ... return ser.between(low, high, **kwargs).sum() >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) >>> df.groupby("key").agg(count_between=agg_between) count_between -======= - def n_between(ser, low, high, **kwargs): - return ser.between(low, high, **kwargs).sum() - - Using positional arguments - agg_between = pd.NamedAgg("a", n_between, 0, 1) - df.groupby("key").agg(count_between=agg_between) - count_between ->>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 -<<<<<<< HEAD >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") >>> df.groupby("key").agg(count_between_kw=agg_between_kw) count_between_kw -======= - Using both positional and keyword arguments - agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") - df.groupby("key").agg(count_between_kw=agg_between_kw) - count_between_kw ->>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 From 654dea4cd9737c41cfa77e34f2ee31d56008bf99 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 20:42:46 -0400 Subject: [PATCH 17/41] keep comment --- pandas/core/apply.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 917b2274de785..b3bedf73ad1a7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1784,7 +1784,9 @@ def reconstruct_func( else: converted_kwargs[key] = val - func, columns, order = normalize_keyword_aggregation(converted_kwargs) + func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment] + converted_kwargs + ) assert func is not None From 622ca7a848df268e5149c77b6b53515f0a198c43 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 00:50:19 +0000 Subject: [PATCH 18/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e45a7d580c8..cf1d56f071a2d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -3448,4 +3448,4 @@ def _wrap_transform_general_frame( elif isinstance(res, DataFrame) and not res.index.is_(group.index): return res._align_frame(group)[0] else: - return res \ No newline at end of file + return res From b3dd5324f08e85b87d9d35d4b2f9ea8545bc8a8f Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 19/41] code impl and examples --- pandas/core/groupby/generic.py | 51 +++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..cb85121079ead 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,7 +16,6 @@ TYPE_CHECKING, Any, Literal, - NamedTuple, TypeAlias, TypeVar, cast, @@ -113,11 +112,32 @@ @set_module("pandas") -class NamedAgg(NamedTuple): +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. - Subclass of typing.NamedTuple. + Subclass of tuple that wraps an aggregation function. Parameters ---------- @@ -126,6 +146,10 @@ class NamedAgg(NamedTuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. See Also -------- @@ -141,6 +165,25 @@ class NamedAgg(NamedTuple): key 1 -1 10.5 2 1 12.0 + + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between + key + 1 1 + 2 1 + + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw + key + 1 1 + 2 1 """ column: Hashable From 7ddfbadb20257791f6a57315c0ae5abd467c4346 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:04:27 -0400 Subject: [PATCH 20/41] unit tests --- .../tests/groupby/aggregate/test_aggregate.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c968587c469d1..5fb3666b4cdb3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -866,6 +866,57 @@ def test_agg_namedtuple(self): expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) tm.assert_frame_equal(result, expected) + def n_between(self, ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + def test_namedagg_args(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between=pd.NamedAgg("B", self.n_between, 0, 1) + ) + expected = DataFrame({"count_between": [1, 1]}, index=Index([0, 1], name="A")) + tm.assert_frame_equal(result, expected) + + def test_namedagg_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_kw=pd.NamedAgg("B", self.n_between, 0, 1, inclusive="both") + ) + expected = DataFrame( + {"count_between_kw": [1, 1]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_namedagg_args_and_kwargs(self): + df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) + + result = df.groupby("A").agg( + count_between_mix=pd.NamedAgg( + "B", self.n_between, 0, 1, inclusive="neither" + ) + ) + expected = DataFrame( + {"count_between_mix": [0, 0]}, index=Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + def test_multiple_named_agg_with_args_and_kwargs(self): + df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]}) + + result = df.groupby("A").agg( + n_between01=pd.NamedAgg("B", self.n_between, 0, 1), + n_between13=pd.NamedAgg("B", self.n_between, 1, 3), + n_between02=pd.NamedAgg("B", self.n_between, 0, 2), + ) + expected = df.groupby("A").agg( + n_between01=("B", lambda x: x.between(0, 1).sum()), + n_between13=("B", lambda x: x.between(0, 3).sum()), + n_between02=("B", lambda x: x.between(0, 2).sum()), + ) + tm.assert_frame_equal(result, expected) + def test_mangled(self): df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) From fc338d3b6817c8ee667c00e44e90aeae6a88de72 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:13:31 -0400 Subject: [PATCH 21/41] add typehint --- pandas/core/groupby/generic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cb85121079ead..a4a5be6c459b7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -16,6 +16,7 @@ TYPE_CHECKING, Any, Literal, + Self, TypeAlias, TypeVar, cast, @@ -115,7 +116,13 @@ class NamedAgg(tuple): __slots__ = () - def __new__(cls, column, aggfunc, *args, **kwargs): + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: if ( callable(aggfunc) and not getattr(aggfunc, "_is_wrapped", False) @@ -123,7 +130,7 @@ def __new__(cls, column, aggfunc, *args, **kwargs): ): original_func = aggfunc - def wrapped(*call_args, **call_kwargs): + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: series = call_args[0] final_args = call_args[1:] + args final_kwargs = {**kwargs, **call_kwargs} From 601ffbd8692fd94299574413637bde0f85b30700 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:16:30 -0400 Subject: [PATCH 22/41] whats new --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 126a5e10c0e85..4ae7eef82de79 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,6 +156,8 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ +- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs`` + to calls of ``aggfunc`` (:issue:`58283`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for :func:`pandas.concat`. From 57a4ee2aed3cc7684d2f18f1aa3345a05a0ba0c4 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:58:38 -0400 Subject: [PATCH 23/41] mypy --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a4a5be6c459b7..4e178cc878dff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -136,7 +136,7 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: final_kwargs = {**kwargs, **call_kwargs} return original_func(series, *final_args, **final_kwargs) - wrapped._is_wrapped = True + wrapped._is_wrapped = True # type: ignore[attr-defined] aggfunc = wrapped return super().__new__(cls, (column, aggfunc)) From 4ee036a825d351d0cb9baaa53f9b41c1f8ac77dd Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:00:36 -0400 Subject: [PATCH 24/41] mypy --- pandas/core/groupby/generic.py | 52 +++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4e178cc878dff..37dd3cb59b527 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -114,32 +114,6 @@ @set_module("pandas") class NamedAgg(tuple): - __slots__ = () - - def __new__( - cls, - column: Hashable, - aggfunc: Callable[..., Any] | str, - *args: Any, - **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) - """ Helper for column specific aggregation with with flexible argument passing and control over output column names. @@ -196,6 +170,32 @@ def n_between(ser, low, high, **kwargs): column: Hashable aggfunc: AggScalar + __slots__ = () + + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True # type: ignore[attr-defined] + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + @set_module("pandas.api.typing") class SeriesGroupBy(GroupBy[Series]): From 1900160bb44ae2736f85dd2c1e0f7c771903ed11 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:26:05 -0400 Subject: [PATCH 25/41] doc string validation --- pandas/core/groupby/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 37dd3cb59b527..43941b0d4cd54 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,8 +115,7 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for column specific aggregation with with flexible argument passing and - control over output column names. + Helper for defining named aggregations in groupby operations. Subclass of tuple that wraps an aggregation function. From 5e6d051cc5e10dcc61d810ef4f54ac7ca3588aab Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:52:21 -0400 Subject: [PATCH 26/41] doc --- pandas/core/groupby/generic.py | 37 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 43941b0d4cd54..ea0c4bab68688 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,9 +115,10 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in groupby operations. + Helper for defining named aggregations in ``DataFrame.groupby().agg``. - Subclass of tuple that wraps an aggregation function. + Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit + output names. Parameters ---------- @@ -126,10 +127,8 @@ class NamedAgg(tuple): aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. - *args : tuple, optional - Positional arguments to pass to `aggfunc` when it is called. - **kwargs : dict, optional - Keyword arguments to pass to `aggfunc` when it is called. + *args, **kwargs : + Optional positional and keyword arguments passed to ``aggfunc``. See Also -------- @@ -137,30 +136,28 @@ class NamedAgg(tuple): Examples -------- - >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], "b": [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) - >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) - result_a result_1 + >>> agg_b = pd.NamedAgg(column="b", aggfunc=lambda x: x.mean()) + >>> df.groupby("key").agg(result_a=agg_a, result_b=agg_b) + result_a result_b key 1 -1 10.5 2 1 12.0 - def n_between(ser, low, high, **kwargs): - return ser.between(low, high, **kwargs).sum() + >>> def n_between(ser, low, high, **kwargs): + ... return ser.between(low, high, **kwargs).sum() - Using positional arguments - agg_between = pd.NamedAgg("a", n_between, 0, 1) - df.groupby("key").agg(count_between=agg_between) - count_between + >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) + >>> df.groupby("key").agg(count_between=agg_between) + count_between key 1 1 2 1 - Using both positional and keyword arguments - agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") - df.groupby("key").agg(count_between_kw=agg_between_kw) - count_between_kw + >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + >>> df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw key 1 1 2 1 From aeaab186abcc1afeb775d7cf8c9d56798e32bbe3 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:54:22 -0400 Subject: [PATCH 27/41] doc --- pandas/core/groupby/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea0c4bab68688..c4a8049a307ac 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -115,10 +115,9 @@ @set_module("pandas") class NamedAgg(tuple): """ - Helper for defining named aggregations in ``DataFrame.groupby().agg``. + Helper for column specific aggregation with control over output column names. - Use ``pd.NamedAgg`` to specify column-specific aggregations with explicit - output names. + Subclass of tuple. Parameters ---------- From 8440ac069276841f4b834d8a694c02382ff340f0 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 17:06:46 -0400 Subject: [PATCH 28/41] reverting an example that was changed accidentally --- pandas/core/groupby/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c4a8049a307ac..6923fb0e1780e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -135,11 +135,11 @@ class NamedAgg(tuple): Examples -------- - >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], "b": [10, 11, 12]}) + >>> df = pd.DataFrame({"key": [1, 1, 2], "a": [-1, 0, 1], 1: [10, 11, 12]}) >>> agg_a = pd.NamedAgg(column="a", aggfunc="min") - >>> agg_b = pd.NamedAgg(column="b", aggfunc=lambda x: x.mean()) - >>> df.groupby("key").agg(result_a=agg_a, result_b=agg_b) - result_a result_b + >>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x)) + >>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1) + result_a result_1 key 1 -1 10.5 2 1 12.0 From 21220ff98b7c01cfa1c0ac401e2d80b4d97f8672 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 19:54:17 -0400 Subject: [PATCH 29/41] review comments --- pandas/core/apply.py | 25 ++++++++++++++--- pandas/core/groupby/generic.py | 49 +++++++++++++++++----------------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 468f24a07cb4a..917b2274de785 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1745,7 +1745,13 @@ def reconstruct_func( >>> reconstruct_func("min") (False, 'min', None, None) """ - relabeling = func is None and is_multi_agg_with_relabel(**kwargs) + from pandas.core.groupby.generic import NamedAgg + + relabeling = func is None and ( + is_multi_agg_with_relabel(**kwargs) + or any(isinstance(v, NamedAgg) for v in kwargs.values()) + ) + columns: tuple[str, ...] | None = None order: npt.NDArray[np.intp] | None = None @@ -1766,9 +1772,20 @@ def reconstruct_func( # "Callable[..., Any] | str | list[Callable[..., Any] | str] | # MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] | # str]] | None") - func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment] - kwargs - ) + converted_kwargs = {} + for key, val in kwargs.items(): + if isinstance(val, NamedAgg): + aggfunc = val.aggfunc + if getattr(val, "args", ()) or getattr(val, "kwargs", {}): + a = getattr(val, "args", ()) + kw = getattr(val, "kwargs", {}) + aggfunc = lambda x, func=aggfunc, a=a, kw=kw: func(x, *a, **kw) + converted_kwargs[key] = (val.column, aggfunc) + else: + converted_kwargs[key] = val + + func, columns, order = normalize_keyword_aggregation(converted_kwargs) + assert func is not None return relabeling, func, columns, order diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6923fb0e1780e..cf1d56f071a2d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -10,13 +10,13 @@ from collections import abc from collections.abc import Callable +import dataclasses from functools import partial from textwrap import dedent from typing import ( TYPE_CHECKING, Any, Literal, - Self, TypeAlias, TypeVar, cast, @@ -113,12 +113,11 @@ @set_module("pandas") -class NamedAgg(tuple): +@dataclasses.dataclass +class NamedAgg: """ Helper for column specific aggregation with control over output column names. - Subclass of tuple. - Parameters ---------- column : Hashable @@ -164,32 +163,32 @@ class NamedAgg(tuple): column: Hashable aggfunc: AggScalar + args: tuple[Any, ...] = dataclasses.field(default_factory=tuple) + kwargs: dict[str, Any] = dataclasses.field(default_factory=dict) - __slots__ = () - - def __new__( - cls, + def __init__( + self, column: Hashable, aggfunc: Callable[..., Any] | str, *args: Any, **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) + ) -> None: + self.column = column + self.aggfunc = aggfunc + self.args = args + self.kwargs = kwargs + + def __getitem__(self, key: int) -> Any: + """Provide backward-compatible tuple-style access.""" + if key == 0: + return self.column + elif key == 1: + return self.aggfunc + elif key == 2: + return self.args + elif key == 3: + return self.kwargs + raise IndexError("index out of range") @set_module("pandas.api.typing") From be4e1c6fd67b14097c566a45e7dd6b335628f6b3 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 30/41] code impl and examples --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cf1d56f071a2d..27e45a7d580c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -3448,4 +3448,4 @@ def _wrap_transform_general_frame( elif isinstance(res, DataFrame) and not res.index.is_(group.index): return res._align_frame(group)[0] else: - return res + return res \ No newline at end of file From c659fd67b97154ce6d4d924dbc05cbfa67586d8c Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 20:48:12 -0400 Subject: [PATCH 31/41] code impl and examples --- pandas/core/groupby/generic.py | 56 +++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e45a7d580c8..10093358a0dff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -113,11 +113,41 @@ @set_module("pandas") +<<<<<<< HEAD @dataclasses.dataclass class NamedAgg: +======= +class NamedAgg(tuple): + __slots__ = () + + def __new__(cls, column, aggfunc, *args, **kwargs): + if ( + callable(aggfunc) + and not getattr(aggfunc, "_is_wrapped", False) + and (args or kwargs) + ): + original_func = aggfunc + + def wrapped(*call_args, **call_kwargs): + series = call_args[0] + final_args = call_args[1:] + args + final_kwargs = {**kwargs, **call_kwargs} + return original_func(series, *final_args, **final_kwargs) + + wrapped._is_wrapped = True + aggfunc = wrapped + return super().__new__(cls, (column, aggfunc)) + +>>>>>>> 4e8ccf6958 (code impl and examples) """ - Helper for column specific aggregation with control over output column names. + Helper for column specific aggregation with with flexible argument passing and + control over output column names. + +<<<<<<< HEAD +======= + Subclass of tuple that wraps an aggregation function. +>>>>>>> 4e8ccf6958 (code impl and examples) Parameters ---------- column : Hashable @@ -125,8 +155,15 @@ class NamedAgg: aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. +<<<<<<< HEAD *args, **kwargs : Optional positional and keyword arguments passed to ``aggfunc``. +======= + *args : tuple, optional + Positional arguments to pass to `aggfunc` when it is called. + **kwargs : dict, optional + Keyword arguments to pass to `aggfunc` when it is called. +>>>>>>> 4e8ccf6958 (code impl and examples) See Also -------- @@ -143,19 +180,36 @@ class NamedAgg: 1 -1 10.5 2 1 12.0 +<<<<<<< HEAD >>> def n_between(ser, low, high, **kwargs): ... return ser.between(low, high, **kwargs).sum() >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) >>> df.groupby("key").agg(count_between=agg_between) count_between +======= + def n_between(ser, low, high, **kwargs): + return ser.between(low, high, **kwargs).sum() + + Using positional arguments + agg_between = pd.NamedAgg("a", n_between, 0, 1) + df.groupby("key").agg(count_between=agg_between) + count_between +>>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 +<<<<<<< HEAD >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") >>> df.groupby("key").agg(count_between_kw=agg_between_kw) count_between_kw +======= + Using both positional and keyword arguments + agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") + df.groupby("key").agg(count_between_kw=agg_between_kw) + count_between_kw +>>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 From 9f8a4c31e08abb2a75e29ce47016c00261016ac6 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:13:31 -0400 Subject: [PATCH 32/41] add typehint --- pandas/core/groupby/generic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 10093358a0dff..3cce973ca382d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -17,6 +17,7 @@ TYPE_CHECKING, Any, Literal, + Self, TypeAlias, TypeVar, cast, @@ -120,7 +121,13 @@ class NamedAgg: class NamedAgg(tuple): __slots__ = () - def __new__(cls, column, aggfunc, *args, **kwargs): + def __new__( + cls, + column: Hashable, + aggfunc: Callable[..., Any] | str, + *args: Any, + **kwargs: Any, + ) -> Self: if ( callable(aggfunc) and not getattr(aggfunc, "_is_wrapped", False) @@ -128,7 +135,7 @@ def __new__(cls, column, aggfunc, *args, **kwargs): ): original_func = aggfunc - def wrapped(*call_args, **call_kwargs): + def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: series = call_args[0] final_args = call_args[1:] + args final_kwargs = {**kwargs, **call_kwargs} From 196d1fb39891af122f4febab7cc946eb2c024f14 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 22:58:38 -0400 Subject: [PATCH 33/41] mypy --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3cce973ca382d..0ba7a8a5254c3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -141,7 +141,7 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: final_kwargs = {**kwargs, **call_kwargs} return original_func(series, *final_args, **final_kwargs) - wrapped._is_wrapped = True + wrapped._is_wrapped = True # type: ignore[attr-defined] aggfunc = wrapped return super().__new__(cls, (column, aggfunc)) From 939631304e469e19964919c5a724f6a2a2a672d1 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Fri, 17 Oct 2025 23:00:36 -0400 Subject: [PATCH 34/41] mypy --- pandas/core/groupby/generic.py | 63 +--------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0ba7a8a5254c3..27e45a7d580c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -17,7 +17,6 @@ TYPE_CHECKING, Any, Literal, - Self, TypeAlias, TypeVar, cast, @@ -114,47 +113,11 @@ @set_module("pandas") -<<<<<<< HEAD @dataclasses.dataclass class NamedAgg: -======= -class NamedAgg(tuple): - __slots__ = () - - def __new__( - cls, - column: Hashable, - aggfunc: Callable[..., Any] | str, - *args: Any, - **kwargs: Any, - ) -> Self: - if ( - callable(aggfunc) - and not getattr(aggfunc, "_is_wrapped", False) - and (args or kwargs) - ): - original_func = aggfunc - - def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: - series = call_args[0] - final_args = call_args[1:] + args - final_kwargs = {**kwargs, **call_kwargs} - return original_func(series, *final_args, **final_kwargs) - - wrapped._is_wrapped = True # type: ignore[attr-defined] - aggfunc = wrapped - return super().__new__(cls, (column, aggfunc)) - ->>>>>>> 4e8ccf6958 (code impl and examples) """ - Helper for column specific aggregation with with flexible argument passing and - control over output column names. - -<<<<<<< HEAD -======= - Subclass of tuple that wraps an aggregation function. + Helper for column specific aggregation with control over output column names. ->>>>>>> 4e8ccf6958 (code impl and examples) Parameters ---------- column : Hashable @@ -162,15 +125,8 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. -<<<<<<< HEAD *args, **kwargs : Optional positional and keyword arguments passed to ``aggfunc``. -======= - *args : tuple, optional - Positional arguments to pass to `aggfunc` when it is called. - **kwargs : dict, optional - Keyword arguments to pass to `aggfunc` when it is called. ->>>>>>> 4e8ccf6958 (code impl and examples) See Also -------- @@ -187,36 +143,19 @@ def wrapped(*call_args: Any, **call_kwargs: Any) -> Any: 1 -1 10.5 2 1 12.0 -<<<<<<< HEAD >>> def n_between(ser, low, high, **kwargs): ... return ser.between(low, high, **kwargs).sum() >>> agg_between = pd.NamedAgg("a", n_between, 0, 1) >>> df.groupby("key").agg(count_between=agg_between) count_between -======= - def n_between(ser, low, high, **kwargs): - return ser.between(low, high, **kwargs).sum() - - Using positional arguments - agg_between = pd.NamedAgg("a", n_between, 0, 1) - df.groupby("key").agg(count_between=agg_between) - count_between ->>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 -<<<<<<< HEAD >>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") >>> df.groupby("key").agg(count_between_kw=agg_between_kw) count_between_kw -======= - Using both positional and keyword arguments - agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both") - df.groupby("key").agg(count_between_kw=agg_between_kw) - count_between_kw ->>>>>>> 4e8ccf6958 (code impl and examples) key 1 1 2 1 From 65d0d9648deb9aa87aefcfb12c89efb1cb0a0e70 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Tue, 28 Oct 2025 20:42:46 -0400 Subject: [PATCH 35/41] keep comment --- pandas/core/apply.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 917b2274de785..b3bedf73ad1a7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1784,7 +1784,9 @@ def reconstruct_func( else: converted_kwargs[key] = val - func, columns, order = normalize_keyword_aggregation(converted_kwargs) + func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment] + converted_kwargs + ) assert func is not None From 3826e2630ba5831b891e43b1290b1d8ce8dcc3f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Oct 2025 00:50:19 +0000 Subject: [PATCH 36/41] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e45a7d580c8..cf1d56f071a2d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -3448,4 +3448,4 @@ def _wrap_transform_general_frame( elif isinstance(res, DataFrame) and not res.index.is_(group.index): return res._align_frame(group)[0] else: - return res \ No newline at end of file + return res From 7361570acbe6168ff2629c33135599e7d549eb4e Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Mon, 3 Nov 2025 15:23:59 -0500 Subject: [PATCH 37/41] review comments --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/apply.py | 22 ++++++++++++------- pandas/core/groupby/generic.py | 4 ++-- .../tests/groupby/aggregate/test_aggregate.py | 15 +++++++++---- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7b53bae2b0eb8..53ceb1f92ce2d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -156,7 +156,7 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan Other enhancements ^^^^^^^^^^^^^^^^^^ -- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs`` +- :class:`pandas.NamedAgg` now supports passing ``*args`` and ``**kwargs`` to calls of ``aggfunc`` (:issue:`58283`) - :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all inputs have identical ``attrs``, as has so far already been the case for diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b3bedf73ad1a7..5e6fa23c6a1a2 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -49,6 +49,7 @@ from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike +from pandas.core.groupby.generic import NamedAgg from pandas.core.util.numba_ import ( get_jit_arguments, prepare_function_arguments, @@ -1714,7 +1715,12 @@ def reconstruct_func( or not and also normalize the keyword to get new order of columns. If named aggregation is applied, `func` will be None, and kwargs contains the - column and aggregation function information to be parsed; + column and aggregation function information to be parsed. + Each value in kwargs can be either: + - a tuple of (column, aggfunc) + - or a NamedAgg instance, which may also include additional *args and **kwargs + to be passed to the aggregation function. + If named aggregation is not applied, `func` is either string (e.g. 'min') or Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]}) @@ -1727,8 +1733,9 @@ def reconstruct_func( ---------- func: agg function (e.g. 'min' or Callable) or list of agg functions (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}). - **kwargs: dict, kwargs used in is_multi_agg_with_relabel and - normalize_keyword_aggregation function for relabelling + **kwargs : dict + Keyword arguments used in is_multi_agg_with_relabel and + normalize_keyword_aggregation function for relabelling. Returns ------- @@ -1745,7 +1752,6 @@ def reconstruct_func( >>> reconstruct_func("min") (False, 'min', None, None) """ - from pandas.core.groupby.generic import NamedAgg relabeling = func is None and ( is_multi_agg_with_relabel(**kwargs) @@ -1776,10 +1782,10 @@ def reconstruct_func( for key, val in kwargs.items(): if isinstance(val, NamedAgg): aggfunc = val.aggfunc - if getattr(val, "args", ()) or getattr(val, "kwargs", {}): - a = getattr(val, "args", ()) - kw = getattr(val, "kwargs", {}) - aggfunc = lambda x, func=aggfunc, a=a, kw=kw: func(x, *a, **kw) + if val.args or val.kwargs: + aggfunc = lambda x, func=aggfunc, a=val.args, kw=val.kwargs: func( + x, *a, **kw + ) converted_kwargs[key] = (val.column, aggfunc) else: converted_kwargs[key] = val diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cf1d56f071a2d..512f82c495007 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -125,7 +125,7 @@ class NamedAgg: aggfunc : function or str Function to apply to the provided column. If string, the name of a built-in pandas function. - *args, **kwargs : + *args, **kwargs : Any Optional positional and keyword arguments passed to ``aggfunc``. See Also @@ -163,7 +163,7 @@ class NamedAgg: column: Hashable aggfunc: AggScalar - args: tuple[Any, ...] = dataclasses.field(default_factory=tuple) + args: tuple[Any, ...] = () kwargs: dict[str, Any] = dataclasses.field(default_factory=dict) def __init__( diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 5fb3666b4cdb3..f7529e8e57d2c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -870,6 +870,7 @@ def n_between(self, ser, low, high, **kwargs): return ser.between(low, high, **kwargs).sum() def test_namedagg_args(self): + # https://github.com/pandas-dev/pandas/issues/58283 df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) result = df.groupby("A").agg( @@ -879,6 +880,7 @@ def test_namedagg_args(self): tm.assert_frame_equal(result, expected) def test_namedagg_kwargs(self): + # https://github.com/pandas-dev/pandas/issues/58283 df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) result = df.groupby("A").agg( @@ -890,6 +892,7 @@ def test_namedagg_kwargs(self): tm.assert_frame_equal(result, expected) def test_namedagg_args_and_kwargs(self): + # https://github.com/pandas-dev/pandas/issues/58283 df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) result = df.groupby("A").agg( @@ -903,6 +906,7 @@ def test_namedagg_args_and_kwargs(self): tm.assert_frame_equal(result, expected) def test_multiple_named_agg_with_args_and_kwargs(self): + # https://github.com/pandas-dev/pandas/issues/58283 df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]}) result = df.groupby("A").agg( @@ -910,10 +914,13 @@ def test_multiple_named_agg_with_args_and_kwargs(self): n_between13=pd.NamedAgg("B", self.n_between, 1, 3), n_between02=pd.NamedAgg("B", self.n_between, 0, 2), ) - expected = df.groupby("A").agg( - n_between01=("B", lambda x: x.between(0, 1).sum()), - n_between13=("B", lambda x: x.between(0, 3).sum()), - n_between02=("B", lambda x: x.between(0, 2).sum()), + expected = DataFrame( + { + "n_between01": [2, 0], + "n_between13": [2, 1], + "n_between02": [2, 1], + }, + index=Index(["a", "b"], name="A"), ) tm.assert_frame_equal(result, expected) From 9241409081d6e963cfcf5929d55975b1f5ad4648 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Mon, 3 Nov 2025 15:53:43 -0500 Subject: [PATCH 38/41] fix circular import error --- pandas/core/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5e6fa23c6a1a2..210a90568e288 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -49,7 +49,6 @@ from pandas.core._numba.executor import generate_apply_looper import pandas.core.common as com from pandas.core.construction import ensure_wrapped_if_datetimelike -from pandas.core.groupby.generic import NamedAgg from pandas.core.util.numba_ import ( get_jit_arguments, prepare_function_arguments, @@ -1752,6 +1751,7 @@ def reconstruct_func( >>> reconstruct_func("min") (False, 'min', None, None) """ + from pandas.core.groupby.generic import NamedAgg relabeling = func is None and ( is_multi_agg_with_relabel(**kwargs) From e4bdd4081f95457dad78dd748a610e0f64a5348a Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Mon, 3 Nov 2025 18:19:34 -0500 Subject: [PATCH 39/41] fix failing test --- pandas/tests/groupby/aggregate/test_aggregate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index f7529e8e57d2c..2dc4911459989 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -916,11 +916,11 @@ def test_multiple_named_agg_with_args_and_kwargs(self): ) expected = DataFrame( { - "n_between01": [2, 0], - "n_between13": [2, 1], - "n_between02": [2, 1], + "n_between01": [1, 0, 0, 0], + "n_between13": [1, 1, 1, 0], + "n_between02": [1, 1, 0, 0], }, - index=Index(["a", "b"], name="A"), + index=Index([0, 1, 2, 3], name="A"), ) tm.assert_frame_equal(result, expected) From 8fc814ce6e7c889e985a723510369dd7f881cf3d Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Mon, 3 Nov 2025 21:25:23 -0500 Subject: [PATCH 40/41] restore comment --- pandas/core/apply.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 210a90568e288..4cd6e26b6a1ba 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1714,11 +1714,8 @@ def reconstruct_func( or not and also normalize the keyword to get new order of columns. If named aggregation is applied, `func` will be None, and kwargs contains the - column and aggregation function information to be parsed. - Each value in kwargs can be either: - - a tuple of (column, aggfunc) - - or a NamedAgg instance, which may also include additional *args and **kwargs - to be passed to the aggregation function. + column and aggregation function information to be parsed; + If named aggregation is not applied, `func` is either string (e.g. 'min') or Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name @@ -1732,9 +1729,8 @@ def reconstruct_func( ---------- func: agg function (e.g. 'min' or Callable) or list of agg functions (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}). - **kwargs : dict - Keyword arguments used in is_multi_agg_with_relabel and - normalize_keyword_aggregation function for relabelling. + **kwargs: dict, kwargs used in is_multi_agg_with_relabel and + normalize_keyword_aggregation function for relabelling Returns ------- From db9f7c9e96527113a5a4567be37a5d2d1cfc7b22 Mon Sep 17 00:00:00 2001 From: sreeja97 Date: Mon, 3 Nov 2025 21:27:07 -0500 Subject: [PATCH 41/41] restore comment --- pandas/core/apply.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4cd6e26b6a1ba..1098ceb4c3929 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1715,8 +1715,6 @@ def reconstruct_func( If named aggregation is applied, `func` will be None, and kwargs contains the column and aggregation function information to be parsed; - - If named aggregation is not applied, `func` is either string (e.g. 'min') or Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]})