Skip to content

Commit cbfe34a

Browse files
committed
BUG: Fix numeric_only ignored with list of functions in agg (#49352)
1 parent f7447cc commit cbfe34a

File tree

4 files changed

+510
-1
lines changed

4 files changed

+510
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1144,6 +1144,7 @@ Groupby/resample/rolling
11441144
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
11451145
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
11461146
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
1147+
- Bug in :meth:`DataFrameGroupBy.agg` where ``numeric_only=True`` was ignored when passing a list of aggregation functions. (:issue:`49352`)
11471148

11481149
Reshaping
11491150
^^^^^^^^^
@@ -1236,7 +1237,7 @@ Other
12361237
- Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
12371238
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
12381239
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
1239-
1240+
- Bug in :meth:`DataFrame.agg` where ``numeric_only=True`` was ignored when passing a list of aggregation functions, causing non-numeric columns to be included or raising ``TypeError`` (:issue:`49352`)
12401241
.. ***DO NOT USE THIS SECTION***
12411242
12421243
-

pandas/core/apply.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,31 @@ def agg_or_apply_list_like(
835835
if getattr(obj, "axis", 0) == 1:
836836
raise NotImplementedError("axis other than 0 is not supported")
837837

838+
# GH#49352 - Handle numeric_only with list of functions
839+
# When numeric_only=True is passed with a list of functions, filter
840+
# to numeric columns before processing to avoid TypeError on non-numeric Series
841+
if op_name == "agg" and kwargs.get("numeric_only", False):
842+
# Check if obj is a DataFrame (not Series) with 2 dimensions
843+
if isinstance(obj, ABCDataFrame) and obj.ndim == 2:
844+
# Filter to numeric columns before processing
845+
numeric_obj = obj.select_dtypes(include="number")
846+
847+
# Only proceed if we have numeric columns
848+
if not numeric_obj.empty:
849+
# Create kwargs without numeric_only to avoid passing it to Series methods
850+
kwargs_filtered = {k: v for k, v in kwargs.items() if k != "numeric_only"}
851+
852+
# Compute with filtered object and cleaned kwargs
853+
keys, results = self.compute_list_like(op_name, numeric_obj, kwargs_filtered)
854+
result = self.wrap_results_list_like(keys, results)
855+
return result
856+
else:
857+
# No numeric columns - return empty result
858+
from pandas import DataFrame
859+
# Get function names for index
860+
keys = list(self.func) if is_list_like(self.func) else []
861+
return DataFrame(index=keys)
862+
838863
keys, results = self.compute_list_like(op_name, obj, kwargs)
839864
result = self.wrap_results_list_like(keys, results)
840865
return result
@@ -1627,6 +1652,19 @@ def agg_or_apply_list_like(
16271652
else:
16281653
selected_obj = obj._obj_with_exclusions
16291654

1655+
# GH#49352 - Handle numeric_only with list of functions for GroupBy
1656+
# Filter to numeric columns before processing to avoid TypeError
1657+
if op_name == "agg" and kwargs.get("numeric_only", False):
1658+
# For GroupBy, filter the selected object to numeric columns
1659+
if selected_obj.ndim == 2:
1660+
numeric_obj = selected_obj.select_dtypes(include="number")
1661+
1662+
if not numeric_obj.empty:
1663+
# Update selected_obj to filtered numeric columns
1664+
selected_obj = numeric_obj
1665+
# Remove numeric_only from kwargs to avoid passing to Series methods
1666+
kwargs = {k: v for k, v in kwargs.items() if k != "numeric_only"}
1667+
16301668
# Only set as_index=True on groupby objects, not Window or Resample
16311669
# that inherit from this class.
16321670
with com.temp_setattr(
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
"""
2+
Tests for DataFrame.agg with numeric_only parameter and list of functions.
3+
This tests the fix for GH#49352.
4+
"""
5+
6+
import numpy as np
7+
import pytest
8+
import pandas as pd
9+
from pandas import DataFrame
10+
import pandas._testing as tm
11+
12+
13+
class TestFrameAggNumericOnly:
    """
    Tests for DataFrame.agg with ``numeric_only`` and a list of functions.

    Regression tests for GH#49352: every test passes a list of aggregation
    functions to ``DataFrame.agg`` (except the single-function sanity check)
    and verifies how ``numeric_only`` affects the columns of the result.
    """

    def test_agg_list_numeric_only_mixed_dtypes(self):
        """GH#49352 - Main test case from the issue."""
        df = DataFrame(
            {
                "A": [1, 2, 3, 4, 5],
                "B": [10.5, 20.5, 30.5, 40.5, 50.5],
                "C": ["a", "b", "c", "d", "e"],
            }
        )
        result = df.agg(["min", "max", "mean"], numeric_only=True)
        expected = DataFrame(
            {"A": [1.0, 5.0, 3.0], "B": [10.5, 50.5, 30.5]},
            index=["min", "max", "mean"],
        )
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_all_numeric(self):
        """Should work when all columns are numeric."""
        df = DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
        result = df.agg(["sum", "mean"], numeric_only=True)
        expected = DataFrame(
            {"A": [6.0, 2.0], "B": [60.0, 20.0]}, index=["sum", "mean"]
        )
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_no_numeric(self):
        """Should return empty DataFrame when no numeric columns."""
        df = DataFrame({"A": ["a", "b", "c"], "B": ["x", "y", "z"]})
        result = df.agg(["min", "max"], numeric_only=True)
        # No columns survive the numeric filter; only the function labels remain.
        expected = DataFrame(index=["min", "max"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "funcs,expected_index",
        [
            (["sum", "mean"], ["sum", "mean"]),
            ([np.sum, np.mean], ["sum", "mean"]),
            (["sum", np.mean], ["sum", "mean"]),
            ([np.sum, "mean"], ["sum", "mean"]),
        ],
    )
    def test_agg_list_numeric_only_various_function_types(
        self, funcs, expected_index
    ):
        """Test with different combinations of string and numpy functions."""
        df = DataFrame({"A": [1, 2, 3], "B": [10, 20, 30], "C": ["a", "b", "c"]})
        result = df.agg(funcs, numeric_only=True)
        expected = DataFrame(
            {"A": [6.0, 2.0], "B": [60.0, 20.0]}, index=expected_index
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "funcs",
        [
            ["min", "max"],
            ["sum", "mean", "std"],
            ["min", "max", "mean", "median"],
        ],
    )
    def test_agg_list_numeric_only_different_function_counts(self, funcs):
        """Test with different numbers of functions."""
        df = DataFrame(
            {
                "A": [1, 2, 3, 4, 5],
                "B": [10, 20, 30, 40, 50],
                "C": ["a", "b", "c", "d", "e"],
            }
        )
        result = df.agg(funcs, numeric_only=True)

        # Verify structure
        assert isinstance(result, DataFrame)
        assert list(result.columns) == ["A", "B"]
        assert list(result.index) == funcs
        assert result.shape == (len(funcs), 2)

        # Verify values: numeric_only=True must match aggregating the
        # explicitly selected numeric columns.
        expected = df[["A", "B"]].agg(funcs)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "data,expected_cols",
        [
            # Only integers
            (
                {"A": [1, 2, 3], "B": [4, 5, 6], "C": ["x", "y", "z"]},
                ["A", "B"],
            ),
            # Only floats
            ({"A": [1.1, 2.2], "B": [3.3, 4.4], "C": ["x", "y"]}, ["A", "B"]),
            # Mix of int and float
            (
                {"int": [1, 2], "float": [1.5, 2.5], "str": ["a", "b"]},
                ["int", "float"],
            ),
            # Single numeric column
            ({"num": [1, 2, 3], "text": ["a", "b", "c"]}, ["num"]),
        ],
    )
    def test_agg_list_numeric_only_various_dtypes(self, data, expected_cols):
        """Test with various numeric dtype combinations."""
        df = DataFrame(data)
        result = df.agg(["sum", "mean"], numeric_only=True)

        assert isinstance(result, DataFrame)
        assert list(result.columns) == expected_cols
        assert list(result.index) == ["sum", "mean"]

        # Verify values against aggregating only the expected numeric columns.
        expected = df[expected_cols].agg(["sum", "mean"])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("numeric_only", [True, False, None])
    def test_agg_list_numeric_only_parameter_values(self, numeric_only):
        """Test with different numeric_only parameter values."""
        df = DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})

        # ``None`` is used here as a marker for "do not pass the keyword at all".
        if numeric_only is None:
            result = df.agg(["sum", "mean"])
        else:
            result = df.agg(["sum", "mean"], numeric_only=numeric_only)

        expected = DataFrame({"A": [6, 2.0], "B": [60, 20.0]}, index=["sum", "mean"])
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_false_with_strings(self):
        """Verify numeric_only=False works with min/max on strings."""
        df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
        result = df.agg(["min", "max"], numeric_only=False)
        expected = DataFrame({"A": [1, 3], "B": ["a", "c"]}, index=["min", "max"])
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_preserves_column_order(self):
        """Test that column order is preserved."""
        df = DataFrame(
            {
                "Z": [1, 2, 3],
                "A": [10, 20, 30],
                "M": [100, 200, 300],
                "text": ["a", "b", "c"],
            }
        )
        result = df.agg(["sum", "mean"], numeric_only=True)

        assert list(result.columns) == ["Z", "A", "M"]

    @pytest.mark.parametrize("single_func", ["sum", "mean", "min", "max"])
    def test_agg_single_function_still_works(self, single_func):
        """Verify that single function (not a list) still works."""
        df = DataFrame({"A": [1, 2, 3], "B": [10, 20, 30], "C": ["a", "b", "c"]})
        result = df.agg(single_func, numeric_only=True)

        assert isinstance(result, pd.Series)
        assert "A" in result.index
        assert "B" in result.index
        assert "C" not in result.index

    def test_agg_list_numeric_only_with_int_and_float(self):
        """Test that both int and float columns are included."""
        df = DataFrame(
            {
                "int_col": [1, 2, 3],
                "float_col": [1.5, 2.5, 3.5],
                "str_col": ["a", "b", "c"],
            }
        )
        result = df.agg(["sum", "mean"], numeric_only=True)
        expected = DataFrame(
            {"int_col": [6.0, 2.0], "float_col": [7.5, 2.5]},
            index=["sum", "mean"],
        )
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_single_row(self):
        """Test with single row DataFrame."""
        df = DataFrame({"A": [1], "B": [10], "C": ["x"]})
        result = df.agg(["sum", "mean"], numeric_only=True)
        expected = DataFrame(
            {"A": [1.0, 1.0], "B": [10.0, 10.0]}, index=["sum", "mean"]
        )
        tm.assert_frame_equal(result, expected)

    # Additional edge cases

    def test_agg_list_numeric_only_with_nans(self):
        """Test DataFrame with NaN values."""
        df = DataFrame(
            {
                "A": [1, np.nan, 3],
                "B": [10, 20, np.nan],
                "C": ["x", "y", "z"],
            }
        )
        result = df.agg(["sum", "mean"], numeric_only=True)
        expected = DataFrame(
            {"A": [4.0, 2.0], "B": [30.0, 15.0]}, index=["sum", "mean"]
        )
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_with_datetime(self):
        """Test that datetime columns are excluded with numeric_only=True."""
        df = DataFrame(
            {
                "num": [1, 2, 3],
                "date": pd.date_range("2020-01-01", periods=3),
                "text": ["a", "b", "c"],
            }
        )
        result = df.agg(["sum", "mean"], numeric_only=True)
        expected = DataFrame({"num": [6.0, 2.0]}, index=["sum", "mean"])
        tm.assert_frame_equal(result, expected)

    def test_agg_list_numeric_only_large_dataframe(self):
        """Test with a larger (1000-row) DataFrame of seeded random data."""
        # A local Generator keeps the test deterministic without touching
        # NumPy's global random state.
        rng = np.random.default_rng(42)
        df = DataFrame(
            {
                "A": rng.integers(1, 100, 1000),
                "B": rng.standard_normal(1000),
                "C": ["text"] * 1000,
            }
        )
        result = df.agg(["sum", "mean", "std"], numeric_only=True)

        # Verify structure
        assert isinstance(result, DataFrame)
        assert list(result.columns) == ["A", "B"]
        assert list(result.index) == ["sum", "mean", "std"]
        assert result.shape == (3, 2)

        # Verify values without hard-coding them: the result must match
        # aggregating the numeric columns directly.
        expected = df[["A", "B"]].agg(["sum", "mean", "std"])
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)