Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1758,9 +1758,9 @@ def _cython_agg_general(

data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)

def array_func(values: ArrayLike) -> ArrayLike:
def array_func(values: ArrayLike) -> ArrayLike:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you undo the changes in this file?

try:
result = self._grouper._cython_operation(
result = self._grouper._cython_operation(
"aggregate",
values,
how,
Expand Down
61 changes: 61 additions & 0 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
Timestamp,
bdate_range,
)
import pyarrow as pa
import decimal
import pandas._testing as tm
import math


@pytest.mark.parametrize(
Expand Down Expand Up @@ -413,3 +416,61 @@ def test_cython_agg_EA_known_dtypes(data, op_name, action, with_na):

result = grouped["col"].aggregate(op_name)
assert result.dtype == expected_dtype

# Test groupby.var() when called on a pyarrow-backed decimal column

@pytest.mark.parametrize("with_na", [False, True])
def test_groupby_var_arrow_decimal(with_na):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Can you put this test in pandas/tests/extension/test_arrow.py?
  2. Can you just use the example in BUG: groupby.var() does not return arrow types with arrow backed series as input.  #54627 (comment)? You should be able to use tm.assert_frame_equal

# Create Arrow-backed decimal Series
data = pd.Series(
[
decimal.Decimal("123.000"),
decimal.Decimal("12.000"),
decimal.Decimal("5.5"),
decimal.Decimal("7.25")
],
dtype=pd.ArrowDtype(pa.decimal128(6, 3))
)

if with_na:
data.iloc[3] = pd.NA # introduce a missing value

df = DataFrame({"key": ["a", "a", "b", "b"], "col": data})
grouped = df.groupby("key")

# Perform the aggregation using .var() (calls _cython_agg_general internally)
result = grouped.var()#it correctly converts it to double[pyarrow]


# Check that the result dtype is Arrow-backed double (pa.float64())
expected_dtype = pd.ArrowDtype(pa.float64())
assert isinstance(result["col"].dtype, pd.ArrowDtype)
assert result["col"].dtype == expected_dtype


# Input values per group as floats; group "b" keeps a single value when with_na is set
vals_a = [123.0, 12.0] # convert to float
if with_na:
vals_b = [5.5] # single value → var is NA
else:
vals_b = [5.5, 7.25]

# Compute variance using pandas (float)
expected_var_a = pd.Series(vals_a).var()
expected_var_b = pd.Series(vals_b).var() if len(vals_b) > 1 else pd.NA

# Helper function for float comparison with NA support
def _almost_equal_or_na(a, b, tol=1e-12):
    """Return True when *a* and *b* are both missing or numerically close.

    Parameters
    ----------
    a, b : scalar
        Values to compare. Each may be a missing value as recognised by
        ``pd.isna`` or anything ``float()`` accepts.
    tol : float, default 1e-12
        Used as both the relative and the absolute tolerance passed to
        ``math.isclose``.

    Returns
    -------
    bool
        True if both values are missing, or if neither is missing and they
        are within ``tol`` of each other; False otherwise.
    """
    a_missing = pd.isna(a)
    b_missing = pd.isna(b)
    if a_missing or b_missing:
        # A missing value only matches another missing value. Handling the
        # one-sided case here avoids calling float() on pd.NA, which raises
        # TypeError instead of reporting a mismatch.
        return bool(a_missing and b_missing)
    return math.isclose(float(a), float(b), rel_tol=tol, abs_tol=tol)

# Compare the DataFrame result
assert _almost_equal_or_na(result.loc["a", "col"], expected_var_a)
assert _almost_equal_or_na(result.loc["b", "col"], expected_var_b)

# Also test the SeriesGroupBy path
result_series = grouped["col"].var()
assert _almost_equal_or_na(result_series.loc["a"], expected_var_a)
assert _almost_equal_or_na(result_series.loc["b"], expected_var_b)

Loading