Skip to content

Commit c9f12c0

Browse files
suzyahyah authored and Alvaro-Kothe committed
Fix rolling var bug
1 parent f4851e5 commit c9f12c0

File tree

3 files changed

+82
-20
lines changed

3 files changed

+82
-20
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,7 @@ Groupby/resample/rolling
12601260
- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
12611261
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
12621262
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
1263+
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
12631264

12641265
Reshaping
12651266
^^^^^^^^^

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
449449

450450
# Over the first window, observations can only be added
451451
# never removed
452-
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
452+
if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:
453453

454454
prev_value = values[s]
455455
num_consecutive_same_value = 0

pandas/tests/window/test_rolling.py

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88

99
from pandas.compat import (
1010
IS64,
11-
is_platform_arm,
12-
is_platform_power,
13-
is_platform_riscv64,
1411
)
1512
from pandas.errors import Pandas4Warning
1613

@@ -1085,27 +1082,91 @@ def test_rolling_sem(frame_or_series):
10851082
tm.assert_series_equal(result, expected)
10861083

10871084

1088-
@pytest.mark.xfail(
1089-
is_platform_arm() or is_platform_power() or is_platform_riscv64(),
1090-
reason="GH 38921",
1091-
)
10921085
@pytest.mark.parametrize(
1093-
("func", "third_value", "values"),
1086+
("func", "values", "window", "ddof", "expected_values"),
10941087
[
1095-
("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]),
1096-
("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]),
1097-
("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]),
1098-
("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]),
1088+
("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, [5e33, 0, 0.5, 0.5, 2, 0]),
1089+
(
1090+
"std",
1091+
[99999999999999999, 1, 1, 2, 3, 1, 1],
1092+
2,
1093+
1,
1094+
[7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0],
1095+
),
1096+
("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, [5e33, 0.5, 0, 0.5, 2, 0]),
1097+
(
1098+
"std",
1099+
[99999999999999999, 1, 2, 2, 3, 1, 1],
1100+
2,
1101+
1,
1102+
[7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0],
1103+
),
1104+
(
1105+
"std",
1106+
[1.2e03, 1.3e17, 1.5e17, 1.995e03, 1.990e03],
1107+
2,
1108+
1,
1109+
[9.192388e16, 1.414214e16, 1.060660e17, 3.535534e00],
1110+
),
1111+
(
1112+
"var",
1113+
[
1114+
0.00000000e00,
1115+
0.00000000e00,
1116+
3.16188252e-18,
1117+
2.95781651e-16,
1118+
2.23153542e-51,
1119+
0.00000000e00,
1120+
0.00000000e00,
1121+
5.39943432e-48,
1122+
1.38206260e-73,
1123+
0.00000000e00,
1124+
],
1125+
3,
1126+
1,
1127+
[
1128+
3.33250036e-036,
1129+
2.88538519e-032,
1130+
2.88538519e-032,
1131+
2.91622617e-032,
1132+
1.65991678e-102,
1133+
9.71796366e-096,
1134+
9.71796366e-096,
1135+
9.71796366e-096,
1136+
],
1137+
),
1138+
(
1139+
"std",
1140+
[1, -1, 0, 1, 3, 2, -2, 10000000000, 1, 2, 0, -2, 1, 3, 0, 1],
1141+
6,
1142+
1,
1143+
[
1144+
1.41421356e00,
1145+
1.87082869e00,
1146+
4.08248290e09,
1147+
4.08248290e09,
1148+
4.08248290e09,
1149+
4.08248290e09,
1150+
4.08248290e09,
1151+
4.08248290e09,
1152+
1.72240142e00,
1153+
1.75119007e00,
1154+
1.64316767e00,
1155+
],
1156+
),
10991157
],
11001158
)
1101-
def test_rolling_var_numerical_issues(func, third_value, values):
1102-
# GH: 37051
1103-
ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1])
1104-
result = getattr(ds.rolling(2), func)()
1105-
expected = Series([np.nan] + values)
1106-
tm.assert_series_equal(result, expected)
1159+
def test_rolling_var_correctness(func, values, window, ddof, expected_values):
1160+
# GH: 37051, 42064, 54518, 52407, 47721
1161+
ts = Series(values)
1162+
result = getattr(ts.rolling(window=window), func)(ddof=ddof)
1163+
if result.last_valid_index():
1164+
result = result[
1165+
result.first_valid_index() : result.last_valid_index() + 1
1166+
].reset_index(drop=True)
1167+
expected = Series(expected_values)
1168+
tm.assert_series_equal(result, expected, atol=1e-55)
11071169
# GH 42064
1108-
# new `roll_var` will output 0.0 correctly
11091170
tm.assert_series_equal(result == 0, expected == 0)
11101171

11111172

0 commit comments

Comments
 (0)