BUG: Fix numerical instability in rolling var/std

suzyahyah · Alvaro-Kothe · commit c9f12c02ac9c · 2025-11-08T21:14:33.000-03:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1260,6 +1260,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
 - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
+- Bug in :meth:`Rolling.var` and :meth:`Rolling.std` computing incorrect results due to numerical instability (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -449,7 +449,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
+            if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:
 
                 prev_value = values[s]
                 num_consecutive_same_value = 0
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -8,9 +8,6 @@
 
 from pandas.compat import (
     IS64,
-    is_platform_arm,
-    is_platform_power,
-    is_platform_riscv64,
 )
 from pandas.errors import Pandas4Warning
 
@@ -1085,27 +1082,91 @@ def test_rolling_sem(frame_or_series):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(
-    is_platform_arm() or is_platform_power() or is_platform_riscv64(),
-    reason="GH 38921",
-)
 @pytest.mark.parametrize(
-    ("func", "third_value", "values"),
+    ("func", "values", "window", "ddof", "expected_values"),
     [
-        ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]),
-        ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]),
-        ("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]),
-        ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]),
+        ("var", [99999999999999999, 1, 1, 2, 3, 1, 1], 2, 1, [5e33, 0, 0.5, 0.5, 2, 0]),
+        (
+            "std",
+            [99999999999999999, 1, 1, 2, 3, 1, 1],
+            2,
+            1,
+            [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0],
+        ),
+        ("var", [99999999999999999, 1, 2, 2, 3, 1, 1], 2, 1, [5e33, 0.5, 0, 0.5, 2, 0]),
+        (
+            "std",
+            [99999999999999999, 1, 2, 2, 3, 1, 1],
+            2,
+            1,
+            [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0],
+        ),
+        (
+            "std",
+            [1.2e03, 1.3e17, 1.5e17, 1.995e03, 1.990e03],
+            2,
+            1,
+            [9.192388e16, 1.414214e16, 1.060660e17, 3.535534e00],
+        ),
+        (
+            "var",
+            [
+                0.00000000e00,
+                0.00000000e00,
+                3.16188252e-18,
+                2.95781651e-16,
+                2.23153542e-51,
+                0.00000000e00,
+                0.00000000e00,
+                5.39943432e-48,
+                1.38206260e-73,
+                0.00000000e00,
+            ],
+            3,
+            1,
+            [
+                3.33250036e-036,
+                2.88538519e-032,
+                2.88538519e-032,
+                2.91622617e-032,
+                1.65991678e-102,
+                9.71796366e-096,
+                9.71796366e-096,
+                9.71796366e-096,
+            ],
+        ),
+        (
+            "std",
+            [1, -1, 0, 1, 3, 2, -2, 10000000000, 1, 2, 0, -2, 1, 3, 0, 1],
+            6,
+            1,
+            [
+                1.41421356e00,
+                1.87082869e00,
+                4.08248290e09,
+                4.08248290e09,
+                4.08248290e09,
+                4.08248290e09,
+                4.08248290e09,
+                4.08248290e09,
+                1.72240142e00,
+                1.75119007e00,
+                1.64316767e00,
+            ],
+        ),
     ],
 )
-def test_rolling_var_numerical_issues(func, third_value, values):
-    # GH: 37051
-    ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1])
-    result = getattr(ds.rolling(2), func)()
-    expected = Series([np.nan] + values)
-    tm.assert_series_equal(result, expected)
+def test_rolling_var_correctness(func, values, window, ddof, expected_values):
+    # GH: 37051, 42064, 54518, 52407, 47721 -- var/std must match expected_values
+    ts = Series(values)
+    result = getattr(ts.rolling(window=window), func)(ddof=ddof)
+    if result.last_valid_index() is not None:  # label 0 is valid; skip only None
+        result = result[
+            result.first_valid_index() : result.last_valid_index() + 1
+        ].reset_index(drop=True)  # trim to first..last valid value, reindex from 0
+    expected = Series(expected_values)
+    tm.assert_series_equal(result, expected, atol=1e-55)
     # GH 42064
-    # new `roll_var` will output 0.0 correctly
     tm.assert_series_equal(result == 0, expected == 0)