Skip to content

Commit cca03e9

Browse files
committed
fix: undo NaN changes
1 parent 6f21167 commit cca03e9

File tree

1 file changed

+26
-5
lines changed

1 file changed

+26
-5
lines changed

pandas/_libs/window/aggregations.pyx

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
485485

486486
cdef float64_t calc_skew(int64_t minp, int64_t nobs,
487487
float64_t mean, float64_t m2, float64_t m3,
488+
int64_t num_consecutive_same_value
488489
) noexcept nogil:
489490
cdef:
490491
float64_t result, dnobs
@@ -495,6 +496,10 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
495496

496497
if nobs < 3:
497498
result = NaN
499+
# GH 42064 46431
500+
# uniform case, force result to be 0
501+
elif num_consecutive_same_value >= nobs:
502+
result = 0.0
498503
# #18044: with degenerate distribution, floating issue will
499504
# cause m2 != 0. and make the result a very
500505
# large number.
@@ -519,7 +524,9 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
519524
cdef void add_skew(float64_t val, int64_t *nobs,
520525
float64_t *mean, float64_t *m2,
521526
float64_t *m3,
522-
bint *numerically_unstable
527+
bint *numerically_unstable,
528+
int64_t *num_consecutive_same_value,
529+
float64_t *prev_value,
523530
) noexcept nogil:
524531
""" add a value from the skew calc """
525532
cdef:
@@ -546,6 +553,14 @@ cdef void add_skew(float64_t val, int64_t *nobs,
546553
m2[0] += term1
547554
mean[0] += delta_n
548555

556+
# GH#42064, record the number of consecutive equal values to remove floating point artifacts
557+
if val == prev_value[0]:
558+
num_consecutive_same_value[0] += 1
559+
else:
560+
# reset to 1 (the count includes the current value itself)
561+
num_consecutive_same_value[0] = 1
562+
prev_value[0] = val
563+
549564

550565
cdef void remove_skew(float64_t val, int64_t *nobs,
551566
float64_t *mean, float64_t *m2,
@@ -591,9 +606,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
591606
Py_ssize_t i, j
592607
float64_t val
593608
float64_t mean, m2, m3
609+
float64_t prev_value
594610
int64_t nobs = 0, N = len(start)
595-
int64_t s, e
611+
int64_t s, e, num_consecutive_same_value
596612
ndarray[float64_t] output
613+
bint is_monotonic_increasing_bounds
597614
bint requires_recompute, numerically_unstable = False
598615

599616
minp = max(minp, 3)
@@ -604,6 +621,7 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
604621

605622
with nogil:
606623
for i in range(0, N):
624+
607625
s = start[i]
608626
e = end[i]
609627

@@ -615,26 +633,29 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
615633
)
616634

617635
if not requires_recompute:
636+
# calculate deletes
618637
for j in range(start[i - 1], s):
619638
val = values[j]
620639
remove_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable)
621640

622641
# calculate adds
623642
for j in range(end[i - 1], e):
624643
val = values[j]
625-
add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable)
644+
add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable,
645+
&num_consecutive_same_value, &prev_value)
626646

627647
if requires_recompute or numerically_unstable:
628648
mean = m2 = m3 = 0.0
629649
nobs = 0
630650

631651
for j in range(s, e):
632652
val = values[j]
633-
add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable)
653+
add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable,
654+
&num_consecutive_same_value, &prev_value)
634655

635656
numerically_unstable = False
636657

637-
output[i] = calc_skew(minp, nobs, mean, m2, m3)
658+
output[i] = calc_skew(minp, nobs, mean, m2, m3, num_consecutive_same_value)
638659

639660
if not is_monotonic_increasing_bounds:
640661
nobs = 0

0 commit comments

Comments
 (0)