Skip to content

Commit f0519a3

Browse files
committed
perf: remove consecutive value counts
1 parent e095daf commit f0519a3

File tree

1 file changed

+4
-25
lines changed

1 file changed

+4
-25
lines changed

pandas/_libs/window/aggregations.pyx

Lines changed: 4 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -332,19 +332,13 @@ cdef float64_t calc_var(
332332
int ddof,
333333
float64_t nobs,
334334
float64_t ssqdm_x,
335-
int64_t num_consecutive_same_value
336335
) noexcept nogil:
337336
cdef:
338337
float64_t result
339338

340339
# Variance is unchanged if no observation is added or removed
341340
if (nobs >= minp) and (nobs > ddof):
342-
343-
# pathological case & repeatedly same values case
344-
if nobs == 1 or num_consecutive_same_value >= nobs:
345-
result = 0
346-
else:
347-
result = ssqdm_x / (nobs - <float64_t>ddof)
341+
result = ssqdm_x / (nobs - <float64_t>ddof)
348342
else:
349343
result = NaN
350344

@@ -357,8 +351,6 @@ cdef void add_var(
357351
float64_t *mean_x,
358352
float64_t *ssqdm_x,
359353
float64_t *compensation,
360-
int64_t *num_consecutive_same_value,
361-
float64_t *prev_value,
362354
bint *numerically_unstable,
363355
) noexcept nogil:
364356
""" add a value from the var calc """
@@ -372,14 +364,6 @@ cdef void add_var(
372364

373365
nobs[0] = nobs[0] + 1
374366

375-
# GH#42064, record num of same values to remove floating point artifacts
376-
if val == prev_value[0]:
377-
num_consecutive_same_value[0] += 1
378-
else:
379-
# reset to 1 (include current value itself)
380-
num_consecutive_same_value[0] = 1
381-
prev_value[0] = val
382-
383367
# Welford's method for the online variance-calculation
384368
# using Kahan summation
385369
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
@@ -441,8 +425,8 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
441425
"""
442426
cdef:
443427
float64_t mean_x, ssqdm_x, nobs, compensation_add,
444-
float64_t compensation_remove, prev_value
445-
int64_t s, e, num_consecutive_same_value
428+
float64_t compensation_remove
429+
int64_t s, e
446430
Py_ssize_t i, j, N = len(start)
447431
ndarray[float64_t] output
448432
bint is_monotonic_increasing_bounds
@@ -481,22 +465,17 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
481465
# calculate adds
482466
for j in range(end[i - 1], e):
483467
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
484-
&num_consecutive_same_value, &prev_value,
485468
&numerically_unstable)
486469

487470
if requires_recompute or numerically_unstable:
488471

489-
prev_value = values[s]
490-
num_consecutive_same_value = 0
491-
492472
mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
493473
for j in range(s, e):
494474
add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
495-
&num_consecutive_same_value, &prev_value,
496475
&numerically_unstable)
497476
numerically_unstable = False
498477

499-
output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value)
478+
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
500479

501480
if not is_monotonic_increasing_bounds:
502481
nobs = 0.0

0 commit comments

Comments (0)