-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
BUG: Fixed issue where rolling.kurt() calculations would be effected by values outside of scope #61481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
BUG: Fixed issue where rolling.kurt() calculations would be effected by values outside of scope #61481
Changes from all commits
e2e7260
76cb380
7763a83
01ab52e
321c093
d76ad5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,14 @@ | ||
| # cython: boundscheck=False, wraparound=False, cdivision=True | ||
|
|
||
| from libc.math cimport ( | ||
| abs, | ||
| isfinite, | ||
| log10, | ||
| pow, | ||
| round, | ||
| signbit, | ||
| sqrt, | ||
| ) | ||
| from libcpp cimport bool | ||
| from libcpp.deque cimport deque | ||
| from libcpp.stack cimport stack | ||
| from libcpp.unordered_map cimport unordered_map | ||
|
|
@@ -724,6 +728,49 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, | |
|
|
||
| return result | ||
|
|
||
| cdef void update_sum_of_window(float64_t val, | ||
| float64_t **x_value, | ||
| float64_t **comp_value, | ||
| int power_of_element, | ||
| bool add_mode, # 1 for add_kurt, 0 for remove_kurt | ||
| ) noexcept nogil: | ||
|
|
||
| cdef: | ||
| float64_t val_raised | ||
| double val_length, x_length | ||
| bool val_length_flag, x_length_flag | ||
|
|
||
| if add_mode: | ||
| val_raised = pow(val, power_of_element) | ||
| else: | ||
| val_raised = -pow(val, power_of_element) | ||
|
|
||
| x_length = abs(log10(abs(x_value[0][0]))) | ||
| val_length = abs(log10(abs(val_raised))) | ||
|
|
||
| x_length_flag = x_length > 15 and isfinite(x_length) | ||
| val_length_flag = val_length > 15 and isfinite(val_length) | ||
|
Comment on lines
+751
to
+752
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This verification doesn't make much sense to me.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should compare the magnitude between x_value and val directly. |
||
|
|
||
| # We'll try to maintain comp_value as the counter for | ||
| # numbers <1e15 to keep it from getting rounded out. | ||
| if x_length_flag and val_length_flag: | ||
| # Both > 1e15 or < 1-e15 | ||
| x_value[0][0] += val_raised | ||
|
Comment on lines
+756
to
+758
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you not using the kahan compensation? |
||
|
|
||
| elif x_length_flag: | ||
| comp_value[0][0] += val_raised | ||
|
Comment on lines
+760
to
+761
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the rationale for this? |
||
|
|
||
| elif val_length_flag: | ||
| comp_value[0][0] += x_value[0][0] | ||
| x_value[0][0] = val_raised | ||
|
Comment on lines
+764
to
+765
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the rationale for this? |
||
|
|
||
| else: | ||
| # Neither are >1e15/<1e-15, safe to proceed | ||
| x_value[0][0] += val_raised | ||
|
|
||
| if comp_value[0][0] != 0: | ||
| x_value[0][0] += comp_value[0][0] | ||
| comp_value[0][0] = 0 | ||
|
|
||
| cdef void add_kurt(float64_t val, int64_t *nobs, | ||
| float64_t *x, float64_t *xx, | ||
|
|
@@ -736,29 +783,15 @@ cdef void add_kurt(float64_t val, int64_t *nobs, | |
| float64_t *prev_value | ||
| ) noexcept nogil: | ||
| """ add a value from the kurotic calc """ | ||
| cdef: | ||
| float64_t y, t | ||
|
|
||
| # Not NaN | ||
| if val == val: | ||
| nobs[0] = nobs[0] + 1 | ||
|
|
||
| y = val - compensation_x[0] | ||
| t = x[0] + y | ||
| compensation_x[0] = t - x[0] - y | ||
| x[0] = t | ||
| y = val * val - compensation_xx[0] | ||
| t = xx[0] + y | ||
| compensation_xx[0] = t - xx[0] - y | ||
| xx[0] = t | ||
| y = val * val * val - compensation_xxx[0] | ||
| t = xxx[0] + y | ||
| compensation_xxx[0] = t - xxx[0] - y | ||
| xxx[0] = t | ||
| y = val * val * val * val - compensation_xxxx[0] | ||
| t = xxxx[0] + y | ||
| compensation_xxxx[0] = t - xxxx[0] - y | ||
| xxxx[0] = t | ||
| update_sum_of_window(val, &x, &compensation_x, 1, 1) | ||
| update_sum_of_window(val, &xx, &compensation_xx, 2, 1) | ||
| update_sum_of_window(val, &xxx, &compensation_xxx, 3, 1) | ||
| update_sum_of_window(val, &xxxx, &compensation_xxxx, 4, 1) | ||
|
|
||
| # GH#42064, record num of same values to remove floating point artifacts | ||
| if val == prev_value[0]: | ||
|
|
@@ -768,7 +801,6 @@ cdef void add_kurt(float64_t val, int64_t *nobs, | |
| num_consecutive_same_value[0] = 1 | ||
| prev_value[0] = val | ||
|
|
||
|
|
||
| cdef void remove_kurt(float64_t val, int64_t *nobs, | ||
| float64_t *x, float64_t *xx, | ||
| float64_t *xxx, float64_t *xxxx, | ||
|
|
@@ -777,40 +809,26 @@ cdef void remove_kurt(float64_t val, int64_t *nobs, | |
| float64_t *compensation_xxx, | ||
| float64_t *compensation_xxxx) noexcept nogil: | ||
| """ remove a value from the kurotic calc """ | ||
| cdef: | ||
| float64_t y, t | ||
|
|
||
| # Not NaN | ||
| if val == val: | ||
| nobs[0] = nobs[0] - 1 | ||
|
|
||
| y = - val - compensation_x[0] | ||
| t = x[0] + y | ||
| compensation_x[0] = t - x[0] - y | ||
| x[0] = t | ||
| y = - val * val - compensation_xx[0] | ||
| t = xx[0] + y | ||
| compensation_xx[0] = t - xx[0] - y | ||
| xx[0] = t | ||
| y = - val * val * val - compensation_xxx[0] | ||
| t = xxx[0] + y | ||
| compensation_xxx[0] = t - xxx[0] - y | ||
| xxx[0] = t | ||
| y = - val * val * val * val - compensation_xxxx[0] | ||
| t = xxxx[0] + y | ||
| compensation_xxxx[0] = t - xxxx[0] - y | ||
| xxxx[0] = t | ||
| update_sum_of_window(val, &x, &compensation_x, 1, 0) | ||
| update_sum_of_window(val, &xx, &compensation_xx, 2, 0) | ||
| update_sum_of_window(val, &xxx, &compensation_xxx, 3, 0) | ||
| update_sum_of_window(val, &xxxx, &compensation_xxxx, 4, 0) | ||
|
|
||
|
|
||
| def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, | ||
| ndarray[int64_t] end, int64_t minp) -> np.ndarray: | ||
| cdef: | ||
| Py_ssize_t i, j | ||
| float64_t val, mean_val, min_val, sum_val = 0 | ||
| float64_t compensation_xxxx_add, compensation_xxxx_remove | ||
| float64_t compensation_xxx_remove, compensation_xxx_add | ||
| float64_t compensation_xx_remove, compensation_xx_add | ||
| float64_t compensation_x_remove, compensation_x_add | ||
| float64_t compensation_xxxx | ||
| float64_t compensation_xxx | ||
| float64_t compensation_xx | ||
| float64_t compensation_x | ||
| float64_t x, xx, xxx, xxxx | ||
| float64_t prev_value | ||
| int64_t nobs, s, e, num_consecutive_same_value | ||
|
|
@@ -851,16 +869,16 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, | |
| prev_value = values[s] | ||
| num_consecutive_same_value = 0 | ||
|
|
||
| compensation_xxxx_add = compensation_xxxx_remove = 0 | ||
| compensation_xxx_remove = compensation_xxx_add = 0 | ||
| compensation_xx_remove = compensation_xx_add = 0 | ||
| compensation_x_remove = compensation_x_add = 0 | ||
| compensation_xxxx = 0 | ||
| compensation_xxx = 0 | ||
| compensation_xx = 0 | ||
| compensation_x = 0 | ||
| x = xx = xxx = xxxx = 0 | ||
| nobs = 0 | ||
| for j in range(s, e): | ||
| add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, | ||
| &compensation_x_add, &compensation_xx_add, | ||
| &compensation_xxx_add, &compensation_xxxx_add, | ||
| &compensation_x, &compensation_xx, | ||
| &compensation_xxx, &compensation_xxxx, | ||
| &num_consecutive_same_value, &prev_value) | ||
|
|
||
| else: | ||
|
|
@@ -870,14 +888,14 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, | |
| # calculate deletes | ||
| for j in range(start[i - 1], s): | ||
| remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, | ||
| &compensation_x_remove, &compensation_xx_remove, | ||
| &compensation_xxx_remove, &compensation_xxxx_remove) | ||
| &compensation_x, &compensation_xx, | ||
| &compensation_xxx, &compensation_xxxx) | ||
|
|
||
| # calculate adds | ||
| for j in range(end[i - 1], e): | ||
| add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, | ||
| &compensation_x_add, &compensation_xx_add, | ||
| &compensation_xxx_add, &compensation_xxxx_add, | ||
| &compensation_x, &compensation_xx, | ||
| &compensation_xxx, &compensation_xxxx, | ||
| &num_consecutive_same_value, &prev_value) | ||
|
|
||
| output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you write some tests that assert the values explicitly? Make sure that they fail on main. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that computing the
valproduct is more efficient than usingpow