Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit a8e9d26

Browse files
authored
Scale Series.var/std(skipna=True) (#616)
- Add perf test for var with skipna=True
- Add numpy_like var
- Add numpy_like nanmean
- Add test for numpy_like.nanvar
- Add perf test for numpy_like.nanvar
- Add perf test for Series.std(skipna=True)
1 parent 4dd90c4 commit a8e9d26

File tree

5 files changed: 54 additions and 5 deletions.

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1497,7 +1497,7 @@ def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1
14971497
if valuable_length <= ddof:
14981498
return numpy.nan
14991499

1500-
return numpy.nanvar(self._data) * valuable_length / (valuable_length - ddof)
1500+
return numpy_like.nanvar(self._data) * valuable_length / (valuable_length - ddof)
15011501

15021502
if len(self._data) <= ddof:
15031503
return numpy.nan

sdc/functions/numpy_like.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,3 +497,32 @@ def nanmean_impl(a):
497497
return np.divide(c, count)
498498

499499
return nanmean_impl
500+
501+
502+
def nanvar(a):
503+
pass
504+
505+
506+
@sdc_overload(nanvar)
507+
def np_nanvar(a):
508+
if not isinstance(a, types.Array):
509+
return
510+
isnan = get_isnan(a.dtype)
511+
512+
def nanvar_impl(a):
513+
# Compute the mean
514+
m = nanmean(a)
515+
516+
# Compute the sum of square diffs
517+
ssd = 0.0
518+
count = 0
519+
for i in prange(len(a)):
520+
v = a[i]
521+
if not isnan(v):
522+
val = (v.item() - m)
523+
ssd += np.real(val * np.conj(val))
524+
count += 1
525+
# np.divide() doesn't raise ZeroDivisionError
526+
return np.divide(ssd, count)
527+
528+
return nanvar_impl

sdc/tests/test_sdc_numpy.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,10 @@ def sdc_impl():
243243

244244
class TestArrayReductions(TestCase):
245245

246-
def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True):
246+
def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True, comparator=None):
247+
if not comparator:
248+
comparator = np.testing.assert_array_equal
249+
247250
alt_cfunc = self.jit(alt_pyfunc)
248251

249252
def cases():
@@ -262,7 +265,7 @@ def cases():
262265

263266
for case in cases():
264267
with self.subTest(data=case):
265-
np.testing.assert_array_equal(alt_cfunc(case), pyfunc(case))
268+
comparator(alt_cfunc(case), pyfunc(case))
266269

267270
def test_nanmean(self):
268271
def ref_impl(a):
@@ -309,6 +312,16 @@ def sdc_impl(a):
309312

310313
self.check_reduction_basic(ref_impl, sdc_impl)
311314

315+
def test_nanvar(self):
316+
def ref_impl(a):
317+
return np.nanvar(a)
318+
319+
def sdc_impl(a):
320+
return numpy_like.nanvar(a)
321+
322+
self.check_reduction_basic(ref_impl, sdc_impl,
323+
comparator=np.testing.assert_array_almost_equal)
324+
312325
def test_sum(self):
313326
def ref_impl(a):
314327
return np.sum(a)

sdc/tests/tests_perf/test_perf_numpy.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
109109
CE(type_='Numba', code='np.nanprod(data)', jitted=True),
110110
CE(type_='SDC', code='sdc.functions.numpy_like.nanprod(data)', jitted=True),
111111
], usecase_params='data'),
112+
TC(name='nanvar', size=[10 ** 7], call_expr=[
113+
CE(type_='Python', code='np.nanvar(data)', jitted=False),
114+
CE(type_='Numba', code='np.nanvar(data)', jitted=True),
115+
CE(type_='SDC', code='sdc.functions.numpy_like.nanvar(data)', jitted=True),
116+
], usecase_params='data'),
112117
TC(name='sum', size=[10 ** 7], call_expr=[
113118
CE(type_='Python', code='np.sum(data)', jitted=False),
114119
CE(type_='Numba', code='np.sum(data)', jitted=True),

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,14 +127,16 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
127127
TC(name='shift', size=[10 ** 8]),
128128
TC(name='size', size=[10 ** 7], call_expr='data.size', usecase_params='data'),
129129
TC(name='sort_values', size=[10 ** 5]),
130-
TC(name='std', size=[10 ** 7]),
130+
TC(name='std', size=[10 ** 7], params='skipna=True'),
131+
TC(name='std', size=[10 ** 7], params='skipna=False'),
131132
TC(name='sub', size=[10 ** 7], params='other', data_num=2),
132133
TC(name='sum', size=[10 ** 8]),
133134
TC(name='take', size=[10 ** 7], call_expr='data.take([0])', usecase_params='data'),
134135
TC(name='truediv', size=[10 ** 7], params='other', data_num=2),
135136
TC(name='values', size=[10 ** 7], call_expr='data.values', usecase_params='data'),
136137
TC(name='value_counts', size=[10 ** 6]),
137-
TC(name='var', size=[10 ** 8]),
138+
TC(name='var', size=[10 ** 8], params='skipna=True'),
139+
TC(name='var', size=[10 ** 8], params='skipna=False'),
138140
TC(name='unique', size=[10 ** 5]),
139141
]
140142

0 commit comments

Comments
 (0)