Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit f23c3bf

Browse files
authored
Scale Series.mean(skipna=True) (#610)
* Add perf test for np.mean()
* Add numpy_like nanmean
* Refactor tests for Series.mean()
* Add tests for numpy_like.nanmean()
* Add perf test for nanmean
1 parent e45ad86 commit f23c3bf

File tree

6 files changed

+66
-40
lines changed

6 files changed

+66
-40
lines changed

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3787,7 +3787,7 @@ def hpat_pandas_series_mean_impl(self, axis=None, skipna=None, level=None, numer
37873787
_skipna = skipna
37883788

37893789
if _skipna:
3790-
return numpy.nanmean(self._data)
3790+
return numpy_like.nanmean(self._data)
37913791

37923792
return self._data.mean()
37933793

sdc/functions/numpy_like.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
import numba
3535
import numpy
36+
import numpy as np
3637

3738
from numba import types, jit, prange, numpy_support, literally
3839
from numba.errors import TypingError
@@ -472,3 +473,27 @@ def nanprod_impl(a):
472473
return c
473474

474475
return nanprod_impl
476+
477+
478+
def nanmean(a):
    """Stub that the ``nanmean`` overload is attached to.

    The body is intentionally empty, so calling this function directly
    returns ``None``. The implementation for array arguments is registered
    on this function with ``@sdc_overload(nanmean)``.
    """
    pass
480+
481+
482+
@sdc_overload(nanmean)
def np_nanmean(a):
    """Overload of ``nanmean`` for array arguments.

    Returns ``None`` (no implementation offered) unless ``a`` is a
    ``types.Array``. Otherwise returns ``nanmean_impl``, which averages the
    elements of ``a`` for which ``isnan`` is false.
    """
    if not isinstance(a, types.Array):
        return None

    # Resolve the NaN check once from the element dtype, outside the loop.
    # NOTE(review): get_isnan is defined elsewhere; presumably it returns a
    # per-element predicate suited to a.dtype - confirm.
    isnan = get_isnan(a.dtype)

    def nanmean_impl(a):
        total = 0.0
        count = 0
        for i in prange(len(a)):
            v = a[i]
            if not isnan(v):
                total += v
                count += 1
        # count is 0 when every element was skipped (or the array is empty);
        # np.divide() does not raise ZeroDivisionError in that case.
        return np.divide(total, count)

    return nanmean_impl

sdc/tests/test_sdc_numpy.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,15 @@ def cases():
264264
with self.subTest(data=case):
265265
np.testing.assert_array_equal(alt_cfunc(case), pyfunc(case))
266266

267+
def test_nanmean(self):
    """Check ``numpy_like.nanmean`` against ``np.nanmean``.

    Both implementations are handed to ``self.check_reduction_basic``,
    which performs the actual comparison.
    """
    def ref_impl(a):
        return np.nanmean(a)

    def sdc_impl(a):
        return numpy_like.nanmean(a)

    self.check_reduction_basic(ref_impl, sdc_impl)
275+
267276
def test_nanmin(self):
268277
def ref_impl(a):
269278
return np.nanmin(a)

sdc/tests/test_series.py

Lines changed: 24 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2410,58 +2410,44 @@ def test_impl(S):
24102410
S = pd.Series(['aa', 'bb', np.nan])
24112411
self.assertEqual(hpat_func(S), test_impl(S))
24122412

2413+
def _mean_data_samples(self):
    """Yield the input lists shared by the ``Series.mean()`` tests.

    The samples cover integer-only, float-only and mixed int/float data,
    data containing ``nan`` and ``inf``, an all-``nan`` input, and an
    input holding only ``nan`` and ``inf``.
    """
    yield [6, 6, 2, 1, 3, 3, 2, 1, 2]
    yield [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2]
    yield [6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2]
    yield [6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf]
    yield [1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf]
    yield [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2]
    yield [np.nan, np.nan, np.nan]
    yield [np.nan, np.nan, np.inf]
2422+
2423+
def _check_mean(self, pyfunc, *args):
    """Assert that the jitted ``pyfunc`` agrees with plain ``pyfunc``.

    ``pyfunc`` is compiled with ``self.jit``; the compiled and the plain
    version are then called with the same ``args`` and their results
    compared.

    The comparison uses a floating-point tolerance instead of exact
    equality, because the two sides may add the elements in a different
    order and so differ in the last bits. A NaN result matches only NaN,
    and an infinite result matches only an infinity of the same sign.

    Raises:
        AssertionError: if the two results differ beyond the tolerance.
    """
    cfunc = self.jit(pyfunc)

    actual = cfunc(*args)
    expected = pyfunc(*args)
    # equal_nan=True keeps the rule "NaN on one side requires NaN on the
    # other"; the tolerances are close to assertAlmostEqual's 7 places.
    np.testing.assert_allclose(actual, expected,
                               rtol=1e-7, atol=1e-7, equal_nan=True)
2432+
24132433
def test_series_mean(self):
24142434
def test_impl(S):
24152435
return S.mean()
2416-
hpat_func = self.jit(test_impl)
2417-
2418-
data_samples = [
2419-
[6, 6, 2, 1, 3, 3, 2, 1, 2],
2420-
[1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
2421-
[6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
2422-
[6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
2423-
[1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
2424-
[1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
2425-
[np.nan, np.nan, np.nan],
2426-
[np.nan, np.nan, np.inf],
2427-
]
24282436

2429-
for data in data_samples:
2437+
for data in self._mean_data_samples():
24302438
with self.subTest(data=data):
24312439
S = pd.Series(data)
2432-
actual = hpat_func(S)
2433-
expected = test_impl(S)
2434-
if np.isnan(actual) or np.isnan(expected):
2435-
self.assertEqual(np.isnan(actual), np.isnan(expected))
2436-
else:
2437-
self.assertEqual(actual, expected)
2440+
self._check_mean(test_impl, S)
24382441

24392442
@skip_sdc_jit("Series.mean() any parameters unsupported")
24402443
def test_series_mean_skipna(self):
24412444
def test_impl(S, skipna):
24422445
return S.mean(skipna=skipna)
2443-
hpat_func = self.jit(test_impl)
2444-
2445-
data_samples = [
2446-
[6, 6, 2, 1, 3, 3, 2, 1, 2],
2447-
[1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
2448-
[6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
2449-
[6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
2450-
[1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
2451-
[1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
2452-
[np.nan, np.nan, np.nan],
2453-
[np.nan, np.nan, np.inf],
2454-
]
24552446

24562447
for skipna in [True, False]:
2457-
for data in data_samples:
2448+
for data in self._mean_data_samples():
24582449
S = pd.Series(data)
2459-
actual = hpat_func(S, skipna)
2460-
expected = test_impl(S, skipna)
2461-
if np.isnan(actual) or np.isnan(expected):
2462-
self.assertAlmostEqual(np.isnan(actual), np.isnan(expected))
2463-
else:
2464-
self.assertAlmostEqual(actual, expected)
2450+
self._check_mean(test_impl, S, skipna)
24652451

24662452
def test_series_var1(self):
24672453
def test_impl(S):

sdc/tests/tests_perf/test_perf_numpy.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
9595
CE(type_='Numba', code='np.isnan(data)', jitted=True),
9696
CE(type_='SDC', code='sdc.functions.numpy_like.isnan(data)', jitted=True),
9797
], usecase_params='data'),
98+
TC(name='nanmean', size=[10 ** 8], call_expr=[
99+
CE(type_='Python', code='np.nanmean(data)', jitted=False),
100+
CE(type_='Numba', code='np.nanmean(data)', jitted=True),
101+
CE(type_='SDC', code='sdc.functions.numpy_like.nanmean(data)', jitted=True),
102+
], usecase_params='data'),
98103
TC(name='nansum', size=[10 ** 7], call_expr=[
99104
CE(type_='Python', code='np.nansum(data)', jitted=False),
100105
CE(type_='SDC', code='sdc.functions.numpy_like.nansum(data)', jitted=True),

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
103103
TC(name='map', size=[10 ** 7], params='{2.: 42., 4.: 3.14}'),
104104
TC(name='max', size=[10 ** 8], params='skipna=True'),
105105
TC(name='max', size=[10 ** 8], params='skipna=False'),
106-
TC(name='mean', size=[10 ** 8]),
106+
TC(name='mean', size=[10 ** 8], params='skipna=True'),
107+
TC(name='mean', size=[10 ** 8], params='skipna=False'),
107108
TC(name='median', size=[10 ** 8]),
108109
TC(name='min', size=[10 ** 8], params='skipna=True'),
109110
TC(name='min', size=[10 ** 8], params='skipna=False'),

0 commit comments

Comments
 (0)