Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 0e155d4

Browse files
authored
Impl Series.skew() (#813)
1 parent 6d77677 commit 0e155d4

File tree

7 files changed

+277
-9
lines changed

7 files changed

+277
-9
lines changed

examples/series/series_skew.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2020, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import pandas as pd
28+
import numpy as np
29+
from numba import njit
30+
31+
32+
@njit
def series_skew():
    """Build a small float Series with one missing value and return its skewness."""
    values = pd.Series([np.nan, -2., 3., 5.0])
    result = values.skew()

    return result  # Expect -1.1520696383139375


print(series_skew())

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4818,3 +4818,67 @@ def sdc_pandas_series_groupby_impl(self, by=None, axis=0, level=None, as_index=T
48184818
return init_series_groupby(self, by, grouped, sort)
48194819

48204820
return sdc_pandas_series_groupby_impl
4821+
4822+
4823+
@sdc_overload_method(SeriesType, 'skew')
def sdc_pandas_series_skew(self, axis=None, skipna=None, level=None, numeric_only=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.skew

    Limitations
    -----------
    - Parameters ``level`` and ``numeric_only`` are supported only with default value ``None``.
    - Parameter ``axis`` is supported only with values ``0`` and ``None``.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_skew.py
        :language: python
        :lines: 27-
        :caption: Unbiased skewness.
        :name: ex_series_skew

    .. command-output:: python ./series/series_skew.py
        :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.skew` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_skew*
    """
    _func_name = 'Method Series.skew()'

    # Typing-time validation: reject argument types the implementation below cannot handle.
    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)

    if not isinstance(axis, (types.Integer, types.NoneType, types.Omitted)) and axis is not None:
        ty_checker.raise_exc(axis, 'int64', 'axis')

    if not isinstance(skipna, (types.Boolean, types.NoneType, types.Omitted)) and skipna is not None:
        ty_checker.raise_exc(skipna, 'bool', 'skipna')

    # level and numeric_only are accepted only as None / omitted.
    if not isinstance(level, (types.Omitted, types.NoneType)) and level is not None:
        ty_checker.raise_exc(level, 'None', 'level')

    if not isinstance(numeric_only, (types.Omitted, types.NoneType)) and numeric_only is not None:
        ty_checker.raise_exc(numeric_only, 'None', 'numeric_only')

    def sdc_pandas_series_skew_impl(self, axis=None, skipna=None, level=None, numeric_only=None):
        # The axis value can only be validated at run time, once it is known.
        if axis != 0 and axis is not None:
            raise ValueError('Parameter axis must be only 0 or None.')

        # skipna=None falls back to skipping NaN values.
        if skipna is None:
            _skipna = True
        else:
            _skipna = skipna

        if _skipna:
            return numpy_like.nanskew(self._data)

        return numpy_like.skew(self._data)

    return sdc_pandas_series_skew_impl

sdc/datatypes/hpat_pandas_series_rolling_functions.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
from sdc.datatypes.common_functions import _almost_equal
3838
from sdc.datatypes.hpat_pandas_series_rolling_types import SeriesRollingType
39+
from sdc.functions.statistics import skew_formula
3940
from sdc.hiframes.pd_series_type import SeriesType
4041
from sdc.utilities.prange_utils import parallel_chunks
4142
from sdc.utilities.sdc_typing_utils import TypeChecker
@@ -549,15 +550,7 @@ def skew_result_or_nan(nfinite, minp, result):
549550

550551
_sum, square_sum, cube_sum = result
551552

552-
n = nfinite
553-
m2 = (square_sum - _sum * _sum / n) / n
554-
m3 = (cube_sum - 3.*_sum*square_sum/n + 2.*_sum*_sum*_sum/n/n) / n
555-
res = 0 if m2 == 0 else m3 / m2 ** 1.5
556-
557-
if (n > 2) & (m2 > 0):
558-
res = numpy.sqrt((n - 1.) * n) / (n - 2.) * m3 / m2 ** 1.5
559-
560-
return res
553+
return skew_formula(nfinite, _sum, square_sum, cube_sum)
561554

562555

563556
@sdc_register_jitable

sdc/functions/numpy_like.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from numba.typed import List
4545

4646
import sdc
47+
from sdc.functions.statistics import skew_formula
4748
from sdc.utilities.sdc_typing_utils import TypeChecker
4849
from sdc.utilities.utils import (sdc_overload, sdc_register_jitable,
4950
min_dtype_int_val, max_dtype_int_val, min_dtype_float_val,
@@ -988,3 +989,65 @@ def getitem_by_mask_impl(arr, idx):
988989
return result_data
989990

990991
return getitem_by_mask_impl
992+
993+
994+
def skew(a):
    """Pure-Python stub with no behavior; it always returns ``None``."""
    return None
996+
997+
998+
def nanskew(a):
    """Pure-Python stub with no behavior; it always returns ``None``."""
    return None
1000+
1001+
1002+
@sdc_overload(skew)
def np_skew(arr):
    """Supply the ``skew`` implementation for array arguments; other argument types are not handled."""
    if not isinstance(arr, types.Array):
        return None

    def skew_impl(arr):
        size = len(arr)
        count = 0
        total = 0.
        total_sq = 0.
        total_cube = 0.

        # Accumulate the count and the first three power sums of the non-NaN elements.
        for i in numba.prange(size):
            value = arr[i]
            if not numpy.isnan(value):
                count += 1
                total += value
                square_sum_term = value ** 2
                total_sq += square_sum_term
                cube_sum_term = value ** 3
                total_cube += cube_sum_term

        # NaNs are not skipped here: an empty array or any NaN element yields NaN.
        if count == 0 or count < size:
            return numpy.nan

        return skew_formula(count, total, total_sq, total_cube)

    return skew_impl
1027+
1028+
1029+
@sdc_overload(nanskew)
def np_nanskew(arr):
    """Supply the ``nanskew`` implementation for array arguments; other argument types are not handled."""
    if not isinstance(arr, types.Array):
        return None

    def nanskew_impl(arr):
        size = len(arr)
        count = 0
        total = 0.
        total_sq = 0.
        total_cube = 0.

        # Accumulate the count and the first three power sums, ignoring NaN elements.
        for i in numba.prange(size):
            value = arr[i]
            if not numpy.isnan(value):
                count += 1
                total += value
                square_sum_term = value ** 2
                total_sq += square_sum_term
                cube_sum_term = value ** 3
                total_cube += cube_sum_term

        # Nothing left after dropping NaNs: the result is NaN.
        if count == 0:
            return numpy.nan

        return skew_formula(count, total, total_sq, total_cube)

    return nanskew_impl

sdc/functions/statistics.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# -*- coding: utf-8 -*-
2+
# *****************************************************************************
3+
# Copyright (c) 2020, Intel Corporation All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions are met:
7+
#
8+
# Redistributions of source code must retain the above copyright notice,
9+
# this list of conditions and the following disclaimer.
10+
#
11+
# Redistributions in binary form must reproduce the above copyright notice,
12+
# this list of conditions and the following disclaimer in the documentation
13+
# and/or other materials provided with the distribution.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
# *****************************************************************************
27+
28+
import numpy
29+
from sdc.utilities.utils import sdc_register_jitable
30+
31+
32+
@sdc_register_jitable
def skew_formula(n, _sum, square_sum, cube_sum):
    """
    Compute the bias-corrected sample skewness from raw power sums.

    Parameters
    ----------
    n:
        Number of observations the sums were accumulated over.
    _sum:
        Sum of the observations.
    square_sum:
        Sum of the squared observations.
    cube_sum:
        Sum of the cubed observations.

    Returns
    -------
    The skewness ``sqrt(n * (n - 1)) / (n - 2) * m3 / m2 ** 1.5``, where ``m2`` and ``m3``
    are the second and third central moments. NaN is returned when there are fewer than
    three observations or when ``m2`` is zero.
    """
    # The (n - 2) correction is undefined for fewer than three observations, and pandas
    # reports NaN in that case. Returning early also avoids dividing by n == 0 below.
    if n < 3:
        return numpy.nan

    # Central moments derived from the raw power sums.
    m2 = (square_sum - _sum * _sum / n) / n
    m3 = (cube_sum - 3. * _sum * square_sum / n + 2. * _sum * _sum * _sum / n / n) / n

    if m2 == 0:
        return numpy.nan

    if m2 > 0:
        return numpy.sqrt((n - 1.) * n) / (n - 2.) * m3 / m2 ** 1.5

    # m2 is negative or NaN (e.g. round-off or infinite inputs): keep the uncorrected ratio.
    return m3 / m2 ** 1.5

sdc/tests/test_series.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7188,6 +7188,73 @@ def test_impl(A, idx):
71887188
msg = 'The index of boolean indexer is not comparable to Series index.'
71897189
self.assertIn(msg, str(raises.exception))
71907190

7191+
def test_series_skew(self):
    """Compare jitted Series.skew() against pandas for every axis/skipna combination."""
    def test_impl(series, axis, skipna):
        return series.skew(axis=axis, skipna=skipna)

    hpat_func = self.jit(test_impl)
    test_data = [
        [6, 6, 2, 1, 3, 3, 2, 1, 2],
        [1.1, 0.3, 2.1, 1, 3, 0.3, 2.1, 1.1, 2.2],
        [6, 6.1, 2.2, 1, 3, 3, 2.2, 1, 2],
        [],
        [6, 6, np.nan, 2, np.nan, 1, 3, 3, np.inf, 2, 1, 2, np.inf],
        [1.1, 0.3, np.nan, 1.0, np.inf, 0.3, 2.1, np.nan, 2.2, np.inf],
        [1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2],
        [np.nan, np.nan, np.nan],
        [np.nan, np.nan, np.inf],
        [np.inf, 0, np.inf, 1, 2, 3, 4, 5],
    ]
    axis_options = [0, None]
    skipna_options = [None, False, True]

    for data in test_data + test_global_input_data_float64:
        with self.subTest(data=data):
            series = pd.Series(data)
            for axis in axis_options:
                with self.subTest(axis=axis):
                    for skipna in skipna_options:
                        with self.subTest(skipna=skipna):
                            pandas_result = test_impl(series, axis, skipna)
                            sdc_result = hpat_func(series, axis, skipna)
                            np.testing.assert_allclose(pandas_result, sdc_result)
7218+
7219+
def test_series_skew_default(self):
    """Check Series.skew() called without arguments on a Series built inside the jitted function."""
    def test_impl():
        return pd.Series([np.nan, -2., 3., 9.1]).skew()

    hpat_func = self.jit(test_impl)
    pandas_result = test_impl()
    sdc_result = hpat_func()
    np.testing.assert_allclose(pandas_result, sdc_result)
7226+
7227+
def test_series_skew_not_supported(self):
    """Check that unsupported argument types and axis values are rejected with the expected messages."""
    def test_impl(series, axis=None, skipna=None, level=None, numeric_only=None):
        return series.skew(axis=axis, skipna=skipna, level=level, numeric_only=numeric_only)

    hpat_func = self.jit(test_impl)
    s = pd.Series([1.1, 0.3, np.nan, 1, np.inf, 0, 1.1, np.nan, 2.2, np.inf, 2, 2])

    # Each entry: keyword arguments of a wrongly typed call and the expected typing error text.
    typing_cases = [
        ({'axis': 0.75},
         'TypingError: Method Series.skew() The object axis\n given: float64\n expected: int64'),
        ({'skipna': 0},
         'TypingError: Method Series.skew() The object skipna\n given: int64\n expected: bool'),
        ({'level': 0},
         'TypingError: Method Series.skew() The object level\n given: int64\n expected: None'),
        ({'numeric_only': 0},
         'TypingError: Method Series.skew() The object numeric_only\n given: int64\n expected: None'),
    ]
    for kwargs, msg in typing_cases:
        with self.assertRaises(TypingError) as raises:
            hpat_func(s, **kwargs)
        self.assertIn(msg, str(raises.exception))

    # A correctly typed but unsupported axis value is rejected at run time.
    with self.assertRaises(ValueError) as raises:
        hpat_func(s, axis=5)
    msg = 'Parameter axis must be only 0 or None.'
    self.assertIn(msg, str(raises.exception))
7257+
71917258

71927259
if __name__ == "__main__":
71937260
unittest.main()

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def _test_case(self, pyfunc, name, total_data_length, input_data=None, data_num=
137137
TC(name='shape', size=[10 ** 7], call_expr='data.shape', usecase_params='data'),
138138
TC(name='shift', size=[10 ** 8]),
139139
TC(name='size', size=[10 ** 7], call_expr='data.size', usecase_params='data'),
140+
TC(name='skew', size=[10 ** 8], check_skipna=True),
140141
TC(name='sort_values', size=[10 ** 5]),
141142
TC(name='std', size=[10 ** 7], check_skipna=True),
142143
TC(name='sub', size=[10 ** 7], params='other', data_num=2),

0 commit comments

Comments
 (0)