Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 69c950f

Browse files
authored
Add check_skipna for perf tests (#621)
* Add check_skipna * Add check_skipna for DataFrame perf tests
1 parent 6b53d4c commit 69c950f

File tree

3 files changed

+38
-26
lines changed

3 files changed

+38
-26
lines changed

sdc/tests/tests_perf/generator.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class TestCase(NamedTuple):
2626
data_num: total number of generated data, e.g. 2 (data, other)
2727
input_data: input data for generating test data
2828
skip: flag for skipping a test
29+
check_skipna: flag for checking a function with both parameters skipna=True and skipna=False
2930
"""
3031
name: str
3132
size: list
@@ -35,15 +36,32 @@ class TestCase(NamedTuple):
3536
data_num: int = 1
3637
input_data: list = None
3738
skip: bool = False
39+
check_skipna: bool = False
3840

3941

4042
def to_varname_without_excess_underscores(string):
    """Return to_varname(string) with excess underscores removed.

    The converted name is split on '_' and the empty pieces (which come from
    leading, trailing or repeated underscores) are dropped before the
    remaining pieces are joined back with single underscores.
    """
    pieces = to_varname(string).split('_')
    # filter(None, ...) keeps only the non-empty pieces
    return '_'.join(filter(None, pieces))
4345

4446

47+
def skipna_cases(cases):
    """Generator. Replaces a test case containing check_skipna=True
    with two cases containing parameters skipna=True and skipna=False.

    Cases whose check_skipna is falsy are yielded unchanged. For the others,
    'skipna=<value>' is appended to the case's params string, preceded by
    ', ' when params is non-empty. A params value of None is treated as an
    empty string instead of raising TypeError on concatenation.

    cases: iterable of objects exposing check_skipna, params and _replace()
    (e.g. NamedTuple instances).
    """
    for case in cases:
        if not case.check_skipna:
            yield case
            continue

        # A missing params value (None) is handled like an empty parameter list
        base = case.params or ''
        prefix = f'{base}, ' if base else ''
        for skipna in (True, False):
            yield case._replace(params=f'{prefix}skipna={skipna}')
61+
62+
4563
def generate_test_cases(cases, class_add, typ, prefix=''):
46-
for test_case in cases:
64+
for test_case in skipna_cases(cases):
4765
test_name_parts = ['test', typ, prefix, test_case.name, gen_params_wo_data(test_case)]
4866
test_name = to_varname_without_excess_underscores('_'.join(test_name_parts))
4967

sdc/tests/tests_perf/test_perf_df.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,15 @@ def _test_case(self, pyfunc, name, total_data_length, input_data, data_num=1):
6464
TC(name='append', size=[10 ** 7], params='other', data_num=2),
6565
TC(name='count', size=[10 ** 7]),
6666
TC(name='drop', size=[10 ** 8], params='columns="f0"'),
67-
TC(name='max', size=[10 ** 7]),
68-
TC(name='mean', size=[10 ** 7]),
69-
TC(name='median', size=[10 ** 7]),
70-
TC(name='min', size=[10 ** 7]),
67+
TC(name='max', size=[10 ** 7], check_skipna=True),
68+
TC(name='mean', size=[10 ** 7], check_skipna=True),
69+
TC(name='median', size=[10 ** 7], check_skipna=True),
70+
TC(name='min', size=[10 ** 7], check_skipna=True),
7171
TC(name='pct_change', size=[10 ** 7]),
72-
TC(name='prod', size=[10 ** 7]),
73-
TC(name='std', size=[10 ** 7]),
74-
TC(name='sum', size=[10 ** 7]),
75-
TC(name='var', size=[10 ** 7]),
72+
TC(name='prod', size=[10 ** 7], check_skipna=True),
73+
TC(name='std', size=[10 ** 7], check_skipna=True),
74+
TC(name='sum', size=[10 ** 7], check_skipna=True),
75+
TC(name='var', size=[10 ** 7], check_skipna=True),
7676
]
7777

7878
generate_test_cases(cases, TestDataFrameMethods, 'df')

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
7777
TC(name='corr', size=[10 ** 7],params='other', data_num=2),
7878
TC(name='count', size=[10 ** 8]),
7979
TC(name='cov', size=[10 ** 8], params='other', data_num=2),
80-
TC(name='cumsum', size=[10 ** 8]),
80+
TC(name='cumsum', size=[10 ** 8], check_skipna=True),
8181
TC(name='describe', size=[10 ** 7]),
8282
TC(name='div', size=[10 ** 7], params='other', data_num=2),
8383
TC(name='dropna', size=[10 ** 7]),
@@ -89,8 +89,8 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
8989
TC(name='gt', size=[10 ** 7],params='other', data_num=2),
9090
TC(name='head', size=[10 ** 8]),
9191
TC(name='iat', size=[10 ** 7], call_expr='data.iat[100000]', usecase_params='data'),
92-
TC(name='idxmax', size=[10 ** 8]),
93-
TC(name='idxmin', size=[10 ** 8]),
92+
TC(name='idxmax', size=[10 ** 8], check_skipna=True),
93+
TC(name='idxmin', size=[10 ** 8], check_skipna=True),
9494
TC(name='iloc', size=[10 ** 7], call_expr='data.iloc[100000]', usecase_params='data'),
9595
TC(name='index', size=[10 ** 7], call_expr='data.index', usecase_params='data'),
9696
TC(name='isin', size=[10 ** 7], call_expr='data.isin([0])', usecase_params='data'),
@@ -101,13 +101,10 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
101101
TC(name='lt', size=[10 ** 7], params='other', data_num=2),
102102
TC(name='map', size=[10 ** 7], params='lambda x: x * 2'),
103103
TC(name='map', size=[10 ** 7], params='{2.: 42., 4.: 3.14}'),
104-
TC(name='max', size=[10 ** 8], params='skipna=True'),
105-
TC(name='max', size=[10 ** 8], params='skipna=False'),
106-
TC(name='mean', size=[10 ** 8], params='skipna=True'),
107-
TC(name='mean', size=[10 ** 8], params='skipna=False'),
108-
TC(name='median', size=[10 ** 8]),
109-
TC(name='min', size=[10 ** 8], params='skipna=True'),
110-
TC(name='min', size=[10 ** 8], params='skipna=False'),
104+
TC(name='max', size=[10 ** 8], check_skipna=True),
105+
TC(name='mean', size=[10 ** 8], check_skipna=True),
106+
TC(name='median', size=[10 ** 8], check_skipna=True),
107+
TC(name='min', size=[10 ** 8], check_skipna=True),
111108
TC(name='mod', size=[10 ** 7], params='other', data_num=2),
112109
TC(name='mul', size=[10 ** 7], params='other', data_num=2),
113110
TC(name='ndim', size=[10 ** 7], call_expr='data.ndim', usecase_params='data'),
@@ -116,8 +113,7 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
116113
TC(name='notna', size=[10 ** 7]),
117114
TC(name='nsmallest', size=[10 ** 6]),
118115
TC(name='nunique', size=[10 ** 7]),
119-
TC(name='prod', size=[10 ** 8], params='skipna=True'),
120-
TC(name='prod', size=[10 ** 8], params='skipna=False'),
116+
TC(name='prod', size=[10 ** 8], check_skipna=True),
121117
TC(name='pct_change', size=[10 ** 7], params='periods=1, limit=None, freq=None'),
122118
TC(name='pow', size=[10 ** 7], params='other', data_num=2),
123119
TC(name='quantile', size=[10 ** 8]),
@@ -127,16 +123,14 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
127123
TC(name='shift', size=[10 ** 8]),
128124
TC(name='size', size=[10 ** 7], call_expr='data.size', usecase_params='data'),
129125
TC(name='sort_values', size=[10 ** 5]),
130-
TC(name='std', size=[10 ** 7], params='skipna=True'),
131-
TC(name='std', size=[10 ** 7], params='skipna=False'),
126+
TC(name='std', size=[10 ** 7], check_skipna=True),
132127
TC(name='sub', size=[10 ** 7], params='other', data_num=2),
133-
TC(name='sum', size=[10 ** 8]),
128+
TC(name='sum', size=[10 ** 8], check_skipna=True),
134129
TC(name='take', size=[10 ** 7], call_expr='data.take([0])', usecase_params='data'),
135130
TC(name='truediv', size=[10 ** 7], params='other', data_num=2),
136131
TC(name='values', size=[10 ** 7], call_expr='data.values', usecase_params='data'),
137132
TC(name='value_counts', size=[10 ** 6]),
138-
TC(name='var', size=[10 ** 8], params='skipna=True'),
139-
TC(name='var', size=[10 ** 8], params='skipna=False'),
133+
TC(name='var', size=[10 ** 8], check_skipna=True),
140134
TC(name='unique', size=[10 ** 5]),
141135
]
142136

0 commit comments

Comments
 (0)