|
31 | 31 |
|
32 | 32 | import operator |
33 | 33 | import pandas |
| 34 | +import copy |
| 35 | +import numpy |
| 36 | + |
| 37 | +import sdc |
34 | 38 |
|
35 | 39 | from numba import types |
36 | 40 | from numba.extending import (overload, overload_method, overload_attribute) |
| 41 | +from sdc.hiframes.pd_dataframe_ext import DataFrameType |
37 | 42 | from numba.errors import TypingError |
| 43 | +import sdc.datatypes.hpat_pandas_dataframe_types |
| 44 | + |
| 45 | +from sdc.datatypes.hpat_pandas_series_functions import TypeChecker |
| 46 | + |
| 47 | + |
def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, columns):
    """
    Build the source text of a per-column DataFrame reduction implementation.

    The generated function wraps every column of the dataframe into a Series,
    calls the Series method ``func_name`` on it and collects the per-column
    results into a ``pandas.Series`` labelled with the column names.

    Example ``func_text`` for ``func_name='count'``, ``columns=('A', 'B')``::

        def _df_count_impl(df, axis=0, level=None, numeric_only=False):
            series_0 = init_series(get_dataframe_data(df, 0))
            result_0 = series_0.count(level=level)
            series_1 = init_series(get_dataframe_data(df, 1))
            result_1 = series_1.count(level=level)
            return pandas.Series([result_0, result_1], ['A', 'B'])

    Parameters
    ----------
    func_name: :obj:`str`
        name of the Series method to call; also used in the generated
        function name ``_df_<func_name>_impl``
    func_params: :obj:`list` of :obj:`str`
        parameter declarations of the generated function; the first entry
        is used as the dataframe argument name
    series_params: :obj:`str`
        argument list text passed verbatim to every Series method call
    columns: iterable
        dataframe column labels, in positional order

    Returns
    -------
    :obj:`tuple`
        ``(func_text, global_vars)``: the generated source and the globals
        dictionary it has to be executed with
    """
    columns = tuple(columns)
    signature = ', '.join(func_params)
    func_lines = [f'def _df_{func_name}_impl({signature}):']

    result_names = []
    for i in range(len(columns)):
        # Temporaries are named by column position, not by column label:
        # a label such as 'my col' or '1' is not a valid Python identifier
        # and would make the generated source fail to compile.
        series_var = f'series_{i}'
        result_var = f'result_{i}'
        func_lines += [
            f'    {series_var} = init_series(get_dataframe_data({func_params[0]}, {i}))',
            f'    {result_var} = {series_var}.{func_name}({series_params})',
        ]
        result_names.append(result_var)

    all_results = ', '.join(result_names)
    # repr() produces a correctly quoted/escaped literal even when the label
    # itself contains quote characters.
    all_columns = ', '.join(repr(str(c)) for c in columns)

    func_lines.append(f'    return pandas.Series([{all_results}], [{all_columns}])')
    func_text = '\n'.join(func_lines)

    global_vars = {'pandas': pandas, 'np': numpy,
                   'init_series': sdc.hiframes.api.init_series,
                   'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data}

    return func_text, global_vars
38 | 78 |
|
39 | | -from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType |
40 | | -from sdc.utils import sdc_overload_method |
41 | 79 |
|
def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params):
    """
    Create the implementation of a column-wise DataFrame reduction.

    The source of ``_df_<func_name>_impl`` is generated for the columns of
    ``df``, executed, and the resulting function object is returned.

    Parameters
    ----------
    df:
        dataframe (type) object; only its ``columns`` attribute is read here
    func_name: :obj:`str`
        name of the Series method applied to every column
    params: :obj:`dict`
        parameter name -> default value; rendered as ``name=value`` after the
        leading ``df`` parameter of the generated function
    ser_params: :obj:`dict`
        argument name -> expression text; rendered as ``name=expr`` in the
        argument list of every Series method call

    Returns
    -------
    function
        the generated ``_df_<func_name>_impl`` function
    """
    signature_args = ['df'] + [f'{arg}={default}' for arg, default in params.items()]
    series_call_args = ', '.join(f'{arg}={expr}' for arg, expr in ser_params.items())

    func_text, global_vars = _dataframe_reduce_columns_codegen(
        func_name, signature_args, series_call_args, df.columns)

    # The generated source is executed in its own namespace; the implementation
    # is then looked up there under the name the generator gave it.
    namespace = {}
    exec(func_text, global_vars, namespace)

    return namespace[f'_df_{func_name}_impl']
| 100 | + |
| 101 | + |
@overload_method(DataFrameType, 'count')
def count_overload(df, axis=0, level=None, numeric_only=False):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation.

    .. only:: developer

        Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count
        Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count1

    Parameters
    -----------
    df: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported*
    level:
        *unsupported*
    numeric_only:
        *unsupported*

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        for each column/row the number of non-NA/null entries. If level is specified returns a DataFrame.
    """

    method_name = 'count'

    checker = TypeChecker(f'Method {method_name}().')
    checker.check(df, DataFrameType)

    # Each optional argument is accepted only when omitted or left at its default.
    axis_is_default = isinstance(axis, types.Omitted) or axis == 0
    if not axis_is_default:
        checker.raise_exc(axis, 'unsupported', 'axis')

    level_is_default = isinstance(level, types.Omitted) or level is None
    if not level_is_default:
        checker.raise_exc(level, 'unsupported', 'level')

    numeric_only_is_default = isinstance(numeric_only, types.Omitted) or numeric_only is False
    if not numeric_only_is_default:
        checker.raise_exc(numeric_only, 'unsupported', 'numeric_only')

    # Defaults for the generated function signature, and the arguments that
    # are forwarded to every per-column Series.count() call.
    signature_defaults = {'axis': 0, 'level': None, 'numeric_only': False}
    series_call_args = {'level': 'level'}

    return sdc_pandas_dataframe_reduce_columns(df, method_name, signature_defaults, series_call_args)
0 commit comments