Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit b57c9ad

Browse files
Move DataFrameType and SeriesType to separate files in order to avoid circular import (#459)
* Move DataFrameType and SeriesType to separate files in order to avoid circular imports
* Clean up dataframe functions
* Modify sdc/__init__.py
* Remove datatypes.hpat_pandas_functions from hiframes_untyped.py
* Remove datatypes.hpat_pandas_series_functions from hiframes/pd_series_ext.py
* Move imports from datatypes init to sdc init
* Fix series.rolling
* Fix style issues, etc.
* Fix style issues, part 2
* Remove noqa
* Fix istitle
1 parent d80cda0 commit b57c9ad

12 files changed

+452
-385
lines changed

sdc/__init__.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,32 +24,28 @@
2424
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2525
# *****************************************************************************
2626

27-
28-
from ._version import get_versions
2927
import numba
3028

3129
# re-export from Numba
3230
from numba import (typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
3331
stencil, threading_layer, jitclass, objmode)
3432

33+
import sdc.config
3534
import sdc.dict_ext
3635
import sdc.set_ext
37-
import sdc.compiler
3836
import sdc.io
3937
import sdc.io.np_io
4038
import sdc.hiframes.pd_timestamp_ext
4139
import sdc.hiframes.boxing
42-
import sdc.config
4340
import sdc.timsort
4441
from sdc.decorators import jit
45-
import sdc.rewrites.dataframe_constructor
4642

47-
multithread_mode = False
48-
49-
50-
__version__ = get_versions()['version']
51-
del get_versions
43+
import sdc.datatypes.hpat_pandas_series_functions
44+
import sdc.datatypes.hpat_pandas_series_rolling_functions
45+
import sdc.datatypes.hpat_pandas_seriesgroupby_functions
46+
import sdc.datatypes.hpat_pandas_stringmethods_functions
5247

48+
from ._version import get_versions
5349

5450
if not sdc.config.config_pipeline_hpat_default:
5551
"""
@@ -65,6 +61,16 @@
6561
# numba.compiler.DefaultPassBuilder.define_nopython_pipeline = \
6662
# sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register
6763

64+
import sdc.rewrites.dataframe_constructor
65+
import sdc.datatypes.hpat_pandas_functions
66+
else:
67+
import sdc.compiler
68+
69+
multithread_mode = False
70+
71+
72+
__version__ = get_versions()['version']
73+
del get_versions
6874

6975
def _init_extension():
7076
'''Register Pandas classes and functions with Numba.

sdc/datatypes/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,4 @@
2525
# *****************************************************************************
2626

2727

28-
import sdc.datatypes.hpat_pandas_dataframe_pass
29-
import sdc.datatypes.hpat_pandas_series_rolling_functions
30-
import sdc.datatypes.hpat_pandas_seriesgroupby_functions
31-
import sdc.datatypes.hpat_pandas_stringmethods_functions
28+
# import sdc.datatypes.hpat_pandas_dataframe_pass

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,13 @@
3434
import copy
3535
import numpy
3636

37-
import sdc
38-
3937
from numba import types
4038
from numba.extending import (overload, overload_method, overload_attribute)
41-
from sdc.hiframes.pd_dataframe_ext import DataFrameType
39+
from sdc.hiframes.pd_dataframe_type import DataFrameType
4240
from numba.errors import TypingError
43-
import sdc.datatypes.hpat_pandas_dataframe_types
4441

4542
from sdc.datatypes.hpat_pandas_series_functions import TypeChecker
43+
from sdc.hiframes.pd_dataframe_ext import get_dataframe_data
4644

4745

4846
# Example func_text for func_name='count' columns=('A', 'B'):
@@ -61,7 +59,7 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col
6159
func_lines = [f'def _df_{func_name}_impl({joined}):']
6260
for i, c in enumerate(columns):
6361
result_c = f'result_{c}'
64-
func_lines += [f' series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))',
62+
func_lines += [f' series_{c} = pandas.Series(get_dataframe_data({func_params[0]}, {i}))',
6563
f' {result_c} = series_{c}.{func_name}({series_params})']
6664
result_name_list.append(result_c)
6765
all_results = ', '.join(result_name_list)
@@ -71,8 +69,7 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col
7169
func_text = '\n'.join(func_lines)
7270

7371
global_vars = {'pandas': pandas, 'np': numpy,
74-
'init_series': sdc.hiframes.api.init_series,
75-
'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data}
72+
'get_dataframe_data': get_dataframe_data}
7673

7774
return func_text, global_vars
7875

sdc/datatypes/hpat_pandas_series_autogenerated.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from sdc.datatypes.common_functions import TypeChecker
4545
from sdc.datatypes.common_functions import (check_index_is_numeric, find_common_dtype_from_numpy_dtypes,
4646
sdc_join_series_indexes, sdc_check_indexes_equal, check_types_comparable)
47-
from sdc.hiframes.pd_series_ext import SeriesType
47+
from sdc.hiframes.pd_series_type import SeriesType
4848
from sdc.str_arr_ext import (string_array_type, num_total_chars, str_arr_is_na)
4949

5050

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
sdc_join_series_indexes)
4646
from sdc.datatypes.hpat_pandas_series_rolling_types import _hpat_pandas_series_rolling_init
4747
from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
48-
from sdc.hiframes.pd_series_ext import SeriesType
48+
from sdc.hiframes.pd_series_type import SeriesType
4949
from sdc.str_arr_ext import (StringArrayType, string_array_type, str_arr_is_na, str_arr_set_na,
5050
num_total_chars, pre_alloc_string_array, cp_str_list_to_array)
5151
from sdc.utils import to_array, sdc_overload, sdc_overload_method, sdc_overload_attribute
@@ -4489,4 +4489,3 @@ def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit
44894489
return pandas.Series(result)
44904490

44914491
return hpat_pandas_series_pct_change_impl
4492-

sdc/datatypes/sdc_function_templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from sdc.datatypes.common_functions import TypeChecker
4646
from sdc.datatypes.common_functions import (check_index_is_numeric, find_common_dtype_from_numpy_dtypes,
4747
sdc_join_series_indexes, sdc_check_indexes_equal, check_types_comparable)
48-
from sdc.hiframes.pd_series_ext import SeriesType
48+
from sdc.hiframes.pd_series_type import SeriesType
4949
from sdc.str_arr_ext import (string_array_type, num_total_chars, str_arr_is_na)
5050

5151

sdc/hiframes/boxing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from numba.targets.boxing import _NumbaTypeHelper
4040
from numba.targets import listobj
4141

42-
from sdc.hiframes.pd_dataframe_ext import DataFrameType
42+
from sdc.hiframes.pd_dataframe_type import DataFrameType
4343
from sdc.hiframes.pd_timestamp_ext import (datetime_date_type,
4444
unbox_datetime_date_array, box_datetime_date_array)
4545
from sdc.str_ext import string_type, list_string_array_type

sdc/hiframes/hiframes_untyped.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,6 +1942,3 @@ def simple_block_copy_propagate(block):
19421942
for k in lhs_kill:
19431943
var_dict.pop(k, None)
19441944
return
1945-
1946-
1947-
from sdc.datatypes.hpat_pandas_functions import *

sdc/hiframes/pd_dataframe_ext.py

Lines changed: 2 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -33,89 +33,18 @@
3333
import numba
3434
from numba import types, cgutils
3535
from numba.extending import (models, register_model, lower_cast, infer_getattr,
36-
type_callable, infer, overload, make_attribute_wrapper, intrinsic,
36+
type_callable, infer, overload, intrinsic,
3737
lower_builtin, overload_method)
3838
from numba.typing.templates import (infer_global, AbstractTemplate, signature,
3939
AttributeTemplate, bound_function)
4040
from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed
4141

4242
import sdc
4343
from sdc.hiframes.pd_series_ext import SeriesType
44+
from sdc.hiframes.pd_dataframe_type import DataFrameType
4445
from sdc.str_ext import string_type
4546
from sdc.str_arr_ext import string_array_type
4647

47-
48-
class DataFrameType(types.Type): # TODO: IterableType over column names
49-
"""Temporary type class for DataFrame objects.
50-
"""
51-
52-
def __init__(self, data=None, index=None, columns=None, has_parent=False):
53-
self.data = data
54-
if index is None:
55-
index = types.none
56-
self.index = index
57-
self.columns = columns
58-
# keeping whether it is unboxed from Python to enable reflection of new
59-
# columns
60-
self.has_parent = has_parent
61-
super(DataFrameType, self).__init__(
62-
name="dataframe({}, {}, {}, {})".format(data, index, columns, has_parent))
63-
64-
def copy(self, index=None, has_parent=None):
65-
# XXX is copy necessary?
66-
if index is None:
67-
index = types.none if self.index == types.none else self.index.copy()
68-
data = tuple(a.copy() for a in self.data)
69-
if has_parent is None:
70-
has_parent = self.has_parent
71-
return DataFrameType(data, index, self.columns, has_parent)
72-
73-
@property
74-
def key(self):
75-
# needed?
76-
return self.data, self.index, self.columns, self.has_parent
77-
78-
def unify(self, typingctx, other):
79-
if (isinstance(other, DataFrameType)
80-
and len(other.data) == len(self.data)
81-
and other.columns == self.columns
82-
and other.has_parent == self.has_parent):
83-
new_index = types.none
84-
if self.index != types.none and other.index != types.none:
85-
new_index = self.index.unify(typingctx, other.index)
86-
elif other.index != types.none:
87-
new_index = other.index
88-
elif self.index != types.none:
89-
new_index = self.index
90-
91-
data = tuple(a.unify(typingctx, b) for a, b in zip(self.data, other.data))
92-
return DataFrameType(data, new_index, self.columns, self.has_parent)
93-
94-
def is_precise(self):
95-
return all(a.is_precise() for a in self.data) and self.index.is_precise()
96-
97-
@register_model(DataFrameType)
98-
class DataFrameModel(models.StructModel):
99-
def __init__(self, dmm, fe_type):
100-
n_cols = len(fe_type.columns)
101-
members = [
102-
('data', types.Tuple(fe_type.data)),
103-
('index', fe_type.index),
104-
('columns', types.UniTuple(string_type, n_cols)),
105-
# for lazy unboxing of df coming from Python (usually argument)
106-
# list of flags noting which columns and index are unboxed
107-
# index flag is last
108-
('unboxed', types.UniTuple(types.int8, n_cols + 1)),
109-
('parent', types.pyobject),
110-
]
111-
super(DataFrameModel, self).__init__(dmm, fe_type, members)
112-
113-
make_attribute_wrapper(DataFrameType, 'data', '_data')
114-
make_attribute_wrapper(DataFrameType, 'index', '_index')
115-
make_attribute_wrapper(DataFrameType, 'columns', '_columns')
116-
make_attribute_wrapper(DataFrameType, 'unboxed', '_unboxed')
117-
make_attribute_wrapper(DataFrameType, 'parent', '_parent')
118-
11948
@infer_getattr
12049
class DataFrameAttribute(AttributeTemplate):
12150
key = DataFrameType

sdc/hiframes/pd_dataframe_type.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2019, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
28+
import numba
29+
from numba import types, cgutils
30+
from numba.extending import (models, register_model, make_attribute_wrapper)
31+
32+
from sdc.str_ext import string_type
33+
34+
35+
class DataFrameType(types.Type):  # TODO: IterableType over column names
    """Temporary type class for DataFrame objects.

    The constructor arguments are stored unchanged as attributes:

    * ``data`` - sequence of per-column types (iterated by ``copy``,
      ``unify`` and ``is_precise``).
    * ``index`` - type of the index; ``types.none`` when not provided.
    * ``columns`` - column names; two dataframe types only unify when
      these compare equal.
    * ``has_parent`` - whether the dataframe is unboxed from Python, kept
      to enable reflection of new columns.
    """

    def __init__(self, data=None, index=None, columns=None, has_parent=False):
        self.data = data
        if index is None:
            index = types.none
        self.index = index
        self.columns = columns
        # keeping whether it is unboxed from Python to enable reflection of new
        # columns
        self.has_parent = has_parent
        super(DataFrameType, self).__init__(
            name="dataframe({}, {}, {}, {})".format(data, index, columns, has_parent))

    def copy(self, index=None, has_parent=None):
        """Return a new DataFrameType with copied column types.

        ``index`` and ``has_parent`` override the corresponding attributes
        of the copy; when left as ``None`` the current values are reused
        (the index type is copied unless it is ``types.none``).
        """
        # XXX is copy necessary?
        if index is None:
            index = types.none if self.index == types.none else self.index.copy()
        data = tuple(a.copy() for a in self.data)
        if has_parent is None:
            has_parent = self.has_parent
        return DataFrameType(data, index, self.columns, has_parent)

    @property
    def key(self):
        """Tuple of all attributes, used to identify this type instance."""
        # needed?
        return self.data, self.index, self.columns, self.has_parent

    def unify(self, typingctx, other):
        """Unify this type with ``other``.

        Returns a DataFrameType whose column and index types are the
        pairwise unification of both operands, or ``None`` when the types
        cannot be unified: ``other`` is not a DataFrameType, the column
        count, column names or ``has_parent`` differ, or any column/index
        pair has no common type.
        """
        if not (isinstance(other, DataFrameType)
                and len(other.data) == len(self.data)
                and other.columns == self.columns
                and other.has_parent == self.has_parent):
            return None

        new_index = types.none
        if self.index != types.none and other.index != types.none:
            # unify_pairs (unlike calling .unify() directly) also handles
            # identical types and types that do not override unify()
            new_index = typingctx.unify_pairs(self.index, other.index)
        elif other.index != types.none:
            new_index = other.index
        elif self.index != types.none:
            new_index = self.index

        data = tuple(typingctx.unify_pairs(a, b)
                     for a, b in zip(self.data, other.data))

        # A failed component unification yields None; never embed it in a
        # new type - report the whole unification as impossible instead.
        if new_index is None or any(t is None for t in data):
            return None

        return DataFrameType(data, new_index, self.columns, self.has_parent)

    def is_precise(self):
        """Return True when every column type and the index type are precise."""
        return all(a.is_precise() for a in self.data) and self.index.is_precise()
83+
84+
85+
@register_model(DataFrameType)
class DataFrameModel(models.StructModel):
    """Struct data model registered for DataFrameType."""

    def __init__(self, dmm, fe_type):
        column_count = len(fe_type.columns)

        data_member = ('data', types.Tuple(fe_type.data))
        index_member = ('index', fe_type.index)
        names_member = ('columns', types.UniTuple(string_type, column_count))
        # for lazy unboxing of df coming from Python (usually argument):
        # flags noting which columns and index are unboxed, index flag is last
        unboxed_member = ('unboxed', types.UniTuple(types.int8, column_count + 1))
        parent_member = ('parent', types.pyobject)

        layout = [
            data_member,
            index_member,
            names_member,
            unboxed_member,
            parent_member,
        ]
        super().__init__(dmm, fe_type, layout)
100+
101+
102+
# Expose each struct member of the DataFrameType model as an
# underscore-prefixed attribute of the same name.
for _member, _alias in (('data', '_data'),
                        ('index', '_index'),
                        ('columns', '_columns'),
                        ('unboxed', '_unboxed'),
                        ('parent', '_parent')):
    make_attribute_wrapper(DataFrameType, _member, _alias)

0 commit comments

Comments
 (0)