Move DataFrameType and SeriesType to separate files in order to avoid circular imports (#459)

AlexanderKalistratov · web-flow · commit b57c9adebb90 · 2019-12-30T16:25:59.000+03:00
* Move DataFrameType and SeriesType to separate files in order to avoid circular imports

* Clean up dataframe functions

* Modify sdc __init__.py

* Remove datatypes.hpat_pandas_functions from hiframes_untyped.py

* Remove datatypes.hpat_pandas_series_functions from hiframes/pd_series_ext.py

* Move imports from datatypes init to sdc init

* Fix series.rolling

* Fix style issues, etc

* Fix style issues pt2

* Remove noqa

* Fix istitle
diff --git a/sdc/__init__.py b/sdc/__init__.py
@@ -24,32 +24,28 @@
 # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
-
-from ._version import get_versions
 import numba
 
 # re-export from Numba
 from numba import (typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
                    stencil, threading_layer, jitclass, objmode)
 
+import sdc.config
 import sdc.dict_ext
 import sdc.set_ext
-import sdc.compiler
 import sdc.io
 import sdc.io.np_io
 import sdc.hiframes.pd_timestamp_ext
 import sdc.hiframes.boxing
-import sdc.config
 import sdc.timsort
 from sdc.decorators import jit
-import sdc.rewrites.dataframe_constructor
 
-multithread_mode = False
-
-
-__version__ = get_versions()['version']
-del get_versions
+import sdc.datatypes.hpat_pandas_series_functions
+import sdc.datatypes.hpat_pandas_series_rolling_functions
+import sdc.datatypes.hpat_pandas_seriesgroupby_functions
+import sdc.datatypes.hpat_pandas_stringmethods_functions
 
+from ._version import get_versions
 
 if not sdc.config.config_pipeline_hpat_default:
     """
@@ -65,6 +61,16 @@
     # numba.compiler.DefaultPassBuilder.define_nopython_pipeline = \
     # sdc.datatypes.hpat_pandas_dataframe_pass.sdc_nopython_pipeline_lite_register
 
+    import sdc.rewrites.dataframe_constructor
+    import sdc.datatypes.hpat_pandas_functions
+else:
+    import sdc.compiler
+
+multithread_mode = False
+
+
+__version__ = get_versions()['version']
+del get_versions
 
 def _init_extension():
     '''Register Pandas classes and functions with Numba.
diff --git a/sdc/datatypes/__init__.py b/sdc/datatypes/__init__.py
@@ -25,7 +25,4 @@
 # *****************************************************************************
 
 
-import sdc.datatypes.hpat_pandas_dataframe_pass
-import sdc.datatypes.hpat_pandas_series_rolling_functions
-import sdc.datatypes.hpat_pandas_seriesgroupby_functions
-import sdc.datatypes.hpat_pandas_stringmethods_functions
+# import sdc.datatypes.hpat_pandas_dataframe_pass
diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py
@@ -34,15 +34,13 @@
 import copy
 import numpy
 
-import sdc
-
 from numba import types
 from numba.extending import (overload, overload_method, overload_attribute)
-from sdc.hiframes.pd_dataframe_ext import DataFrameType
+from sdc.hiframes.pd_dataframe_type import DataFrameType
 from numba.errors import TypingError
-import sdc.datatypes.hpat_pandas_dataframe_types
 
 from sdc.datatypes.hpat_pandas_series_functions import TypeChecker
+from sdc.hiframes.pd_dataframe_ext import get_dataframe_data
 
 
 # Example func_text for func_name='count' columns=('A', 'B'):
@@ -61,7 +59,7 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col
     func_lines = [f'def _df_{func_name}_impl({joined}):']
     for i, c in enumerate(columns):
         result_c = f'result_{c}'
-        func_lines += [f'  series_{c} = init_series(get_dataframe_data({func_params[0]}, {i}))',
+        func_lines += [f'  series_{c} = pandas.Series(get_dataframe_data({func_params[0]}, {i}))',
                        f'  {result_c} = series_{c}.{func_name}({series_params})']
         result_name_list.append(result_c)
     all_results = ', '.join(result_name_list)
@@ -71,8 +69,7 @@ def _dataframe_reduce_columns_codegen(func_name, func_params, series_params, col
     func_text = '\n'.join(func_lines)
 
     global_vars = {'pandas': pandas, 'np': numpy,
-                   'init_series': sdc.hiframes.api.init_series,
-                   'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data}
+                   'get_dataframe_data': get_dataframe_data}
 
     return func_text, global_vars
 
diff --git a/sdc/datatypes/hpat_pandas_series_autogenerated.py b/sdc/datatypes/hpat_pandas_series_autogenerated.py
@@ -44,7 +44,7 @@
 from sdc.datatypes.common_functions import TypeChecker
 from sdc.datatypes.common_functions import (check_index_is_numeric, find_common_dtype_from_numpy_dtypes,
                                             sdc_join_series_indexes, sdc_check_indexes_equal, check_types_comparable)
-from sdc.hiframes.pd_series_ext import SeriesType
+from sdc.hiframes.pd_series_type import SeriesType
 from sdc.str_arr_ext import (string_array_type, num_total_chars, str_arr_is_na)
 
 
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -45,7 +45,7 @@
                                             sdc_join_series_indexes)
 from sdc.datatypes.hpat_pandas_series_rolling_types import _hpat_pandas_series_rolling_init
 from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType
-from sdc.hiframes.pd_series_ext import SeriesType
+from sdc.hiframes.pd_series_type import SeriesType
 from sdc.str_arr_ext import (StringArrayType, string_array_type, str_arr_is_na, str_arr_set_na,
                              num_total_chars, pre_alloc_string_array, cp_str_list_to_array)
 from sdc.utils import to_array, sdc_overload, sdc_overload_method, sdc_overload_attribute
@@ -4489,4 +4489,3 @@ def hpat_pandas_series_pct_change_impl(self, periods=1, fill_method='pad', limit
         return pandas.Series(result)
 
     return hpat_pandas_series_pct_change_impl
-
diff --git a/sdc/datatypes/sdc_function_templates.py b/sdc/datatypes/sdc_function_templates.py
@@ -45,7 +45,7 @@
 from sdc.datatypes.common_functions import TypeChecker
 from sdc.datatypes.common_functions import (check_index_is_numeric, find_common_dtype_from_numpy_dtypes,
                                             sdc_join_series_indexes, sdc_check_indexes_equal, check_types_comparable)
-from sdc.hiframes.pd_series_ext import SeriesType
+from sdc.hiframes.pd_series_type import SeriesType
 from sdc.str_arr_ext import (string_array_type, num_total_chars, str_arr_is_na)
 
 
diff --git a/sdc/hiframes/boxing.py b/sdc/hiframes/boxing.py
@@ -39,7 +39,7 @@
 from numba.targets.boxing import _NumbaTypeHelper
 from numba.targets import listobj
 
-from sdc.hiframes.pd_dataframe_ext import DataFrameType
+from sdc.hiframes.pd_dataframe_type import DataFrameType
 from sdc.hiframes.pd_timestamp_ext import (datetime_date_type,
                                             unbox_datetime_date_array, box_datetime_date_array)
 from sdc.str_ext import string_type, list_string_array_type
diff --git a/sdc/hiframes/hiframes_untyped.py b/sdc/hiframes/hiframes_untyped.py
@@ -1942,6 +1942,3 @@ def simple_block_copy_propagate(block):
             for k in lhs_kill:
                 var_dict.pop(k, None)
     return
-
-
-from sdc.datatypes.hpat_pandas_functions import *
diff --git a/sdc/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py
@@ -33,89 +33,18 @@
 import numba
 from numba import types, cgutils
 from numba.extending import (models, register_model, lower_cast, infer_getattr,
-                             type_callable, infer, overload, make_attribute_wrapper, intrinsic,
+                             type_callable, infer, overload, intrinsic,
                              lower_builtin, overload_method)
 from numba.typing.templates import (infer_global, AbstractTemplate, signature,
                                     AttributeTemplate, bound_function)
 from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed
 
 import sdc
 from sdc.hiframes.pd_series_ext import SeriesType
+from sdc.hiframes.pd_dataframe_type import DataFrameType
 from sdc.str_ext import string_type
 from sdc.str_arr_ext import string_array_type
 
-
-class DataFrameType(types.Type):  # TODO: IterableType over column names
-    """Temporary type class for DataFrame objects.
-    """
-
-    def __init__(self, data=None, index=None, columns=None, has_parent=False):
-        self.data = data
-        if index is None:
-            index = types.none
-        self.index = index
-        self.columns = columns
-        # keeping whether it is unboxed from Python to enable reflection of new
-        # columns
-        self.has_parent = has_parent
-        super(DataFrameType, self).__init__(
-            name="dataframe({}, {}, {}, {})".format(data, index, columns, has_parent))
-
-    def copy(self, index=None, has_parent=None):
-        # XXX is copy necessary?
-        if index is None:
-            index = types.none if self.index == types.none else self.index.copy()
-        data = tuple(a.copy() for a in self.data)
-        if has_parent is None:
-            has_parent = self.has_parent
-        return DataFrameType(data, index, self.columns, has_parent)
-
-    @property
-    def key(self):
-        # needed?
-        return self.data, self.index, self.columns, self.has_parent
-
-    def unify(self, typingctx, other):
-        if (isinstance(other, DataFrameType)
-                and len(other.data) == len(self.data)
-                and other.columns == self.columns
-                and other.has_parent == self.has_parent):
-            new_index = types.none
-            if self.index != types.none and other.index != types.none:
-                new_index = self.index.unify(typingctx, other.index)
-            elif other.index != types.none:
-                new_index = other.index
-            elif self.index != types.none:
-                new_index = self.index
-
-            data = tuple(a.unify(typingctx, b) for a, b in zip(self.data, other.data))
-            return DataFrameType(data, new_index, self.columns, self.has_parent)
-
-    def is_precise(self):
-        return all(a.is_precise() for a in self.data) and self.index.is_precise()
-
-@register_model(DataFrameType)
-class DataFrameModel(models.StructModel):
-    def __init__(self, dmm, fe_type):
-        n_cols = len(fe_type.columns)
-        members = [
-            ('data', types.Tuple(fe_type.data)),
-            ('index', fe_type.index),
-            ('columns', types.UniTuple(string_type, n_cols)),
-            # for lazy unboxing of df coming from Python (usually argument)
-            # list of flags noting which columns and index are unboxed
-            # index flag is last
-            ('unboxed', types.UniTuple(types.int8, n_cols + 1)),
-            ('parent', types.pyobject),
-        ]
-        super(DataFrameModel, self).__init__(dmm, fe_type, members)
-
-make_attribute_wrapper(DataFrameType, 'data', '_data')
-make_attribute_wrapper(DataFrameType, 'index', '_index')
-make_attribute_wrapper(DataFrameType, 'columns', '_columns')
-make_attribute_wrapper(DataFrameType, 'unboxed', '_unboxed')
-make_attribute_wrapper(DataFrameType, 'parent', '_parent')
-
 @infer_getattr
 class DataFrameAttribute(AttributeTemplate):
     key = DataFrameType
diff --git a/sdc/hiframes/pd_dataframe_type.py b/sdc/hiframes/pd_dataframe_type.py
@@ -0,0 +1,106 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import numba
+from numba import types, cgutils
+from numba.extending import (models, register_model, make_attribute_wrapper)
+
+from sdc.str_ext import string_type
+
+
+class DataFrameType(types.Type):  # TODO: IterableType over column names
+    """Temporary type class for DataFrame objects.
+    """
+
+    def __init__(self, data=None, index=None, columns=None, has_parent=False):
+        self.data = data
+        if index is None:
+            index = types.none
+        self.index = index
+        self.columns = columns
+        # keeping whether it is unboxed from Python to enable reflection of new
+        # columns
+        self.has_parent = has_parent
+        super(DataFrameType, self).__init__(
+            name="dataframe({}, {}, {}, {})".format(data, index, columns, has_parent))
+
+    def copy(self, index=None, has_parent=None):
+        # XXX is copy necessary?
+        if index is None:
+            index = types.none if self.index == types.none else self.index.copy()
+        data = tuple(a.copy() for a in self.data)
+        if has_parent is None:
+            has_parent = self.has_parent
+        return DataFrameType(data, index, self.columns, has_parent)
+
+    @property
+    def key(self):
+        # needed?
+        return self.data, self.index, self.columns, self.has_parent
+
+    def unify(self, typingctx, other):
+        if (isinstance(other, DataFrameType)
+                and len(other.data) == len(self.data)
+                and other.columns == self.columns
+                and other.has_parent == self.has_parent):
+            new_index = types.none
+            if self.index != types.none and other.index != types.none:
+                new_index = self.index.unify(typingctx, other.index)
+            elif other.index != types.none:
+                new_index = other.index
+            elif self.index != types.none:
+                new_index = self.index
+
+            data = tuple(a.unify(typingctx, b) for a, b in zip(self.data, other.data))
+            return DataFrameType(data, new_index, self.columns, self.has_parent)
+
+    def is_precise(self):
+        return all(a.is_precise() for a in self.data) and self.index.is_precise()
+
+
+@register_model(DataFrameType)
+class DataFrameModel(models.StructModel):
+    def __init__(self, dmm, fe_type):
+        n_cols = len(fe_type.columns)
+        members = [
+            ('data', types.Tuple(fe_type.data)),
+            ('index', fe_type.index),
+            ('columns', types.UniTuple(string_type, n_cols)),
+            # for lazy unboxing of df coming from Python (usually argument)
+            # list of flags noting which columns and index are unboxed
+            # index flag is last
+            ('unboxed', types.UniTuple(types.int8, n_cols + 1)),
+            ('parent', types.pyobject),
+        ]
+        super(DataFrameModel, self).__init__(dmm, fe_type, members)
+
+
+make_attribute_wrapper(DataFrameType, 'data', '_data')
+make_attribute_wrapper(DataFrameType, 'index', '_index')
+make_attribute_wrapper(DataFrameType, 'columns', '_columns')
+make_attribute_wrapper(DataFrameType, 'unboxed', '_unboxed')
+make_attribute_wrapper(DataFrameType, 'parent', '_parent')
diff --git a/sdc/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py
diff --git a/sdc/hiframes/pd_series_type.py b/sdc/hiframes/pd_series_type.py