Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 1f4a3e8

Browse files
authored
Df.iloc impl (#743)
1 parent bb625dd commit 1f4a3e8

File tree

3 files changed

+370
-0
lines changed

3 files changed

+370
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2020, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
28+
"""
29+
Expected result:
30+
A 2.0
31+
B 5.0
32+
Name: 1, dtype: float64
33+
"""
34+
35+
36+
import pandas as pd
37+
from numba import njit
38+
39+
40+
# The example body is compiled through the ``njit`` decorator imported above.
@njit
def dataframe_iloc():
    # Two-column frame with four rows and no explicit index.
    df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7]})

    # Integer ``iloc`` selects the row at position 1 (second row);
    # the expected printed result is given in the module docstring.
    return df.iloc[1]


print(dataframe_iloc())

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,6 +1876,153 @@ def _df_getitem_unicode_idx_impl(self, idx):
18761876
ty_checker.raise_exc(idx, expected_types, 'idx')
18771877

18781878

1879+
def df_getitem_int_iloc_codegen(self, idx):
1880+
"""
1881+
Example of generated implementation:
1882+
def _df_getitem_int_iloc_impl(self, idx):
1883+
if -1 < idx < len(self._dataframe.index):
1884+
data_0 = pandas.Series(self._dataframe._data[0])
1885+
result_0 = data_0.iat[idx]
1886+
data_1 = pandas.Series(self._dataframe._data[1])
1887+
result_1 = data_1.iat[idx]
1888+
return pandas.Series(data=[result_0, result_1], index=['A', 'B'], name=str(idx))
1889+
raise IndexingError('Index is out of bounds for axis')
1890+
"""
1891+
func_lines = ['def _df_getitem_int_iloc_impl(self, idx):',
1892+
' if -1 < idx < len(self._dataframe.index):']
1893+
results = []
1894+
index = []
1895+
name = 'self._dataframe._index[idx]'
1896+
if isinstance(self.index, types.NoneType):
1897+
name = 'idx'
1898+
for i, c in enumerate(self.columns):
1899+
result_c = f"result_{i}"
1900+
func_lines += [f" data_{i} = pandas.Series(self._dataframe._data[{i}])",
1901+
f" {result_c} = data_{i}.iat[idx]"]
1902+
results.append(result_c)
1903+
index.append(c)
1904+
data = ', '.join(col for col in results)
1905+
func_lines += [f" return pandas.Series(data=[{data}], index={index}, name=str({name}))",
1906+
f" raise IndexingError('Index is out of bounds for axis')"]
1907+
1908+
func_text = '\n'.join(func_lines)
1909+
global_vars = {'pandas': pandas, 'numpy': numpy, 'IndexingError': IndexingError}
1910+
1911+
return func_text, global_vars
1912+
1913+
1914+
def df_getitem_slice_iloc_codegen(self, idx):
    """
    Generate the source text of the ``DataFrame.iloc[<slice>]`` implementation.

    For every entry of ``self.columns`` the generated function wraps the column data into
    a ``pandas.Series`` and applies ``.iloc[idx]`` to it; the sliced columns are assembled
    into a ``pandas.DataFrame`` indexed by ``self._dataframe.index[idx]``.

    Parameters: ``self`` - DataFrame type object, only ``columns`` is read;
                ``idx`` - type of the index argument, not inspected here.
    Returns: tuple ``(func_text, global_vars)`` - the generated source and the globals
             (``pandas``, ``numpy``) it has to be compiled with.

    Example of generated implementation:
        def _df_getitem_slice_iloc_impl(self, idx):
          data_0 = pandas.Series(self._dataframe._data[0])
          result_0 = data_0.iloc[idx]
          data_1 = pandas.Series(self._dataframe._data[1])
          result_1 = data_1.iloc[idx]
          return pandas.DataFrame(data={"A": result_0, "B": result_1}, index=self._dataframe.index[idx])
    """
    header = 'def _df_getitem_slice_iloc_impl(self, idx):'
    body = []
    column_items = []
    for pos, col_name in enumerate(self.columns):
        sliced = f"result_{pos}"
        body.append(f" data_{pos} = pandas.Series(self._dataframe._data[{pos}])")
        body.append(f" {sliced} = data_{pos}.iloc[idx]")
        # "<column name>": <variable holding the sliced column>
        column_items.append(f'"{col_name}": {sliced}')

    frame_data = ', '.join(column_items)
    footer = f" return pandas.DataFrame(data={{{frame_data}}}, index=self._dataframe.index[idx])"

    func_text = '\n'.join([header, *body, footer])
    global_vars = {'pandas': pandas, 'numpy': numpy}

    return func_text, global_vars
1938+
1939+
1940+
def df_getitem_list_iloc_codegen(self, idx):
1941+
"""
1942+
Example of generated implementation:
1943+
def _df_getitem_list_iloc_impl(self, idx):
1944+
check_idx = False
1945+
for i in idx:
1946+
if -1 < i < len(self._dataframe.index):
1947+
check_idx = True
1948+
if check_idx == True:
1949+
data_0 = pandas.Series(self._dataframe._data[0])
1950+
result_0 = data_0.iloc[numpy.array(idx)]
1951+
data_1 = pandas.Series(self._dataframe._data[1])
1952+
result_1 = data_1.iloc[numpy.array(idx)]
1953+
return pandas.DataFrame(data={"A": result_0, "B": result_1}, index=idx)
1954+
raise IndexingError('Index is out of bounds for axis')
1955+
"""
1956+
func_lines = ['def _df_getitem_list_iloc_impl(self, idx):',
1957+
' check_idx = False',
1958+
' for i in idx:',
1959+
' if -1 < i < len(self._dataframe.index):',
1960+
' check_idx = True',
1961+
' if check_idx == True:']
1962+
results = []
1963+
index = '[self._dataframe._index[i] for i in idx]'
1964+
if isinstance(self.index, types.NoneType):
1965+
index = 'idx'
1966+
for i, c in enumerate(self.columns):
1967+
result_c = f"result_{i}"
1968+
func_lines += [f" data_{i} = pandas.Series(self._dataframe._data[{i}])",
1969+
f" {result_c} = data_{i}.iloc[numpy.array(idx)]"]
1970+
results.append((c, result_c))
1971+
data = ', '.join(f'"{col}": {data}' for col, data in results)
1972+
func_lines += [f" return pandas.DataFrame(data={{{data}}}, index={index})",
1973+
f" raise IndexingError('Index is out of bounds for axis')"]
1974+
1975+
func_text = '\n'.join(func_lines)
1976+
global_vars = {'pandas': pandas, 'numpy': numpy, 'IndexingError': IndexingError}
1977+
1978+
return func_text, global_vars
1979+
1980+
1981+
def df_getitem_list_bool_iloc_codegen(self, idx):
    """
    Generate the source text of the ``DataFrame.iloc[<boolean mask>]`` implementation.

    The generated function requires ``len(idx)`` to equal ``len(self._dataframe.index)``
    and raises ``IndexingError('Item wrong length')`` otherwise.  For a mask of matching
    length every column's data is indexed with ``numpy.array(idx)`` and wrapped into a
    ``pandas.Series``; the result is a ``pandas.DataFrame`` indexed by
    ``self._dataframe.index[numpy.array(idx)]``.

    Nested statements of the generated text are indented two spaces per level, so the
    text is syntactically valid when it is compiled.

    Parameters: ``self`` - DataFrame type object, only ``columns`` is read;
                ``idx`` - type of the index argument, not inspected here.
    Returns: tuple ``(func_text, global_vars)`` - the generated source and the globals
             (``pandas``, ``numpy``, ``IndexingError``) it has to be compiled with.

    Example of generated implementation:
        def _df_getitem_list_bool_iloc_impl(self, idx):
          if len(self._dataframe.index) == len(idx):
            data_0 = self._dataframe._data[0]
            result_0 = pandas.Series(data_0[numpy.array(idx)])
            data_1 = self._dataframe._data[1]
            result_1 = pandas.Series(data_1[numpy.array(idx)])
            return pandas.DataFrame(data={"A": result_0, "B": result_1},
                                    index=self._dataframe.index[numpy.array(idx)])
          raise IndexingError('Item wrong length')
    """
    func_lines = ['def _df_getitem_list_bool_iloc_impl(self, idx):',
                  '  if len(self._dataframe.index) == len(idx):']
    index = 'self._dataframe.index[numpy.array(idx)]'

    results = []
    for i, c in enumerate(self.columns):
        result_c = f"result_{i}"
        # Body of the generated ``if``: must be nested one level deeper than the ``if`` line.
        func_lines += [f"    data_{i} = self._dataframe._data[{i}]",
                       f"    {result_c} = pandas.Series(data_{i}[numpy.array(idx)])"]
        results.append((c, result_c))

    data = ', '.join(f'"{col}": {res}' for col, res in results)
    func_lines += [f"    return pandas.DataFrame(data={{{data}}}, index={index})",
                   "  raise IndexingError('Item wrong length')"]

    func_text = '\n'.join(func_lines)
    global_vars = {'pandas': pandas, 'numpy': numpy, 'IndexingError': IndexingError}

    return func_text, global_vars
2011+
2012+
2013+
# Implementation generators for the supported ``DataFrame.iloc`` index kinds.
# Each pairs a codegen function with the name of the function defined by its generated text.
# NOTE(review): presumably ``gen_impl_generator`` compiles the generated text and returns the
# named function - confirm against its definition.

# Integer position.
gen_df_getitem_iloc_int_impl = gen_impl_generator(
    df_getitem_int_iloc_codegen, '_df_getitem_int_iloc_impl')

# Slice of positions.
gen_df_getitem_iloc_slice_impl = gen_impl_generator(
    df_getitem_slice_iloc_codegen, '_df_getitem_slice_iloc_impl')

# List of integer positions.
gen_df_getitem_iloc_list_impl = gen_impl_generator(
    df_getitem_list_iloc_codegen, '_df_getitem_list_iloc_impl')

# Boolean mask.
gen_df_getitem_iloc_list_bool_impl = gen_impl_generator(
    df_getitem_list_bool_iloc_codegen, '_df_getitem_list_bool_iloc_impl')
2024+
2025+
18792026
@sdc_overload(operator.getitem)
18802027
def sdc_pandas_dataframe_accessor_getitem(self, idx):
18812028
if not isinstance(self, DataFrameGetitemAccessorType):
@@ -1903,10 +2050,90 @@ def df_getitem_iat_tuple_impl(self, idx):
19032050

19042051
raise TypingError('Operator getitem(). The index must be a row and literal column. Given: {}'.format(idx))
19052052

2053+
if accessor == 'iloc':
2054+
if isinstance(idx, types.SliceType):
2055+
return gen_df_getitem_iloc_slice_impl(self.dataframe, idx)
2056+
2057+
if (
2058+
isinstance(idx, (types.List, types.Array)) and
2059+
isinstance(idx.dtype, (types.Boolean, bool))
2060+
):
2061+
return gen_df_getitem_iloc_list_bool_impl(self.dataframe, idx)
2062+
2063+
if isinstance(idx, types.List):
2064+
return gen_df_getitem_iloc_list_impl(self.dataframe, idx)
2065+
2066+
if isinstance(idx, types.Integer):
2067+
return gen_df_getitem_iloc_int_impl(self.dataframe, idx)
2068+
2069+
if isinstance(idx, (types.Tuple, types.UniTuple)):
2070+
def df_getitem_tuple_iat_impl(self, idx):
2071+
return self._dataframe.iat[idx]
2072+
2073+
return df_getitem_tuple_iat_impl
2074+
2075+
raise TypingError('Attribute iloc(). The index must be an integer, a list or array of integers,\
2076+
a slice object with ints or a boolean array.\
2077+
Given: {}'.format(idx))
2078+
19062079
raise TypingError('Operator getitem(). Unknown accessor. Only "loc", "iloc", "at", "iat" are supported.\
19072080
Given: {}'.format(accessor))
19082081

19092082

2083+
@sdc_overload_attribute(DataFrameType, 'iloc')
def sdc_pandas_dataframe_iloc(self):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.DataFrame.iloc

    Limitations
    -----------
    - Parameter ``'name'`` of the Series returned for an integer index can be String only
    - Column can be literal value only, in DataFrame.iloc[row, column]
    - Iloc works with basic cases only: an integer, a list or array of integers,
        a slice object with ints, a boolean array

    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe/dataframe_iloc.py
       :language: python
       :lines: 36-
       :caption: Get value at specified index position.
       :name: ex_dataframe_iloc

    .. command-output:: python ./dataframe/dataframe_iloc.py
       :cwd: ../../../examples

    .. seealso::

        :ref:`DataFrame.iat <pandas.DataFrame.iat>`
            Fast integer location scalar accessor.

        :ref:`DataFrame.loc <pandas.DataFrame.loc>`
            Purely label-location based indexer for selection by label.

        :ref:`Series.iloc <pandas.Series.iloc>`
            Purely integer-location based indexing for selection by position.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas DataFrame method :meth:`pandas.DataFrame.iloc` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_iloc*
    """

    # Validate that the attribute is requested on a DataFrameType
    # (presumably raises a typing error otherwise - confirm against TypeChecker.check).
    ty_checker = TypeChecker('Attribute iloc().')
    ty_checker.check(self, DataFrameType)

    def sdc_pandas_dataframe_iloc_impl(self):
        # Wrap the frame into a getitem accessor tagged 'iloc'; the indexing itself is
        # handled by the operator.getitem overload, which branches on that accessor name.
        return dataframe_getitem_accessor_init(self, 'iloc')

    return sdc_pandas_dataframe_iloc_impl
2135+
2136+
19102137
@sdc_overload_attribute(DataFrameType, 'iat')
19112138
def sdc_pandas_dataframe_iat(self):
19122139
"""

0 commit comments

Comments
 (0)