Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit b741040

Browse files
authored
Df.at impl (#738)
1 parent ab3a99b commit b741040

File tree

4 files changed

+190
-1
lines changed

4 files changed

+190
-1
lines changed

examples/dataframe/dataframe_at.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2020, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
28+
import pandas as pd
29+
from numba import njit
30+
31+
32+
@njit
def dataframe_at():
    """Build a small three-column DataFrame and read one cell through ``DataFrame.at``."""
    # The dict literal is passed straight to the constructor inside the jitted function.
    df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})

    # Row label 1 of column 'C'; the result is array-like rather than a scalar.
    return df.at[1, 'C']  # ['b']
37+
38+
39+
print(dataframe_at())

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
from pandas.core.indexing import IndexingError
3939

40-
from numba import types
40+
from numba import types, prange
4141
from numba.special import literally
4242
from numba.typed import List, Dict
4343
from numba.errors import TypingError
@@ -1877,6 +1877,35 @@ def _df_getitem_unicode_idx_impl(self, idx):
18771877
ty_checker.raise_exc(idx, expected_types, 'idx')
18781878

18791879

1880+
def df_getitem_tuple_at_codegen(self, row, col):
    """
    Build the source text of the ``DataFrame.at[row, col]`` getitem implementation.

    Parameters
    ----------
    self
        DataFrame type object; only its ``columns`` sequence is read here.
    row
        Type of the row label. Not used while generating the source: the
        generated function unpacks the row label from ``idx`` at run time.
        Kept so the signature stays compatible with existing callers.
    col
        Column name (a compile-time literal) to look up in ``self.columns``.

    Returns
    -------
    tuple
        ``(func_text, global_vars)`` - the generated source and the globals
        it has to be executed with.

    Raises
    ------
    KeyError
        If ``col`` is not one of ``self.columns``.

    Example of generated implementation:
        def _df_getitem_tuple_at_impl(self, idx):
            row, _ = idx
            data = self._dataframe._data[1]
            res_data = pandas.Series(data, index=self._dataframe.index)
            return res_data.at[row]
    """
    func_lines = ['def _df_getitem_tuple_at_impl(self, idx):',
                  ' row, _ = idx']
    for col_pos, col_name in enumerate(self.columns):
        if col_name == col:
            func_lines += [
                f' data = self._dataframe._data[{col_pos}]',
                ' res_data = pandas.Series(data, index=self._dataframe.index)',
                ' return res_data.at[row]',
            ]
            # The generated function returns on the first match, so statements
            # emitted for a later duplicate of the same name would be dead code.
            break
    else:
        # Loop finished without a match: the requested column does not exist.
        raise KeyError('Column is not in the DataFrame')

    func_text = '\n'.join(func_lines)
    global_vars = {'pandas': pandas}

    return func_text, global_vars
1907+
1908+
18801909
def df_getitem_single_label_loc_codegen(self, idx):
18811910
"""
18821911
Example of generated implementation:
@@ -2063,6 +2092,15 @@ def _df_getitem_list_bool_iloc_impl(self, idx):
20632092
return func_text, global_vars
20642093

20652094

2095+
def gen_df_getitem_tuple_at_impl(self, row, col):
    """
    Turn the generated ``at`` getitem source into a function object.

    The source and its globals come from ``df_getitem_tuple_at_codegen``;
    the text is executed and the function it defines,
    ``_df_getitem_tuple_at_impl``, is returned.
    """
    source, namespace = df_getitem_tuple_at_codegen(self, row, col)
    defined = {}
    exec(source, namespace, defined)

    return defined['_df_getitem_tuple_at_impl']
2102+
2103+
20662104
gen_df_getitem_loc_single_label_impl = gen_impl_generator(
20672105
df_getitem_single_label_loc_codegen, '_df_getitem_single_label_loc_impl')
20682106

@@ -2086,6 +2124,21 @@ def sdc_pandas_dataframe_accessor_getitem(self, idx):
20862124

20872125
accessor = self.accessor.literal_value
20882126

2127+
if accessor == 'at':
2128+
num_idx = isinstance(idx[0], types.Number) and isinstance(self.dataframe.index, (types.Array, types.NoneType))
2129+
str_idx = (isinstance(idx[0], (types.UnicodeType, types.StringLiteral))
2130+
and isinstance(self.dataframe.index, StringArrayType))
2131+
if isinstance(idx, types.Tuple) and isinstance(idx[1], types.StringLiteral):
2132+
if num_idx or str_idx:
2133+
row = idx[0]
2134+
col = idx[1].literal_value
2135+
return gen_df_getitem_tuple_at_impl(self.dataframe, row, col)
2136+
2137+
raise TypingError('Attribute at(). The row parameter type ({}) is different from the index type\
2138+
({})'.format(type(idx[0]), type(self.dataframe.index)))
2139+
2140+
raise TypingError('Attribute at(). The index must be a row and literal column. Given: {}'.format(idx))
2141+
20892142
if accessor == 'loc':
20902143
if isinstance(idx, (types.Integer, types.UnicodeType, types.StringLiteral)):
20912144
return gen_df_getitem_loc_single_label_impl(self.dataframe, idx)
@@ -2244,6 +2297,58 @@ def sdc_pandas_dataframe_iat_impl(self):
22442297
return sdc_pandas_dataframe_iat_impl
22452298

22462299

2300+
@sdc_overload_attribute(DataFrameType, 'at')
def sdc_pandas_dataframe_at(self):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Limitations
    -----------
    - ``DataFrame.at`` always returns ``array``.
    - Parameter ``column`` in ``idx`` must be a literal value.

    Pandas API: pandas.DataFrame.at

    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe/dataframe_at.py
       :language: python
       :lines: 28-
       :caption: Access a single value for a row/column label pair.
       :name: ex_dataframe_at

    .. command-output:: python ./dataframe/dataframe_at.py
       :cwd: ../../../examples

    .. seealso::

        :ref:`DataFrame.iat <pandas.DataFrame.iat>`
            Access a single value for a row/column pair by integer position.

        :ref:`DataFrame.loc <pandas.DataFrame.loc>`
            Access a group of rows and columns by label(s).

        :ref:`Series.at <pandas.Series.at>`
            Access a single value using a label.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas DataFrame attribute :attr:`pandas.DataFrame.at` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_at*
    """

    # Typing-time validation of the receiver; TypeChecker is defined elsewhere
    # and is expected to reject anything that is not a DataFrameType (confirm).
    ty_checker = TypeChecker('Attribute at().')
    ty_checker.check(self, DataFrameType)

    def sdc_pandas_dataframe_at_impl(self):
        # Only wraps the frame in an accessor tagged 'at'; the actual indexing
        # happens in the accessor's getitem overload, which branches on that tag.
        return dataframe_getitem_accessor_init(self, 'at')

    return sdc_pandas_dataframe_at_impl
2350+
2351+
22472352
@sdc_overload_attribute(DataFrameType, 'loc')
22482353
def sdc_pandas_dataframe_loc(self):
22492354
"""

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,14 @@ def hpat_pandas_series_loc_impl(self, idx):
245245
if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)):
246246
def hpat_pandas_series_at_impl(self, idx):
    # Series.at[idx]: select every element whose index label equals idx.
    # Returns the matching slice of the underlying data (an array, even for
    # a single hit). Raises ValueError when no label matches.
    index = self._series.index
    mask = numpy.empty(len(self._series._data), numpy.bool_)
    for i in numba.prange(len(index)):
        # Each iteration writes only its own slot, so the loop body carries
        # no state between iterations.
        mask[i] = index[i] == idx
    # Decide "label found" from the finished mask instead of setting a flag
    # inside the prange body: a conditionally assigned scalar is not a
    # reduction form, so its value after a parallel loop cannot be relied on.
    if not mask.any():
        raise ValueError("Index is not in the Series")
    return self._series._data[mask]
252257

253258
return hpat_pandas_series_at_impl

sdc/tests/test_dataframe.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,46 @@ def test_impl(df):
11971197
msg = 'Index is out of bounds for axis'
11981198
self.assertIn(msg, str(raises.exception))
11991199

1200+
def test_df_at(self):
    """``df.at`` on an integer index, including a duplicated label, agrees with pandas."""
    def test_impl(df, n):
        return df.at[n, 'C']

    sdc_func = sdc.jit(test_impl)
    row_labels = [3, 0, 1, 2, 0]
    columns = {"A": [3.2, 4.4, 7.0, 3.3, 1.0],
               "B": [3, 4, 1, 0, 222],
               "C": ['a', 'dd', 'c', '12', 'ddf']}
    df = pd.DataFrame(columns, index=row_labels)
    # Label 0 occurs twice in the index, label 2 once.
    for label in (0, 2):
        actual = sdc_func(df, label)
        expected = test_impl(df, label)
        np.testing.assert_array_equal(actual, expected)
1212+
1213+
def test_df_at_type(self):
    """``df.at`` on a string index agrees with pandas for a numeric column."""
    def test_impl(df, n, k):
        # k is accepted but not used: the column has to be the literal "B".
        return df.at[n, "B"]

    sdc_func = sdc.jit(test_impl)
    row_labels = ['3', '4', '1', '2', '0']
    columns = {"A": [3.2, 4.4, 7.0, 3.3, 1.0],
               "B": [3, 4, 1, 0, 222],
               "C": ['a', 'dd', 'c', '12', 'ddf']}
    df = pd.DataFrame(columns, index=row_labels)
    for label in ('2', '3'):
        actual = sdc_func(df, label, "B")
        expected = test_impl(df, label, "B")
        self.assertEqual(actual, expected)
1225+
1226+
def test_df_at_value_error(self):
    """A row label missing from the index makes the jitted ``df.at`` raise ValueError."""
    def test_impl(df):
        return df.at[5, 'C']

    sdc_func = sdc.jit(test_impl)
    # 5 is deliberately absent from the index labels.
    row_labels = [3, 4, 1, 2, 0]
    columns = {"A": [3.2, 4.4, 7.0, 3.3, 1.0],
               "B": [3, 4, 1, 0, 222],
               "C": [3, 4, 2, 6, 1]}
    df = pd.DataFrame(columns, index=row_labels)

    with self.assertRaises(ValueError) as caught:
        sdc_func(df)
    self.assertIn('Index is not in the Series', str(caught.exception))
1239+
12001240
def test_df_loc(self):
12011241
def test_impl(df):
12021242
return df.loc[4]

0 commit comments

Comments
 (0)