Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit d529471

Browse files
authored
Argmin/argmax + nanargmin/nanargmax (#580)
Argmin/argmax + nanargmin/nanargmax parallel
1 parent 9a27d8d commit d529471

File tree

5 files changed

+351
-23
lines changed

5 files changed

+351
-23
lines changed

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2926,7 +2926,7 @@ def hpat_pandas_series_take_impl(self, indices, axis=0, is_copy=False):
29262926

29272927

29282928
@sdc_overload_method(SeriesType, 'idxmax')
2929-
def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
2929+
def hpat_pandas_series_idxmax(self, axis=None, skipna=None):
29302930
"""
29312931
Intel Scalable Dataframe Compiler User Guide
29322932
********************************************
@@ -2975,25 +2975,48 @@ def hpat_pandas_series_idxmax(self, axis=None, skipna=True):
29752975
if not isinstance(self.data.dtype, types.Number):
29762976
ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')
29772977

2978-
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
2978+
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
29792979
ty_checker.raise_exc(skipna, 'bool', 'skipna')
29802980

29812981
if not (isinstance(axis, types.Omitted) or axis is None):
29822982
ty_checker.raise_exc(axis, 'None', 'axis')
29832983

2984-
if isinstance(self.index, types.NoneType) or self.index is None:
2985-
def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=True):
2986-
return numpy.argmax(self._data)
2987-
2988-
return hpat_pandas_series_idxmax_impl
2984+
none_index = isinstance(self.index, types.NoneType) or self.index is None
2985+
if isinstance(self.data, StringArrayType):
2986+
def hpat_pandas_series_idxmax_str_impl(self, axis=None, skipna=None):
    # idxmax() for string data: only NaN-skipping mode is available.
    # NOTE(review): the enclosing overload rejects non-numeric dtypes
    # before this implementation is selected - confirm string data can
    # actually reach this branch.

    # An omitted 'skipna' means "skip missing values", same convention
    # as the numeric implementation.
    if skipna is None:
        _skipna = True
    else:
        _skipna = skipna

    # Only an explicit skipna=False is unsupported; previously any
    # explicit value (including True) triggered this error.
    if not _skipna:
        raise ValueError("Method idxmax(). Unsupported parameter 'skipna'=False with str data")

    result = numpy.argmax(self._data)
    # 'none_index' is computed once by the enclosing overload; without
    # an index the position itself is the label.
    if none_index == True:  # noqa
        return result
    else:
        return self._index[int(result)]
2997+
2998+
return hpat_pandas_series_idxmax_str_impl
2999+
3000+
def hpat_pandas_series_idxmax_impl(self, axis=None, skipna=None):
    # idxmax() for numeric data: returns the position of the maximum
    # when the series has no index, otherwise the index label stored
    # at that position.

    # An omitted 'skipna' means "skip NaN values".
    if skipna is None:
        _skipna = True
    else:
        _skipna = skipna

    if _skipna:
        result = numpy_like.nanargmax(self._data)
    else:
        result = numpy_like.argmax(self._data)

    # 'none_index' is computed once by the enclosing overload.
    # Both arms return, so nothing may follow this statement.
    if none_index == True:  # noqa
        return result
    else:
        return self._index[int(result)]
3018+
3019+
return hpat_pandas_series_idxmax_impl
29973020

29983021

29993022
@sdc_overload_method(SeriesType, 'mul')
@@ -3987,7 +4010,7 @@ def hpat_pandas_series_ge_impl(self, other, level=None, fill_value=None, axis=0)
39874010

39884011

39894012
@sdc_overload_method(SeriesType, 'idxmin')
3990-
def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
4013+
def hpat_pandas_series_idxmin(self, axis=None, skipna=None):
39914014
"""
39924015
Intel Scalable Dataframe Compiler User Guide
39934016
********************************************
@@ -4036,25 +4059,48 @@ def hpat_pandas_series_idxmin(self, axis=None, skipna=True):
40364059
if not isinstance(self.data.dtype, types.Number):
40374060
ty_checker.raise_exc(self.data.dtype, 'int, float', 'self.data.dtype')
40384061

4039-
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is True):
4062+
if not (isinstance(skipna, (types.Omitted, types.Boolean, bool)) or skipna is None):
40404063
ty_checker.raise_exc(skipna, 'bool', 'skipna')
40414064

40424065
if not (isinstance(axis, types.Omitted) or axis is None):
40434066
ty_checker.raise_exc(axis, 'None', 'axis')
40444067

4045-
if isinstance(self.index, types.NoneType) or self.index is None:
4046-
def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=True):
4047-
return numpy.argmin(self._data)
4048-
4049-
return hpat_pandas_series_idxmin_impl
4068+
none_index = isinstance(self.index, types.NoneType) or self.index is None
4069+
if isinstance(self.data, StringArrayType):
4070+
def hpat_pandas_series_idxmin_str_impl(self, axis=None, skipna=None):
    # idxmin() for string data: only NaN-skipping mode is available.
    # NOTE(review): the enclosing overload rejects non-numeric dtypes
    # before this implementation is selected - confirm string data can
    # actually reach this branch.

    # An omitted 'skipna' means "skip missing values", same convention
    # as the numeric implementation.
    if skipna is None:
        _skipna = True
    else:
        _skipna = skipna

    # Only an explicit skipna=False is unsupported; previously any
    # explicit value (including True) triggered this error.
    if not _skipna:
        raise ValueError("Method idxmin(). Unsupported parameter 'skipna'=False with str data")

    result = numpy.argmin(self._data)
    # 'none_index' is computed once by the enclosing overload; without
    # an index the position itself is the label.
    if none_index == True:  # noqa
        return result
    else:
        return self._index[int(result)]
4081+
4082+
return hpat_pandas_series_idxmin_str_impl
4083+
4084+
def hpat_pandas_series_idxmin_impl(self, axis=None, skipna=None):
    # idxmin() for numeric data: returns the position of the minimum
    # when the series has no index, otherwise the index label stored
    # at that position.

    # An omitted 'skipna' means "skip NaN values".
    if skipna is None:
        _skipna = True
    else:
        _skipna = skipna

    if _skipna:
        result = numpy_like.nanargmin(self._data)
    else:
        result = numpy_like.argmin(self._data)

    # 'none_index' is computed once by the enclosing overload.
    # Both arms return, so nothing may follow this statement.
    if none_index == True:  # noqa
        return result
    else:
        return self._index[int(result)]
4102+
4103+
return hpat_pandas_series_idxmin_impl
40584104

40594105

40604106
@sdc_overload_method(SeriesType, 'lt')

sdc/functions/numpy_like.py

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
import numba
3535
import numpy
36+
import sys
3637
import pandas
3738
import numpy as np
3839

@@ -42,6 +43,9 @@
4243

4344
import sdc
4445
from sdc.utilities.sdc_typing_utils import TypeChecker
46+
from sdc.utilities.utils import (sdc_overload, sdc_register_jitable,
47+
min_dtype_int_val, max_dtype_int_val, min_dtype_float_val,
48+
max_dtype_float_val)
4549
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
4650
from sdc.utilities.utils import sdc_overload, sdc_register_jitable
4751
from sdc.utilities.prange_utils import parallel_chunks
@@ -51,6 +55,22 @@ def astype(self, dtype):
5155
pass
5256

5357

58+
def argmin(self):
    """Placeholder for the numpy-like ``argmin``.

    Does nothing and returns ``None`` when called directly; the working
    implementation is attached in this module through
    ``sdc_overload(argmin)(sdc_arg_overload(min))``.
    """
60+
61+
62+
def argmax(self):
    """Placeholder for the numpy-like ``argmax``.

    Does nothing and returns ``None`` when called directly; the working
    implementation is attached in this module through
    ``sdc_overload(argmax)(sdc_arg_overload(max))``.
    """
64+
65+
66+
def nanargmin(self):
    """Placeholder for the numpy-like ``nanargmin``.

    Does nothing and returns ``None`` when called directly; the working
    implementation is attached in this module through
    ``sdc_overload(nanargmin)(sdc_nanarg_overload(min))``.
    """
68+
69+
70+
def nanargmax(self):
    """Placeholder for the numpy-like ``nanargmax``.

    Does nothing and returns ``None`` when called directly; the working
    implementation is attached in this module through
    ``sdc_overload(nanargmax)(sdc_nanarg_overload(max))``.
    """
72+
73+
5474
def fillna(self, inplace=False, value=None):
    """Placeholder for the numpy-like ``fillna``.

    Does nothing and returns ``None`` when called directly.
    NOTE(review): presumably the working implementation is attached via
    ``sdc_overload`` elsewhere in this module - not visible here, confirm.
    """
5676

@@ -133,7 +153,170 @@ def sdc_astype_number_impl(self, dtype):
133153

134154
return sdc_astype_number_impl
135155

136-
ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')
156+
157+
def sdc_nanarg_overload(reduce_op):
    """
    Create the overload body shared by the numpy-like nanargmin/nanargmax.

    Parameters
    ----------
    reduce_op: builtin ``min`` or ``max``
        Reduction used to decide which value wins; it is also the key that
        selects the starting value of the search.

    Returns
    -------
    function
        ``nanarg_impl``, meant to be registered with ``sdc_overload``.
    """
    def nanarg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.nanargmin/numpy.nanargmax.

        .. only:: developer
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmin
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nanargmax

        """

        # Decline non-array arguments before reading array-only attributes
        # such as ``dtype``.
        if not isinstance(self, types.Array):
            return None

        ty_checker = TypeChecker("numpy-like 'nanargmin'/'nanargmax'")
        dtype = self.dtype

        # The search starts from the value that every element beats (or
        # ties): the dtype maximum for ``min``, the dtype minimum for ``max``.
        if isinstance(dtype, types.Integer):
            initial_result = {
                min: max_dtype_int_val(dtype),
                max: min_dtype_int_val(dtype),
            }[reduce_op]
        elif isinstance(dtype, types.Float):
            initial_result = {
                min: max_dtype_float_val(dtype),
                max: min_dtype_float_val(dtype),
            }[reduce_op]
        else:
            # No starting value is defined for any other dtype, so reject it
            # here instead of returning an implementation that would
            # reference an unbound ``initial_result``.
            ty_checker.raise_exc(dtype, 'number', 'self.dtype')
            return None

        isnan = get_isnan(dtype)
        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

        def sdc_nanarg_impl(self):
            # Stage 1: every chunk reports its best value and the first
            # position where that value occurs.
            chunks = parallel_chunks(len(self))
            arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
            arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
            for i in prange(len(chunks)):
                chunk = chunks[i]
                res = initial_result
                pos = max_int64
                for j in range(chunk.start, chunk.stop):
                    # Skip elements that do not beat or tie the current best.
                    if reduce_op(res, self[j]) != self[j]:
                        continue
                    # NaN values never take part in the reduction.
                    if isnan(self[j]):
                        continue
                    if res == self[j]:
                        # Tie: keep the earliest position.
                        pos = min(pos, j)
                    else:
                        pos = j
                        res = self[j]
                arr_res[i] = res
                arr_pos[i] = pos

            # Stage 2: sequential merge of the per-chunk results using the
            # same "beats or ties" rule.
            # NOTE(review): when no element is ever selected (empty input or
            # only NaN values) the int64 maximum is returned as the position;
            # callers indexing with it must guard against that.
            general_res = initial_result
            general_pos = max_int64
            for i in range(len(chunks)):
                if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                    continue
                if general_res == arr_res[i]:
                    general_pos = min(general_pos, arr_pos[i])
                else:
                    general_pos = arr_pos[i]
                    general_res = arr_res[i]

            return general_pos

        return sdc_nanarg_impl

    return nanarg_impl
228+
229+
230+
sdc_overload(nanargmin)(sdc_nanarg_overload(min))
231+
sdc_overload(nanargmax)(sdc_nanarg_overload(max))
232+
233+
234+
def sdc_arg_overload(reduce_op):
    """
    Create the overload body shared by the numpy-like argmin/argmax.

    Parameters
    ----------
    reduce_op: builtin ``min`` or ``max``
        Reduction used to decide which value wins; it is also the key that
        selects the starting value of the search.

    Returns
    -------
    function
        ``arg_impl``, meant to be registered with ``sdc_overload``.
    """
    def arg_impl(self):
        """
        Intel Scalable Dataframe Compiler Developer Guide
        *************************************************
        Parallel replacement of numpy.argmin/numpy.argmax.

        .. only:: developer
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmin
           Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k argmax

        """

        # Decline non-array arguments before reading array-only attributes
        # such as ``dtype``.
        if not isinstance(self, types.Array):
            return None

        ty_checker = TypeChecker("numpy-like 'argmin'/'argmax'")
        dtype = self.dtype

        # The search starts from the value that every element beats (or
        # ties): the dtype maximum for ``min``, the dtype minimum for ``max``.
        if isinstance(dtype, types.Integer):
            initial_result = {
                min: max_dtype_int_val(dtype),
                max: min_dtype_int_val(dtype),
            }[reduce_op]
        elif isinstance(dtype, types.Float):
            initial_result = {
                min: max_dtype_float_val(dtype),
                max: min_dtype_float_val(dtype),
            }[reduce_op]
        else:
            # No starting value is defined for any other dtype, so reject it
            # here instead of returning an implementation that would
            # reference an unbound ``initial_result``.
            ty_checker.raise_exc(dtype, 'number', 'self.dtype')
            return None

        isnan = get_isnan(dtype)
        max_int64 = max_dtype_int_val(numpy_support.from_dtype(numpy.int64))

        def sdc_arg_impl(self):
            # Stage 1: every chunk reports its best value and the first
            # position where that value occurs.
            chunks = parallel_chunks(len(self))
            arr_res = numpy.empty(shape=len(chunks), dtype=dtype)
            arr_pos = numpy.empty(shape=len(chunks), dtype=numpy.int64)
            for i in prange(len(chunks)):
                chunk = chunks[i]
                res = initial_result
                pos = max_int64
                for j in range(chunk.start, chunk.stop):
                    if not isnan(self[j]):
                        # Skip elements that do not beat or tie the best.
                        if reduce_op(res, self[j]) != self[j]:
                            continue
                        if res == self[j]:
                            # Tie: keep the earliest position.
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]
                    else:
                        # A NaN element takes over the result; among NaN
                        # elements the earliest position is kept.
                        if numpy.isnan(res):
                            pos = min(pos, j)
                        else:
                            pos = j
                            res = self[j]

                arr_res[i] = res
                arr_pos[i] = pos

            # Stage 2: sequential merge of the per-chunk results with the
            # same rules as the per-chunk scan.
            # NOTE(review): for empty input nothing is ever selected and the
            # int64 maximum is returned as the position.
            general_res = initial_result
            general_pos = max_int64
            for i in range(len(chunks)):
                if not isnan(arr_res[i]):
                    if reduce_op(general_res, arr_res[i]) != arr_res[i]:
                        continue
                    if general_res == arr_res[i]:
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]
                else:
                    if numpy.isnan(general_res):
                        general_pos = min(general_pos, arr_pos[i])
                    else:
                        general_pos = arr_pos[i]
                        general_res = arr_res[i]
            return general_pos

        return sdc_arg_impl

    return arg_impl
316+
317+
318+
sdc_overload(argmin)(sdc_arg_overload(min))
319+
sdc_overload(argmax)(sdc_arg_overload(max))
137320

138321

139322
@sdc_overload(copy)

0 commit comments

Comments
 (0)