Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 47a3fd4

Browse files
authored
Scale Series.fillna (#589)
* Scale Series.fillna * wip * move fillna impl to numpy_like * pep fix
1 parent b5bbbaa commit 47a3fd4

File tree

2 files changed

+92
-33
lines changed

2 files changed

+92
-33
lines changed

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 13 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
from numba import (types, numpy_support, cgutils)
4343
from numba.typed import Dict
4444
from numba import prange
45+
from numba.targets.arraymath import get_isnan
4546

4647
import sdc
4748
import sdc.datatypes.common_functions as common_functions
@@ -5019,61 +5020,40 @@ def hpat_pandas_series_fillna(self, value=None, method=None, axis=None, inplace=
50195020
raise TypingError('{} Not implemented when Series dtype is {} and\
50205021
inplace={}'.format(_func_name, self.dtype, inplace))
50215022

5022-
elif isinstance(self.dtype, (types.Integer, types.Boolean)):
5023-
def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
5024-
limit=None, downcast=None):
5025-
# no NaNs in series of Integers or Booleans
5026-
return None
5027-
5028-
return hpat_pandas_series_no_nan_fillna_impl
50295023
else:
50305024
def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False,
50315025
limit=None, downcast=None):
5032-
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
5033-
self._data[na_data_arr] = value
5034-
return None
5026+
return numpy_like.fillna(self._data, inplace=inplace, value=value)
50355027

50365028
return hpat_pandas_series_fillna_impl
5029+
50375030
else:
50385031
# non inplace implementations, copy array, fill the NA/NaN and return a new Series
50395032
if isinstance(self.dtype, types.UnicodeType):
50405033
# For StringArrayType implementation is taken from _series_fillna_str_alloc_impl
50415034
# (can be called directly when its index handling is fixed)
50425035
def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None,
50435036
inplace=False, limit=None, downcast=None):
5044-
5045-
n = len(self._data)
5046-
num_chars = 0
5047-
# get total chars in new array
5048-
for i in prange(n):
5049-
s = self._data[i]
5050-
if sdc.hiframes.api.isna(self._data, i):
5051-
num_chars += len(value)
5052-
else:
5053-
num_chars += len(s)
5054-
5055-
filled_data = pre_alloc_string_array(n, num_chars)
5056-
for i in prange(n):
5057-
if sdc.hiframes.api.isna(self._data, i):
5058-
filled_data[i] = value
5059-
else:
5060-
filled_data[i] = self._data[i]
5061-
return pandas.Series(data=filled_data, index=self._index, name=self._name)
5037+
return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
5038+
index=self._index,
5039+
name=self._name)
50625040

50635041
return hpat_pandas_series_str_fillna_impl
50645042

50655043
elif isinstance(self.dtype, (types.Integer, types.Boolean)):
50665044
def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
5067-
return pandas.Series(data=numpy.copy(self._data), index=self._index, name=self._name)
5045+
return pandas.Series(data=numpy_like.fillna(self._data, inplace=inplace, value=value),
5046+
index=self._index,
5047+
name=self._name)
50685048

50695049
return hpat_pandas_series_no_nan_fillna_impl
50705050

50715051
else:
50725052
def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
5073-
na_data_arr = sdc.hiframes.api.get_nan_mask(self._data)
5074-
filled_data = numpy.copy(self._data)
5075-
filled_data[na_data_arr] = value
5076-
return pandas.Series(data=filled_data, index=self._index, name=self._name)
5053+
filled_data = numpy_like.fillna(self._data, inplace=inplace, value=value)
5054+
return pandas.Series(data=filled_data,
5055+
index=self._index,
5056+
name=self._name)
50775057

50785058
return hpat_pandas_series_fillna_impl
50795059

sdc/functions/numpy_like.py

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,10 @@ def astype(self, dtype):
4848
pass
4949

5050

51+
def fillna(self, inplace=False, value=None):
52+
pass
53+
54+
5155
def copy(self):
5256
pass
5357

@@ -315,6 +319,81 @@ def sdc_nansum_number_impl(self):
315319
return gen_sum_bool_impl()
316320

317321

322+
@sdc_overload(fillna)
323+
def sdc_fillna_overload(self, inplace=False, value=None):
324+
"""
325+
Intel Scalable Dataframe Compiler Developer Guide
326+
*************************************************
327+
Parallel replacement of fillna.
328+
.. only:: developer
329+
Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k fillna
330+
"""
331+
if not isinstance(self, (types.Array, StringArrayType)):
332+
return None
333+
334+
dtype = self.dtype
335+
isnan = get_isnan(dtype)
336+
if (
337+
(isinstance(inplace, types.Literal) and inplace.literal_value == True) or # noqa
338+
(isinstance(inplace, bool) and inplace == True) # noqa
339+
):
340+
if isinstance(dtype, (types.Integer, types.Boolean)):
341+
def sdc_fillna_inplace_int_impl(self, inplace=False, value=None):
342+
return None
343+
344+
return sdc_fillna_inplace_int_impl
345+
346+
def sdc_fillna_inplace_float_impl(self, inplace=False, value=None):
347+
length = len(self)
348+
for i in prange(length):
349+
if isnan(self[i]):
350+
self[i] = value
351+
return None
352+
353+
return sdc_fillna_inplace_float_impl
354+
355+
else:
356+
if isinstance(self.dtype, types.UnicodeType):
357+
def sdc_fillna_str_impl(self, inplace=False, value=None):
358+
n = len(self)
359+
num_chars = 0
360+
# get total chars in new array
361+
for i in prange(n):
362+
s = self[i]
363+
if sdc.hiframes.api.isna(self, i):
364+
num_chars += len(value)
365+
else:
366+
num_chars += len(s)
367+
368+
filled_data = pre_alloc_string_array(n, num_chars)
369+
for i in prange(n):
370+
if sdc.hiframes.api.isna(self, i):
371+
filled_data[i] = value
372+
else:
373+
filled_data[i] = self[i]
374+
return filled_data
375+
376+
return sdc_fillna_str_impl
377+
378+
if isinstance(dtype, (types.Integer, types.Boolean)):
379+
def sdc_fillna_int_impl(self, inplace=False, value=None):
380+
return copy(self)
381+
382+
return sdc_fillna_int_impl
383+
384+
def sdc_fillna_impl(self, inplace=False, value=None):
385+
length = len(self)
386+
filled_data = numpy.empty(length, dtype=dtype)
387+
for i in prange(length):
388+
if isnan(self[i]):
389+
filled_data[i] = value
390+
else:
391+
filled_data[i] = self[i]
392+
return filled_data
393+
394+
return sdc_fillna_impl
395+
396+
318397
def nanmin(a):
319398
pass
320399

0 commit comments

Comments
 (0)