4141from numba import numpy_support
4242
4343import sdc
44+ from sdc .hiframes .api import isna
4445from sdc .hiframes .pd_series_type import SeriesType
45- from sdc .str_arr_ext import (
46- append_string_array_to , cp_str_list_to_array , num_total_chars ,
47- pre_alloc_string_array , str_arr_is_na , str_arr_set_na , string_array_type
48- )
46+ from sdc .str_arr_type import string_array_type
47+ from sdc . str_arr_ext import ( num_total_chars , append_string_array_to ,
48+ str_arr_is_na , pre_alloc_string_array , str_arr_set_na , string_array_type ,
49+ cp_str_list_to_array , create_str_arr_from_list , get_utf8_size )
4950from sdc .utilities .utils import sdc_overload , sdc_register_jitable
5051from sdc .utilities .sdc_typing_utils import (find_common_dtype_from_numpy_dtypes ,
5152 TypeChecker )
@@ -483,18 +484,21 @@ def sdc_arrays_argsort(A, kind='quicksort'):
483484
484485@sdc_overload (sdc_arrays_argsort , jit_options = {'parallel' : False })
485486def sdc_arrays_argsort_overload (A , kind = 'quicksort' ):
486- """Function overloading argsort for different 1D array types"""
487+ """Function providing pandas argsort implementation for different 1D array types"""
487488
488489 # kind is not known at compile time, so get this function here and use in impl if needed
489490 quicksort_func = quicksort .make_jit_quicksort ().run_quicksort
490491
492+ kind_is_default = isinstance (kind , str )
491493 if isinstance (A , types .Array ):
492- def _sdc_arrays_argsort_numeric_impl (A , kind = 'quicksort' ):
493- return numpy .argsort (A , kind = kind )
494- return _sdc_arrays_argsort_numeric_impl
494+ def _sdc_arrays_argsort_array_impl (A , kind = 'quicksort' ):
495+ _kind = 'quicksort' if kind_is_default == True else kind # noqa
496+ return numpy .argsort (A , kind = _kind )
497+
498+ return _sdc_arrays_argsort_array_impl
495499
496500 elif A == string_array_type :
497- def _sdc_arrays_argsort_str_impl (A , kind = 'quicksort' ):
501+ def _sdc_arrays_argsort_str_arr_impl (A , kind = 'quicksort' ):
498502
499503 nan_mask = sdc .hiframes .api .get_nan_mask (A )
500504 idx = numpy .arange (len (A ))
@@ -515,7 +519,10 @@ def _sdc_arrays_argsort_str_impl(A, kind='quicksort'):
515519 argsorted .extend (old_nan_positions )
516520 return numpy .asarray (argsorted , dtype = numpy .int32 )
517521
518- return _sdc_arrays_argsort_str_impl
522+ return _sdc_arrays_argsort_str_arr_impl
523+
524+ elif isinstance (A , types .List ):
525+ return None
519526
520527 return None
521528
@@ -591,3 +598,78 @@ def _sdc_pandas_series_align_impl(series, other, size='max', finiteness=False):
591598 return aligned , aligned_other
592599
593600 return _sdc_pandas_series_align_impl
601+
602+
603+ def _sdc_asarray (data ):
604+ pass
605+
606+
@sdc_overload(_sdc_asarray, jit_options={'parallel': True})
def _sdc_asarray_overload(data):
    """Pick the compiled implementation of ``_sdc_asarray`` based on the type of ``data``.

    Parameters
    ----------
    data
        The type of the argument as seen at compile time. Only ``types.List``
        is handled.

    Returns
    -------
    function or None
        * ``None`` when ``data`` is not a ``types.List`` (no implementation is
          offered for such types);
        * for a list whose ``dtype`` is ``types.UnicodeType``, an implementation
          that delegates to ``create_str_arr_from_list``;
        * for any other list, an implementation that copies the elements one by
          one into a new ``numpy`` array whose dtype is the list's ``dtype``.
    """

    # TODO: extend with other types
    if not isinstance(data, types.List):
        return None

    if isinstance(data.dtype, types.UnicodeType):
        def _sdc_asarray_str_list_impl(data):
            return create_str_arr_from_list(data)

        return _sdc_asarray_str_list_impl

    # Captured here so the implementation below can use it as a compile-time
    # constant for the result dtype.
    result_dtype = data.dtype

    def _sdc_asarray_list_impl(data):
        # TODO: check if elementwise copy is needed at all
        res_size = len(data)
        res_arr = numpy.empty(res_size, dtype=result_dtype)
        # Each iteration writes a distinct slot, so the copy is done with prange.
        for i in numba.prange(res_size):
            res_arr[i] = data[i]
        return res_arr

    return _sdc_asarray_list_impl
634+
635+
636+ def _sdc_take (data , indexes ):
637+ pass
638+
639+
@sdc_overload(_sdc_take, jit_options={'parallel': True})
def _sdc_take_overload(data, indexes):
    """Pick the compiled implementation of ``_sdc_take`` based on the type of ``data``.

    An implementation is returned for ``types.Array`` and for
    ``string_array_type``; for every other type the function returns ``None``.
    Both implementations build a new container of ``len(indexes)`` elements
    whose i-th element is ``data[indexes[i]]``.
    """

    if isinstance(data, types.Array):
        # Captured so the implementation can allocate a result of the same dtype.
        elem_dtype = data.dtype

        def _sdc_take_array_impl(data, indexes):
            count = len(indexes)
            taken = numpy.empty(count, dtype=elem_dtype)
            for pos in numba.prange(count):
                taken[pos] = data[indexes[pos]]
            return taken

        return _sdc_take_array_impl

    if data == string_array_type:
        def _sdc_take_str_arr_impl(data, indexes):
            count = len(indexes)

            # First pass: add up the UTF-8 size of every selected string and
            # remember which selected positions are NA in the source.
            is_na_at = numpy.zeros(count, dtype=numpy.bool_)
            bytes_needed = 0
            for pos in numba.prange(count):
                src = indexes[pos]
                bytes_needed += get_utf8_size(data[src])
                if isna(data, src):
                    is_na_at[pos] = True

            # Second pass: allocate the result once, then copy the strings and
            # re-apply the NA marks recorded above.
            taken = pre_alloc_string_array(count, bytes_needed)
            for pos in numpy.arange(count):
                taken[pos] = data[indexes[pos]]
                if is_na_at[pos]:
                    str_arr_set_na(taken, pos)

            return taken

        return _sdc_take_str_arr_impl

    return None
0 commit comments