44This module tests only the private methods (prefixed with underscore).
55"""
66
7+ from numba .typed import List as NumbaList
78import numpy as np
89import pytest
9- from numba .typed import List as NumbaList
1010
11+ import pandas ._testing as tm
1112from pandas .core .nanops_numba import (
1213 MIN_INT ,
1314 NumbaReductionOps ,
1415 _cast_to_timelike ,
1516 _chunk_arr_into_arr_list ,
1617 _get_initial_value ,
18+ _nanvar_std_sem ,
1719 _nb_reduce_arr_list_in_parallel ,
1820 _nb_reduce_single_arr ,
19- _nanvar_std_sem ,
2021 _nullify_below_mincount ,
2122 _reduce_chunked_results ,
2223 _reduce_empty_array ,
2324 nb_reduce ,
2425)
2526
26- import pandas ._testing as tm
27-
2827
2928class TestGetInitialValue :
3029 """Test the _get_initial_value private function."""
@@ -255,7 +254,7 @@ def test_parallel_with_mask(self):
255254 # Create corresponding mask list
256255 mask_list = NumbaList ()
257256 mask_list .append (np .array ([False , True , False ])) # Mask middle element
258- mask_list .append (np .array ([True , False , False ])) # Mask first element
257+ mask_list .append (np .array ([True , False , False ])) # Mask first element
259258
260259 target = np .zeros (len (arr_list ), dtype = np .float64 )
261260 result , counts = _nb_reduce_arr_list_in_parallel (
@@ -280,13 +279,18 @@ def test_single_chunk_reduction(self):
280279 return_dtype = np .dtype ("float64" )
281280
282281 result , count = _reduce_chunked_results (
283- "sum" , chunk_results , counts , final_length , return_dtype ,
284- skipna = True , find_initial_value = True
282+ "sum" ,
283+ chunk_results ,
284+ counts ,
285+ final_length ,
286+ return_dtype ,
287+ skipna = True ,
288+ find_initial_value = True ,
285289 )
286290
287291 # Should reduce the chunk_results array itself
288292 expected_result = np .array ([6.0 ]) # 1 + 2 + 3
289- expected_count = np .array ([6 ]) # 2 + 2 + 2
293+ expected_count = np .array ([6 ]) # 2 + 2 + 2
290294
291295 tm .assert_numpy_array_equal (result , expected_result )
292296 tm .assert_numpy_array_equal (count , expected_count )
@@ -299,8 +303,13 @@ def test_no_chunking_needed(self):
299303 return_dtype = np .dtype ("float64" )
300304
301305 result , count = _reduce_chunked_results (
302- "sum" , chunk_results , counts , final_length , return_dtype ,
303- skipna = True , find_initial_value = True
306+ "sum" ,
307+ chunk_results ,
308+ counts ,
309+ final_length ,
310+ return_dtype ,
311+ skipna = True ,
312+ find_initial_value = True ,
304313 )
305314
306315 # Should return results as-is (no further reduction needed)
@@ -371,7 +380,7 @@ def test_with_nan_values(self):
371380 assert np .isfinite (result )
372381
373382 def test_complex_array (self ):
374- arr = np .array ([1 + 2j , 3 + 4j ])
383+ arr = np .array ([1 + 2j , 3 + 4j ])
375384 result = _nanvar_std_sem (arr )
376385 # Should handle complex numbers by processing real and imag parts
377386 assert np .isfinite (result )
@@ -490,8 +499,11 @@ def test_nb_reduce_with_nans_skipna_true_multithreaded(
490499 ):
491500 """Test sum with NaN values and skipna=True on large array (multi-threaded)."""
492501 result , count = nb_reduce (
493- "sum" , large_2d_array_with_nans , axis = None ,
494- skipna = True , multi_threading = True
502+ "sum" ,
503+ large_2d_array_with_nans ,
504+ axis = None ,
505+ skipna = True ,
506+ multi_threading = True ,
495507 )
496508
497509 # Compare with numpy nansum
@@ -503,8 +515,9 @@ def test_nb_reduce_with_nans_skipna_true_multithreaded(
503515
504516 def test_nb_reduce_with_nans_axis_0_multithreaded (self , large_2d_array_with_nans ):
505517 """Test sum with NaN values along axis 0 (multi-threaded)."""
506- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 0 ,
507- skipna = True , multi_threading = True )
518+ result , count = nb_reduce (
519+ "sum" , large_2d_array_with_nans , axis = 0 , skipna = True , multi_threading = True
520+ )
508521
509522 # Compare with numpy nansum
510523 expected = np .nansum (large_2d_array_with_nans , axis = 0 )
@@ -515,8 +528,9 @@ def test_nb_reduce_with_nans_axis_0_multithreaded(self, large_2d_array_with_nans
515528
516529 def test_nb_reduce_with_nans_axis_1_multithreaded (self , large_2d_array_with_nans ):
517530 """Test sum with NaN values along axis 1 (multi-threaded)."""
518- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 1 ,
519- skipna = True , multi_threading = True )
531+ result , count = nb_reduce (
532+ "sum" , large_2d_array_with_nans , axis = 1 , skipna = True , multi_threading = True
533+ )
520534
521535 # Compare with numpy nansum
522536 expected = np .nansum (large_2d_array_with_nans , axis = 1 )
@@ -528,12 +542,14 @@ def test_nb_reduce_with_nans_axis_1_multithreaded(self, large_2d_array_with_nans
528542 def test_nb_reduce_single_thread_vs_multithread_consistency (self , large_2d_array ):
529543 """Test that single-threaded and multi-threaded results are identical."""
530544 # Single-threaded result
531- result_st , count_st = nb_reduce ("sum" , large_2d_array , axis = 0 ,
532- multi_threading = False )
545+ result_st , count_st = nb_reduce (
546+ "sum" , large_2d_array , axis = 0 , multi_threading = False
547+ )
533548
534549 # Multi-threaded result
535- result_mt , count_mt = nb_reduce ("sum" , large_2d_array , axis = 0 ,
536- multi_threading = True )
550+ result_mt , count_mt = nb_reduce (
551+ "sum" , large_2d_array , axis = 0 , multi_threading = True
552+ )
537553
538554 # Results should be identical
539555 tm .assert_numpy_array_equal (result_st , result_mt )
@@ -568,9 +584,14 @@ def test_nb_reduce_min_count_multithreaded(self, large_2d_array_with_nans):
568584 """Test min_count parameter with large array (multi-threaded)."""
569585 min_count = 100 # Require at least 100 non-NaN values per column
570586
571- result , count = nb_reduce ("sum" , large_2d_array_with_nans , axis = 0 ,
572- skipna = True , min_count = min_count ,
573- multi_threading = True )
587+ result , count = nb_reduce (
588+ "sum" ,
589+ large_2d_array_with_nans ,
590+ axis = 0 ,
591+ skipna = True ,
592+ min_count = min_count ,
593+ multi_threading = True ,
594+ )
574595
575596 # Check that columns with insufficient data are NaN
576597 valid_columns = count >= min_count
@@ -582,8 +603,9 @@ def test_nb_reduce_min_count_multithreaded(self, large_2d_array_with_nans):
582603
583604 def test_nb_reduce_mean_axis_none_multithreaded (self , large_2d_array ):
584605 """Test mean reduction with axis=None on large array (multi-threaded)."""
585- result , count = nb_reduce ("mean" , large_2d_array , axis = None ,
586- multi_threading = True )
606+ result , count = nb_reduce (
607+ "mean" , large_2d_array , axis = None , multi_threading = True
608+ )
587609
588610 # Compare with numpy result
589611 expected = np .mean (large_2d_array )
@@ -594,8 +616,7 @@ def test_nb_reduce_mean_axis_none_multithreaded(self, large_2d_array):
594616
595617 def test_nb_reduce_mean_axis_0_multithreaded (self , large_2d_array ):
596618 """Test mean reduction along axis 0 on large array (multi-threaded)."""
597- result , count = nb_reduce ("mean" , large_2d_array , axis = 0 ,
598- multi_threading = True )
619+ result , count = nb_reduce ("mean" , large_2d_array , axis = 0 , multi_threading = True )
599620
600621 # Compare with numpy result
601622 expected = np .mean (large_2d_array , axis = 0 )
@@ -606,8 +627,7 @@ def test_nb_reduce_mean_axis_0_multithreaded(self, large_2d_array):
606627
607628 def test_nb_reduce_mean_axis_1_multithreaded (self , large_2d_array ):
608629 """Test mean reduction along axis 1 on large array (multi-threaded)."""
609- result , count = nb_reduce ("mean" , large_2d_array , axis = 1 ,
610- multi_threading = True )
630+ result , count = nb_reduce ("mean" , large_2d_array , axis = 1 , multi_threading = True )
611631
612632 # Compare with numpy result
613633 expected = np .mean (large_2d_array , axis = 1 )
@@ -618,35 +638,38 @@ def test_nb_reduce_mean_axis_1_multithreaded(self, large_2d_array):
618638
619639 def test_nb_reduce_sum_square_axis_none_multithreaded (self , large_2d_array ):
620640 """Test sum_square reduction with axis=None on large array."""
621- result , count = nb_reduce ("sum_square" , large_2d_array , axis = None ,
622- multi_threading = True )
641+ result , count = nb_reduce (
642+ "sum_square" , large_2d_array , axis = None , multi_threading = True
643+ )
623644
624645 # Compare with numpy result (sum of squares)
625- expected = np .sum (large_2d_array ** 2 )
646+ expected = np .sum (large_2d_array ** 2 )
626647 expected_count = large_2d_array .size
627648
628649 tm .assert_almost_equal (result , expected , rtol = 1e-10 )
629650 assert count == expected_count
630651
631652 def test_nb_reduce_sum_square_axis_0_multithreaded (self , large_2d_array ):
632653 """Test sum_square reduction along axis 0 on large array."""
633- result , count = nb_reduce ("sum_square" , large_2d_array , axis = 0 ,
634- multi_threading = True )
654+ result , count = nb_reduce (
655+ "sum_square" , large_2d_array , axis = 0 , multi_threading = True
656+ )
635657
636658 # Compare with numpy result (sum of squares along axis 0)
637- expected = np .sum (large_2d_array ** 2 , axis = 0 )
659+ expected = np .sum (large_2d_array ** 2 , axis = 0 )
638660 expected_count = np .full (large_2d_array .shape [1 ], large_2d_array .shape [0 ])
639661
640662 tm .assert_numpy_array_equal (result , expected )
641663 tm .assert_numpy_array_equal (count , expected_count )
642664
643665 def test_nb_reduce_sum_square_axis_1_multithreaded (self , large_2d_array ):
644666 """Test sum_square reduction along axis 1 on large array."""
645- result , count = nb_reduce ("sum_square" , large_2d_array , axis = 1 ,
646- multi_threading = True )
667+ result , count = nb_reduce (
668+ "sum_square" , large_2d_array , axis = 1 , multi_threading = True
669+ )
647670
648671 # Compare with numpy result (sum of squares along axis 1)
649- expected = np .sum (large_2d_array ** 2 , axis = 1 )
672+ expected = np .sum (large_2d_array ** 2 , axis = 1 )
650673 expected_count = np .full (large_2d_array .shape [0 ], large_2d_array .shape [1 ])
651674
652675 np .testing .assert_array_almost_equal (result , expected )
@@ -687,8 +710,9 @@ def timedelta64_2d_array_with_nat(self):
687710
688711 def test_nb_reduce_timedelta64_sum_axis_none (self , timedelta64_2d_array ):
689712 """Test sum reduction on timedelta64 array with axis=None."""
690- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = None ,
691- multi_threading = True )
713+ result , count = nb_reduce (
714+ "sum" , timedelta64_2d_array , axis = None , multi_threading = True
715+ )
692716
693717 # Compare with numpy result
694718 expected = np .sum (timedelta64_2d_array )
@@ -699,42 +723,48 @@ def test_nb_reduce_timedelta64_sum_axis_none(self, timedelta64_2d_array):
699723
700724 def test_nb_reduce_timedelta64_sum_axis_0 (self , timedelta64_2d_array ):
701725 """Test sum reduction on timedelta64 array along axis 0."""
702- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = 0 ,
703- multi_threading = True )
726+ result , count = nb_reduce (
727+ "sum" , timedelta64_2d_array , axis = 0 , multi_threading = True
728+ )
704729
705730 # Compare with numpy result
706731 expected = np .sum (timedelta64_2d_array , axis = 0 )
707- expected_count = np .full (timedelta64_2d_array .shape [1 ],
708- timedelta64_2d_array .shape [0 ])
732+ expected_count = np .full (
733+ timedelta64_2d_array .shape [1 ], timedelta64_2d_array .shape [0 ]
734+ )
709735
710736 tm .assert_numpy_array_equal (result , expected )
711737 tm .assert_numpy_array_equal (count , expected_count )
712738
713739 def test_nb_reduce_timedelta64_sum_axis_1 (self , timedelta64_2d_array ):
714740 """Test sum reduction on timedelta64 array along axis 1."""
715- result , count = nb_reduce ("sum" , timedelta64_2d_array , axis = 1 ,
716- multi_threading = True )
741+ result , count = nb_reduce (
742+ "sum" , timedelta64_2d_array , axis = 1 , multi_threading = True
743+ )
717744
718745 # Compare with numpy result
719746 expected = np .sum (timedelta64_2d_array , axis = 1 )
720- expected_count = np .full (timedelta64_2d_array .shape [0 ],
721- timedelta64_2d_array .shape [1 ])
747+ expected_count = np .full (
748+ timedelta64_2d_array .shape [0 ], timedelta64_2d_array .shape [1 ]
749+ )
722750
723751 tm .assert_numpy_array_equal (result , expected )
724752 tm .assert_numpy_array_equal (count , expected_count )
725753
726754 def test_nb_reduce_timedelta64_min_max (self , timedelta64_2d_array ):
727755 """Test min/max reduction on timedelta64 array."""
728756 # Test min
729- result_min , count_min = nb_reduce ("min" , timedelta64_2d_array , axis = None ,
730- multi_threading = True )
757+ result_min , count_min = nb_reduce (
758+ "min" , timedelta64_2d_array , axis = None , multi_threading = True
759+ )
731760 expected_min = np .min (timedelta64_2d_array )
732761 assert result_min == expected_min
733762 assert count_min == timedelta64_2d_array .size
734763
735764 # Test max
736- result_max , count_max = nb_reduce ("max" , timedelta64_2d_array , axis = None ,
737- multi_threading = True )
765+ result_max , count_max = nb_reduce (
766+ "max" , timedelta64_2d_array , axis = None , multi_threading = True
767+ )
738768 expected_max = np .max (timedelta64_2d_array )
739769 assert result_max == expected_max
740770 assert count_max == timedelta64_2d_array .size
@@ -743,8 +773,13 @@ def test_nb_reduce_timedelta64_with_nat_skipna_true(
743773 self , timedelta64_2d_array_with_nat
744774 ):
745775 """Test reduction on timedelta64 array with NaT values, skipna=True."""
746- result , count = nb_reduce ("sum" , timedelta64_2d_array_with_nat , axis = None ,
747- skipna = True , multi_threading = True )
776+ result , count = nb_reduce (
777+ "sum" ,
778+ timedelta64_2d_array_with_nat ,
779+ axis = None ,
780+ skipna = True ,
781+ multi_threading = True ,
782+ )
748783
749784 # Compare with numpy result
750785 # For timedelta64 with NaT, we need to use nansum equivalent
@@ -759,21 +794,28 @@ def test_nb_reduce_timedelta64_with_nat_skipna_false(
759794 self , timedelta64_2d_array_with_nat
760795 ):
761796 """Test reduction on timedelta64 array with NaT values, skipna=False."""
762- result , count = nb_reduce ("sum" , timedelta64_2d_array_with_nat , axis = None ,
763- skipna = False , multi_threading = True )
797+ result , count = nb_reduce (
798+ "sum" ,
799+ timedelta64_2d_array_with_nat ,
800+ axis = None ,
801+ skipna = False ,
802+ multi_threading = True ,
803+ )
764804
765805 # When skipna=False and there are NaT values, result should be NaT
766806 assert np .isnat (result )
767807
768808 def test_nb_reduce_timedelta64_mean_axis_0 (self , timedelta64_2d_array ):
769809 """Test mean reduction on timedelta64 array along axis 0."""
770- result , count = nb_reduce ("mean" , timedelta64_2d_array , axis = 0 ,
771- multi_threading = True )
810+ result , count = nb_reduce (
811+ "mean" , timedelta64_2d_array , axis = 0 , multi_threading = True
812+ )
772813
773814 # Compare with numpy result
774815 expected = np .mean (timedelta64_2d_array , axis = 0 )
775- expected_count = np .full (timedelta64_2d_array .shape [1 ],
776- timedelta64_2d_array .shape [0 ])
816+ expected_count = np .full (
817+ timedelta64_2d_array .shape [1 ], timedelta64_2d_array .shape [0 ]
818+ )
777819
778820 tm .assert_numpy_array_equal (result , expected )
779821 tm .assert_numpy_array_equal (count , expected_count )
0 commit comments