@@ -258,45 +258,53 @@ def test_impl(df):
258258 # np.testing.assert_array_equal(hpat_func(df), test_impl(df))
259259 self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
260260
261- @skip_numba_jit
261+ @skip_numba_jit ( "BUG: SDC impl of Series.sum returns float64 on as series of ints" )
262262 def test_agg_seq_sum (self ):
263263 def test_impl (df ):
264- A = df .groupby ('A' )['B' ].sum ()
265- return A .values
264+ return df .groupby ('A' )['B' ].sum ()
266265
267266 hpat_func = self .jit (test_impl )
268267 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
269- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
269+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
269+ result = hpat_func (df )
270+ result_ref = pd .DataFrame (test_impl (df ))
271+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
270272
271- @skip_numba_jit
273+ @skip_sdc_jit ( "Old-style implementation returns ndarray, not a Series" )
272274 def test_agg_seq_count (self ):
273275 def test_impl (df ):
274- A = df .groupby ('A' )['B' ].count ()
275- return A .values
276+ return df .groupby ('A' )['B' ].count ()
276277
277278 hpat_func = self .jit (test_impl )
278279 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
279- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
280+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
281+ result = hpat_func (df )
282+ result_ref = pd .DataFrame (test_impl (df ))
283+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
280284
281- @skip_numba_jit
285+ @skip_sdc_jit ( "Old-style implementation returns ndarray, not a Series" )
282286 def test_agg_seq_mean (self ):
283287 def test_impl (df ):
284- A = df .groupby ('A' )['B' ].mean ()
285- return A .values
288+ return df .groupby ('A' )['B' ].mean ()
286289
287290 hpat_func = self .jit (test_impl )
288291 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
289- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
292+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
293+ result = hpat_func (df )
294+ result_ref = pd .DataFrame (test_impl (df ))
295+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
290296
291- @skip_numba_jit
297+ @skip_sdc_jit ( "Old-style implementation returns ndarray, not a Series" )
292298 def test_agg_seq_min (self ):
293299 def test_impl (df ):
294- A = df .groupby ('A' )['B' ].min ()
295- return A .values
300+ return df .groupby ('A' )['B' ].min ()
296301
297302 hpat_func = self .jit (test_impl )
298303 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
299- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
304+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
305+ result = hpat_func (df )
306+ result_ref = pd .DataFrame (test_impl (df ))
307+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
300308
301309 @skip_numba_jit
302310 def test_agg_seq_min_date (self ):
@@ -308,15 +316,17 @@ def test_impl(df):
308316 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : pd .date_range ('2019-1-3' , '2019-1-9' )})
309317 self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
310318
311- @skip_numba_jit
319+ @skip_sdc_jit ( "Old-style implementation returns ndarray, not a Series" )
312320 def test_agg_seq_max (self ):
313321 def test_impl (df ):
314- A = df .groupby ('A' )['B' ].max ()
315- return A .values
322+ return df .groupby ('A' )['B' ].max ()
316323
317324 hpat_func = self .jit (test_impl )
318325 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
319- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
326+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
327+ result = hpat_func (df )
328+ result_ref = pd .DataFrame (test_impl (df ))
329+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
320330
321331 @skip_numba_jit
322332 def test_agg_seq_all_col (self ):
@@ -338,37 +348,43 @@ def test_impl(df):
338348 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
339349 self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
340350
341- @skip_numba_jit
351+ @skip_sdc_jit ( "Old-style implementation returns ndarray, not a Series" )
342352 def test_agg_seq_prod (self ):
343353 def test_impl (df ):
344- A = df .groupby ('A' )['B' ].prod ()
345- return A .values
354+ return df .groupby ('A' )['B' ].prod ()
346355
347356 hpat_func = self .jit (test_impl )
348357 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
349- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
358+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
359+ result = hpat_func (df )
360+ result_ref = pd .DataFrame (test_impl (df ))
361+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
350362
351363 @skip_sdc_jit
352364 @skip_numba_jit
353365 def test_agg_seq_var (self ):
354366 def test_impl (df ):
355- A = df .groupby ('A' )['B' ].var ()
356- return A .values
367+ return df .groupby ('A' )['B' ].var ()
357368
358369 hpat_func = self .jit (test_impl )
359370 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
360- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
371+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
372+ result = hpat_func (df )
373+ result_ref = pd .DataFrame (test_impl (df ))
374+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
361375
362376 @skip_sdc_jit
363377 @skip_numba_jit
364378 def test_agg_seq_std (self ):
365379 def test_impl (df ):
366- A = df .groupby ('A' )['B' ].std ()
367- return A .values
380+ return df .groupby ('A' )['B' ].std ()
368381
369382 hpat_func = self .jit (test_impl )
370383 df = pd .DataFrame ({'A' : [2 , 1 , 1 , 1 , 2 , 2 , 1 ], 'B' : [- 8 , 2 , 3 , 1 , 5 , 6 , 7 ]})
371- self .assertEqual (set (hpat_func (df )), set (test_impl (df )))
384+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
385+ result = hpat_func (df )
386+ result_ref = pd .DataFrame (test_impl (df ))
387+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
372388
373389 @skip_numba_jit
374390 def test_agg_seq_multiselect (self ):
@@ -661,6 +677,53 @@ def test_impl(df):
661677 hpat_func = self .jit (test_impl )
662678 pd .testing .assert_frame_equal (hpat_func (df ), test_impl (df ))
663679
680+ def test_dataframe_groupby_getitem_literal_tuple (self ):
681+ def test_impl (df ):
682+ return df .groupby ('A' )['B' , 'C' ].count ()
683+ hpat_func = self .jit (test_impl )
684+
685+ df = pd .DataFrame (_default_df_numeric_data )
686+ result = hpat_func (df )
687+ result_ref = test_impl (df )
688+ # TODO: implement index classes, as current indexes do not have names
689+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
690+
691+ def test_dataframe_groupby_getitem_literal_str (self ):
692+ def test_impl (df ):
693+ return df .groupby ('C' )['B' ].count ()
694+ hpat_func = self .jit (test_impl )
695+
696+ df = pd .DataFrame (_default_df_numeric_data )
697+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
698+ result = hpat_func (df )
699+ result_ref = pd .DataFrame (test_impl (df ))
700+ # TODO: implement index classes, as current indexes do not have names
701+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
702+
703+ def test_dataframe_groupby_getitem_unicode_str (self ):
704+ def test_impl (df , col_name ):
705+ return df .groupby ('A' )[col_name ].count ()
706+ hpat_func = self .jit (test_impl )
707+
708+ df = pd .DataFrame (_default_df_numeric_data )
709+ col_name = 'C'
710+ # pandas returns a Series here (selecting a single column yields a SeriesGroupBy), so result_ref is wrapped in a DataFrame for the frame comparison below
711+ result = hpat_func (df , col_name )
712+ result_ref = pd .DataFrame (test_impl (df , col_name ))
713+ # TODO: implement index classes, as current indexes do not have names
714+ pd .testing .assert_frame_equal (result , result_ref , check_names = False )
715+
716+ def test_dataframe_groupby_getitem_repeated (self ):
717+ def test_impl (df ):
718+ return df .groupby ('A' )['B' , 'C' ]['D' ]
719+ hpat_func = self .jit (test_impl )
720+
721+ df = pd .DataFrame (_default_df_numeric_data )
722+ with self .assertRaises (Exception ) as context :
723+ test_impl (df )
724+ pandas_exception = context .exception
725+
726+ self .assertRaises (type (pandas_exception ), hpat_func , df )
664727
665728if __name__ == "__main__" :
666729 unittest .main ()
0 commit comments