@@ -278,63 +278,50 @@ def test_distinct():
278278 assert df_a .collect () == df_b .collect ()
279279
280280
281- def test_window_functions (df ):
281+ test_data_window_functions = [
282+ ("row" , f .window ("row_number" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 3 ]),
283+ ("rank" , f .window ("rank" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 2 ]),
284+ ("dense_rank" , f .window ("dense_rank" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 2 ] ),
285+ ("percent_rank" , f .window ("percent_rank" , [], order_by = [f .order_by (column ("c" ))]), [0.5 , 0 , 0.5 ]),
286+ ("cume_dist" , f .window ("cume_dist" , [], order_by = [f .order_by (column ("b" ))]), [0.3333333333333333 , 0.6666666666666666 , 1.0 ]),
287+ ("ntile" , f .window ("ntile" , [literal (2 )], order_by = [f .order_by (column ("c" ))]), [1 , 1 , 2 ]),
288+ ("next" , f .window ("lead" , [column ("b" )], order_by = [f .order_by (column ("b" ))]), [5 , 6 , None ]),
289+ ("previous" , f .window ("lag" , [column ("b" )], order_by = [f .order_by (column ("b" ))]), [None , 4 , 5 ]),
290+ pytest .param (
291+ "first_value" ,
292+ f .window (
293+ "first_value" ,
294+ [column ("a" )],
295+ order_by = [f .order_by (column ("b" ))]
296+ ),
297+ [1 , 1 , 1 ],
298+ marks = pytest .mark .xfail ,
299+ ),
300+ pytest .param (
301+ "last_value" ,
302+ f .window ("last_value" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
303+ [4 , 5 , 6 ],
304+ marks = pytest .mark .xfail ,
305+ ),
306+ pytest .param (
307+ "2nd_value" ,
308+ f .window (
309+ "nth_value" ,
310+ [column ("b" ), literal (2 )],
311+ order_by = [f .order_by (column ("b" ))],
312+ ),
313+ [None , 5 , 5 ],
314+ ),
315+ ]
316+
317+
318+ @pytest .mark .parametrize ("name,expr,result" , test_data_window_functions )
319+ def test_window_functions (df , name , expr , result ):
282320 df = df .select (
283321 column ("a" ),
284322 column ("b" ),
285323 column ("c" ),
286- f .alias (
287- f .window ("row_number" , [], order_by = [f .order_by (column ("c" ))]),
288- "row" ,
289- ),
290- f .alias (
291- f .window ("rank" , [], order_by = [f .order_by (column ("c" ))]),
292- "rank" ,
293- ),
294- f .alias (
295- f .window ("dense_rank" , [], order_by = [f .order_by (column ("c" ))]),
296- "dense_rank" ,
297- ),
298- f .alias (
299- f .window ("percent_rank" , [], order_by = [f .order_by (column ("c" ))]),
300- "percent_rank" ,
301- ),
302- f .alias (
303- f .window ("cume_dist" , [], order_by = [f .order_by (column ("b" ))]),
304- "cume_dist" ,
305- ),
306- f .alias (
307- f .window ("ntile" , [literal (2 )], order_by = [f .order_by (column ("c" ))]),
308- "ntile" ,
309- ),
310- f .alias (
311- f .window ("lag" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
312- "previous" ,
313- ),
314- f .alias (
315- f .window ("lead" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
316- "next" ,
317- ),
318- f .alias (
319- f .window (
320- "first_value" ,
321- [column ("a" )],
322- order_by = [f .order_by (column ("b" ))],
323- ),
324- "first_value" ,
325- ),
326- f .alias (
327- f .window ("last_value" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
328- "last_value" ,
329- ),
330- f .alias (
331- f .window (
332- "nth_value" ,
333- [column ("b" ), literal (2 )],
334- order_by = [f .order_by (column ("b" ))],
335- ),
336- "2nd_value" ,
337- ),
324+ f .alias (expr , name )
338325 )
339326
340327 table = pa .Table .from_batches (df .collect ())
@@ -343,18 +330,9 @@ def test_window_functions(df):
343330 "a" : [1 , 2 , 3 ],
344331 "b" : [4 , 5 , 6 ],
345332 "c" : [8 , 5 , 8 ],
346- "row" : [2 , 1 , 3 ],
347- "rank" : [2 , 1 , 2 ],
348- "dense_rank" : [2 , 1 , 2 ],
349- "percent_rank" : [0.5 , 0 , 0.5 ],
350- "cume_dist" : [0.3333333333333333 , 0.6666666666666666 , 1.0 ],
351- "ntile" : [1 , 1 , 2 ],
352- "next" : [5 , 6 , None ],
353- "previous" : [None , 4 , 5 ],
354- "first_value" : [1 , 1 , 1 ],
355- "last_value" : [4 , 5 , 6 ],
356- "2nd_value" : [None , 5 , 5 ],
333+ name : result
357334 }
335+
358336 assert table .sort_by ("a" ).to_pydict () == expected
359337
360338
0 commit comments