@@ -24,6 +24,7 @@ use arrow::compute::can_cast_types;
2424use arrow:: error:: ArrowError ;
2525use arrow:: ffi:: FFI_ArrowSchema ;
2626use arrow:: ffi_stream:: FFI_ArrowArrayStream ;
27+ use arrow:: pyarrow:: FromPyArrow ;
2728use datafusion:: arrow:: datatypes:: Schema ;
2829use datafusion:: arrow:: pyarrow:: { PyArrowType , ToPyArrow } ;
2930use datafusion:: arrow:: util:: pretty;
@@ -301,68 +302,8 @@ impl PyDataFrame {
301302 batches : None ,
302303 }
303304 }
304- }
305-
306- #[ pymethods]
307- impl PyDataFrame {
308- /// Enable selection for `df[col]`, `df[col1, col2, col3]`, and `df[[col1, col2, col3]]`
309- fn __getitem__ ( & self , key : Bound < ' _ , PyAny > ) -> PyDataFusionResult < Self > {
310- if let Ok ( key) = key. extract :: < PyBackedStr > ( ) {
311- // df[col]
312- self . select_columns ( vec ! [ key] )
313- } else if let Ok ( tuple) = key. downcast :: < PyTuple > ( ) {
314- // df[col1, col2, col3]
315- let keys = tuple
316- . iter ( )
317- . map ( |item| item. extract :: < PyBackedStr > ( ) )
318- . collect :: < PyResult < Vec < PyBackedStr > > > ( ) ?;
319- self . select_columns ( keys)
320- } else if let Ok ( keys) = key. extract :: < Vec < PyBackedStr > > ( ) {
321- // df[[col1, col2, col3]]
322- self . select_columns ( keys)
323- } else {
324- let message = "DataFrame can only be indexed by string index or indices" . to_string ( ) ;
325- Err ( PyDataFusionError :: Common ( message) )
326- }
327- }
328-
329- fn __repr__ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
330- // Get the Python formatter config
331- let PythonFormatter {
332- formatter : _,
333- config,
334- } = get_python_formatter_with_config ( py) ?;
335-
336- let should_cache = * is_ipython_env ( py) && self . batches . is_none ( ) ;
337- let ( batches, has_more) = match self . batches . take ( ) {
338- Some ( b) => b,
339- None => wait_for_future (
340- py,
341- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
342- ) ??,
343- } ;
344-
345- if batches. is_empty ( ) {
346- // This should not be reached, but do it for safety since we index into the vector below
347- return Ok ( "No data to display" . to_string ( ) ) ;
348- }
349-
350- let batches_as_displ =
351- pretty:: pretty_format_batches ( & batches) . map_err ( py_datafusion_err) ?;
352-
353- let additional_str = match has_more {
354- true => "\n Data truncated." ,
355- false => "" ,
356- } ;
357-
358- if should_cache {
359- self . batches = Some ( ( batches, has_more) ) ;
360- }
361-
362- Ok ( format ! ( "DataFrame()\n {batches_as_displ}{additional_str}" ) )
363- }
364305
365- fn _repr_html_ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
306+ fn prepare_repr_string ( & mut self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
366307 // Get the Python formatter and config
367308 let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
368309
@@ -398,15 +339,82 @@ impl PyDataFrame {
398339 kwargs. set_item ( "has_more" , has_more) ?;
399340 kwargs. set_item ( "table_uuid" , table_uuid) ?;
400341
401- let html_result = formatter. call_method ( "format_html" , ( ) , Some ( & kwargs) ) ?;
402- let html_str: String = html_result. extract ( ) ?;
342+ let method_name = match as_html {
343+ true => "format_html" ,
344+ false => "format_str" ,
345+ } ;
403346
347+ let html_result = formatter. call_method ( method_name, ( ) , Some ( & kwargs) ) ?;
348+ let html_str: String = html_result. extract ( ) ?;
404349 if should_cache {
405350 self . batches = Some ( ( batches, has_more) ) ;
406351 }
407352
408353 Ok ( html_str)
409354 }
355+ }
356+
357+ #[ pymethods]
358+ impl PyDataFrame {
359+ /// Enable selection for `df[col]`, `df[col1, col2, col3]`, and `df[[col1, col2, col3]]`
360+ fn __getitem__ ( & self , key : Bound < ' _ , PyAny > ) -> PyDataFusionResult < Self > {
361+ if let Ok ( key) = key. extract :: < PyBackedStr > ( ) {
362+ // df[col]
363+ self . select_columns ( vec ! [ key] )
364+ } else if let Ok ( tuple) = key. downcast :: < PyTuple > ( ) {
365+ // df[col1, col2, col3]
366+ let keys = tuple
367+ . iter ( )
368+ . map ( |item| item. extract :: < PyBackedStr > ( ) )
369+ . collect :: < PyResult < Vec < PyBackedStr > > > ( ) ?;
370+ self . select_columns ( keys)
371+ } else if let Ok ( keys) = key. extract :: < Vec < PyBackedStr > > ( ) {
372+ // df[[col1, col2, col3]]
373+ self . select_columns ( keys)
374+ } else {
375+ let message = "DataFrame can only be indexed by string index or indices" . to_string ( ) ;
376+ Err ( PyDataFusionError :: Common ( message) )
377+ }
378+ }
379+
380+ fn __repr__ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
381+ self . prepare_repr_string ( py, false )
382+ }
383+
384+ fn _repr_html_ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
385+ self . prepare_repr_string ( py, true )
386+ }
387+
388+ #[ staticmethod]
389+ #[ expect( unused_variables) ]
390+ fn default_str_repr < ' py > (
391+ batches : Vec < Bound < ' py , PyAny > > ,
392+ schema : & Bound < ' py , PyAny > ,
393+ has_more : bool ,
394+ table_uuid : & str ,
395+ ) -> PyResult < String > {
396+ let batches = batches
397+ . into_iter ( )
398+ . map ( |batch| RecordBatch :: from_pyarrow_bound ( & batch) )
399+ . collect :: < PyResult < Vec < RecordBatch > > > ( ) ?
400+ . into_iter ( )
401+ . filter ( |batch| batch. num_rows ( ) > 0 )
402+ . collect :: < Vec < _ > > ( ) ;
403+
404+ if batches. is_empty ( ) {
405+ return Ok ( "No data to display" . to_owned ( ) ) ;
406+ }
407+
408+ let batches_as_displ =
409+ pretty:: pretty_format_batches ( & batches) . map_err ( py_datafusion_err) ?;
410+
411+ let additional_str = match has_more {
412+ true => "\n Data truncated." ,
413+ false => "" ,
414+ } ;
415+
416+ Ok ( format ! ( "DataFrame()\n {batches_as_displ}{additional_str}" ) )
417+ }
410418
411419 /// Calculate summary statistics for a DataFrame
412420 fn describe ( & self , py : Python ) -> PyDataFusionResult < Self > {
0 commit comments