@@ -1420,16 +1420,30 @@ def build_dataframe(args, constructor):
14201420
14211421 # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
14221422 df_provided = args ["data_frame" ] is not None
1423+
1424+ # Flag that indicates if the resulting data_frame after parsing is pandas-like
1425+ # (in terms of resulting Narwhals DataFrame).
1426+ # True if pandas, modin.pandas or cudf DataFrame/Series instance, or converted from
1427+ # PySpark to pandas.
14231428 is_pd_like = False
1429+
1430+ # Flag that indicates if data_frame needs to be converted to arrow via the
1431+ # dataframe interchange protocol.
1432+ # True if Ibis, DuckDB, Vaex, or any other object that implements __dataframe__
14241433 needs_interchanging = False
1434+
1435+ # If data_frame is provided, we parse it into a narwhals DataFrame, while accounting
1436+ # for compatibility with pandas specific paths (e.g. Index/MultiIndex case).
14251437 if df_provided :
14261438
1439+ # data_frame is pandas-like DataFrame (pandas, modin.pandas, cudf)
14271440 if nw .dependencies .is_pandas_like_dataframe (args ["data_frame" ]):
14281441
14291442 columns = args ["data_frame" ].columns # This can be multi index
14301443 args ["data_frame" ] = nw .from_native (args ["data_frame" ], eager_only = True )
14311444 is_pd_like = True
14321445
1446+ # data_frame is pandas-like Series (pandas, modin.pandas, cudf)
14331447 elif nw .dependencies .is_pandas_like_series (args ["data_frame" ]):
14341448
14351449 args ["data_frame" ] = nw .from_native (
@@ -1438,6 +1452,9 @@ def build_dataframe(args, constructor):
14381452 columns = args ["data_frame" ].columns
14391453 is_pd_like = True
14401454
1455+ # data_frame is any other DataFrame object natively supported via Narwhals.
1456+ # With strict=False, the original object will be returned if unable to convert
1457+ # to a Narwhals DataFrame, making this condition False.
14411458 elif isinstance (
14421459 data_frame := nw .from_native (
14431460 args ["data_frame" ], eager_or_interchange_only = True , strict = False
@@ -1448,6 +1465,9 @@ def build_dataframe(args, constructor):
14481465 needs_interchanging = nw .get_level (data_frame ) == "interchange"
14491466 columns = args ["data_frame" ].columns
14501467
1468+ # data_frame is any other Series object natively supported via Narwhals.
1469+ # With strict=False, the original object will be returned if unable to convert
1470+ # to a Narwhals Series, making this condition False.
14511471 elif isinstance (
14521472 series := nw .from_native (
14531473 args ["data_frame" ], series_only = True , strict = False
@@ -1457,15 +1477,19 @@ def build_dataframe(args, constructor):
14571477 args ["data_frame" ] = series .to_frame ()
14581478 columns = args ["data_frame" ].columns
14591479
1480+ # data_frame is PySpark: it does not support interchange protocol and it is not
1481+ # integrated in Narwhals. We use its native method to convert it to pandas.
14601482 elif hasattr (args ["data_frame" ], "toPandas" ):
1461- # data_frame is PySpark: it does not support interchange and it is not
1462- # integrated in narwhals just yet
1483+ # Convert to pandas with the native toPandas() method, then wrap in Narwhals.
14631484 args ["data_frame" ] = nw .from_native (
14641485 args ["data_frame" ].toPandas (), eager_only = True
14651486 )
14661487 columns = args ["data_frame" ].columns
14671488 is_pd_like = True
14681489
1490+ # data_frame is some other object type (e.g. dict, list, ...)
1491+ # We try to import pandas, and then try to instantiate a pandas dataframe from
1492+ # this object.
14691493 else :
14701494 try :
14711495 import pandas as pd
@@ -1477,18 +1501,24 @@ def build_dataframe(args, constructor):
14771501 columns = args ["data_frame" ].columns
14781502 is_pd_like = True
14791503 except Exception :
1480- msg = f"Unsupported type: { type (args ['data_frame' ])} "
1504+ msg = (
1505+ f"Unable to convert data_frame of type { type (args ['data_frame' ])} "
1506+ "to pandas DataFrame. Please provide a supported dataframe type "
1507+ "or a type that can be passed to pd.DataFrame."
1508+ )
1509+
14811510 raise NotImplementedError (msg )
14821511 except ImportError :
14831512 msg = (
1484- f"data_frame of type { type (args ['data_frame' ])} requires Pandas "
1485- "to be installed. Convert it to supported dataframe type or "
1486- "install Pandas ."
1513+ f"Attempting to convert data_frame of type { type (args ['data_frame' ])} "
1514+ "to pandas DataFrame, but Pandas is not installed. "
1515+ "Convert it to supported dataframe type or install pandas ."
14871516 )
14881517 raise NotImplementedError (msg )
14891518
1519+ # data_frame is not provided
14901520 else :
1491- columns = None # no data_frame
1521+ columns = None
14921522
14931523 df_input : nw .DataFrame | None = args ["data_frame" ]
14941524 index = (
@@ -1573,7 +1603,7 @@ def build_dataframe(args, constructor):
15731603 value_name = _escape_col_name (columns , "value" , [])
15741604 var_name = _escape_col_name (columns , var_name , [])
15751605
1576- if isinstance ( args [ "data_frame" ], nw . DataFrame ) and needs_interchanging :
1606+ if needs_interchanging :
15771607 # Interchange to PyArrow
15781608 if wide_mode :
15791609 args ["data_frame" ] = nw .from_native (
@@ -2035,8 +2065,6 @@ def process_dataframe_timeline(args):
20352065 raise ValueError ("Both x_start and x_end are required" )
20362066
20372067 try :
2038- # TODO(FBruzzesi): We still cannot infer datetime format for pyarrow
2039- # Related issue: https://github.com/narwhals-dev/narwhals/issues/1151
20402068 df : nw .DataFrame = args ["data_frame" ]
20412069 df = df .with_columns (
20422070 ** {
0 commit comments