11"""
22Built-in datasets for demonstration, educational and test purposes.
33"""
4+ import os
5+ from importlib import import_module
6+
47import narwhals .stable .v1 as nw
58
9+ AVAILABLE_BACKENDS = {"pandas" , "polars" , "pyarrow" }
10+
611
712def gapminder (
813 datetimes = False ,
@@ -372,9 +377,10 @@ def _get_dataset(d, return_type):
372377 """
373378 Loads the dataset using the specified backend.
374379
375- Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all
376- have a `read_csv` function. Therefore we can dynamically load the library via
377- `importlib.import_module` and then call `backend.read_csv(filepath)`.
380+ Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all have
381+ a `read_csv` function (pyarrow has it via pyarrow.csv). Therefore we can dynamically
382+ load the library using `importlib.import_module` and then call
383+ `backend.read_csv(filepath)`.
378384
379385 Parameters
380386 ----------
@@ -388,23 +394,20 @@ def _get_dataset(d, return_type):
388394 -------
389395 Dataframe of `return_type` type
390396 """
391- import os
392- from importlib import import_module
393-
394- AVAILABLE_BACKENDS = {"pandas" , "polars" , "pyarrow" }
395-
396397 filepath = os .path .join (
397398 os .path .dirname (os .path .dirname (__file__ )),
398399 "package_data" ,
399400 "datasets" ,
400401 d + ".csv.gz" ,
401402 )
403+
402404 if return_type not in AVAILABLE_BACKENDS :
403405 msg = f"Unsupported return_type. Found { return_type } , expected one of { AVAILABLE_BACKENDS } "
404406 raise NotImplementedError (msg )
405407
406408 try :
407- backend = import_module (return_type )
409+ module_to_load = "pyarrow.csv" if return_type == "pyarrow" else return_type
410+ backend = import_module (module_to_load )
408411 except ModuleNotFoundError :
409412 msg = f"return_type={ return_type } , but { return_type } is not installed"
410413 raise ModuleNotFoundError (msg )
0 commit comments