Skip to content

Commit b6516b4

Browse files
committed
docstrings
1 parent 7735366 commit b6516b4

File tree

1 file changed

+165
-32
lines changed

1 file changed

+165
-32
lines changed

packages/python/plotly/plotly/data/__init__.py

Lines changed: 165 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,30 @@ def gapminder(
1616
1717
https://www.gapminder.org/data/
1818
19-
Returns:
20-
A `pandas.DataFrame` with 1704 rows and the following columns:
19+
Parameters
20+
----------
21+
datetimes: bool
22+
Whether or not the 'year' column will be converted to datetime type
23+
24+
centroids: bool
25+
If True, ['centroid_lat', 'centroid_lon'] columns are added
26+
27+
year: int | None
28+
If provided, the dataset will be filtered for that year
29+
30+
pretty_names: bool
31+
If True, prettifies the column names
32+
33+
return_type: {'pandas', 'polars', 'pyarrow'}
34+
Type of the resulting dataframe
35+
36+
Returns
37+
-------
38+
Dataframe of `return_type` type
39+
Dataframe with 1704 rows and the following columns:
2140
`['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
2241
'iso_alpha', 'iso_num']`.
42+
2343
If `datetimes` is True, the 'year' column will be a datetime column
2444
If `centroids` is True, two new columns are added: ['centroid_lat', 'centroid_lon']
2545
If `year` is an integer, the dataset will be filtered for that year
@@ -61,9 +81,20 @@ def tips(pretty_names=False, return_type="pandas"):
6181
6282
https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html
6383
64-
Returns:
65-
A `pandas.DataFrame` with 244 rows and the following columns:
66-
`['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`."""
84+
Parameters
85+
----------
86+
pretty_names: bool
87+
If True, prettifies the column names
88+
89+
return_type: {'pandas', 'polars', 'pyarrow'}
90+
Type of the resulting dataframe
91+
92+
Returns
93+
-------
94+
Dataframe of `return_type` type
95+
Dataframe with 244 rows and the following columns:
96+
`['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
97+
"""
6798

6899
df = nw.from_native(_get_dataset("tips", return_type=return_type), eager_only=True)
69100
if pretty_names:
@@ -87,19 +118,35 @@ def iris(return_type="pandas"):
87118
88119
https://en.wikipedia.org/wiki/Iris_flower_data_set
89120
90-
Returns:
91-
A `pandas.DataFrame` with 150 rows and the following columns:
92-
`['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`."""
121+
Parameters
122+
----------
123+
return_type: {'pandas', 'polars', 'pyarrow'}
124+
Type of the resulting dataframe
125+
126+
Returns
127+
-------
128+
Dataframe of `return_type` type
129+
Dataframe with 150 rows and the following columns:
130+
`['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.
131+
"""
93132
return _get_dataset("iris", return_type=return_type)
94133

95134

96135
def wind(return_type="pandas"):
97136
"""
98137
Each row represents a level of wind intensity in a cardinal direction, and its frequency.
99138
100-
Returns:
101-
A `pandas.DataFrame` with 128 rows and the following columns:
102-
`['direction', 'strength', 'frequency']`."""
139+
Parameters
140+
----------
141+
return_type: {'pandas', 'polars', 'pyarrow'}
142+
Type of the resulting dataframe
143+
144+
Returns
145+
-------
146+
Dataframe of `return_type` type
147+
Dataframe with 128 rows and the following columns:
148+
`['direction', 'strength', 'frequency']`.
149+
"""
103150
return _get_dataset("wind", return_type=return_type)
104151

105152

@@ -108,20 +155,30 @@ def election(return_type="pandas"):
108155
Each row represents voting results for an electoral district in the 2013 Montreal
109156
mayoral election.
110157
111-
Returns:
112-
A `pandas.DataFrame` with 58 rows and the following columns:
113-
`['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`."""
158+
Parameters
159+
----------
160+
return_type: {'pandas', 'polars', 'pyarrow'}
161+
Type of the resulting dataframe
162+
163+
Returns
164+
-------
165+
Dataframe of `return_type` type
166+
Dataframe with 58 rows and the following columns:
167+
`['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`.
168+
"""
114169
return _get_dataset("election", return_type=return_type)
115170

116171

117172
def election_geojson():
118173
"""
119174
Each feature represents an electoral district in the 2013 Montreal mayoral election.
120175
121-
Returns:
176+
Returns
177+
-------
122178
A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
123179
is an electoral district numerical ID and whose `district` property is the ID and
124-
district name."""
180+
district name.
181+
"""
125182
import gzip
126183
import json
127184
import os
@@ -142,22 +199,45 @@ def carshare(return_type="pandas"):
142199
Each row represents the availability of car-sharing services near the centroid of a zone
143200
in Montreal over a month-long period.
144201
145-
Returns:
146-
A `pandas.DataFrame` with 249 rows and the following columns:
147-
`['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`."""
202+
Parameters
203+
----------
204+
return_type: {'pandas', 'polars', 'pyarrow'}
205+
Type of the resulting dataframe
206+
207+
Returns
208+
-------
209+
Dataframe of `return_type` type
210+
Dataframe with 249 rows and the following columns:
211+
`['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
212+
"""
148213
return _get_dataset("carshare", return_type=return_type)
149214

150215

151216
def stocks(indexed=False, datetimes=False, return_type="pandas"):
152217
"""
153218
Each row in this wide dataset represents closing prices from 6 tech stocks in 2018/2019.
154219
155-
Returns:
156-
A `pandas.DataFrame` with 100 rows and the following columns:
220+
Parameters
221+
----------
222+
indexed: bool
223+
Whether or not the 'date' column is used as the index and the column index
224+
is named 'company'. Applicable only if `return_type='pandas'`
225+
226+
datetimes: bool
227+
Whether or not the 'date' column will be of datetime type
228+
229+
return_type: {'pandas', 'polars', 'pyarrow'}
230+
Type of the resulting dataframe
231+
232+
Returns
233+
-------
234+
Dataframe of `return_type` type
235+
Dataframe with 100 rows and the following columns:
157236
`['date', 'GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']`.
158237
If `indexed` is True, the 'date' column is used as the index and the column index
238+
is named 'company'
159239
If `datetimes` is True, the 'date' column will be a datetime column
160-
is named 'company'"""
240+
"""
161241
if indexed and return_type != "pandas":
162242
msg = "Cannot set index for backend different from pandas"
163243
raise NotImplementedError(msg)
@@ -181,11 +261,22 @@ def experiment(indexed=False, return_type="pandas"):
181261
Each row in this wide dataset represents the results of 100 simulated participants
182262
on three hypothetical experiments, along with their gender and control/treatment group.
183263
264+
Parameters
265+
----------
266+
indexed: bool
267+
If True, then the index is named "participant".
268+
Applicable only if `return_type='pandas'`
269+
270+
return_type: {'pandas', 'polars', 'pyarrow'}
271+
Type of the resulting dataframe
184272
185-
Returns:
186-
A `pandas.DataFrame` with 100 rows and the following columns:
273+
Returns
274+
-------
275+
Dataframe of `return_type` type
276+
Dataframe with 100 rows and the following columns:
187277
`['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
188-
If `indexed` is True, the data frame index is named "participant" """
278+
If `indexed` is True, the data frame index is named "participant"
279+
"""
189280

190281
if indexed and return_type != "pandas":
191282
msg = "Cannot set index for backend different from pandas"
@@ -206,11 +297,23 @@ def medals_wide(indexed=False, return_type="pandas"):
206297
This dataset represents the medal table for Olympic Short Track Speed Skating for the
207298
top three nations as of 2020.
208299
209-
Returns:
210-
A `pandas.DataFrame` with 3 rows and the following columns:
300+
Parameters
301+
----------
302+
indexed: bool
303+
Whether or not the 'nation' column is used as the index and the column index
304+
is named 'medal'. Applicable only if `return_type='pandas'`
305+
306+
return_type: {'pandas', 'polars', 'pyarrow'}
307+
Type of the resulting dataframe
308+
309+
Returns
310+
-------
311+
Dataframe of `return_type` type
312+
Dataframe with 3 rows and the following columns:
211313
`['nation', 'gold', 'silver', 'bronze']`.
212314
If `indexed` is True, the 'nation' column is used as the index and the column index
213-
is named 'medal'"""
315+
is named 'medal'
316+
"""
214317

215318
if indexed and return_type != "pandas":
216319
msg = "Cannot set index for backend different from pandas"
@@ -231,10 +334,21 @@ def medals_long(indexed=False, return_type="pandas"):
231334
This dataset represents the medal table for Olympic Short Track Speed Skating for the
232335
top three nations as of 2020.
233336
234-
Returns:
235-
A `pandas.DataFrame` with 9 rows and the following columns:
236-
`['nation', 'medal', 'count']`.
237-
If `indexed` is True, the 'nation' column is used as the index."""
337+
Parameters
338+
----------
339+
indexed: bool
340+
Whether or not the 'nation' column is used as the index.
341+
Applicable only if `return_type='pandas'`
342+
343+
return_type: {'pandas', 'polars', 'pyarrow'}
344+
Type of the resulting dataframe
345+
346+
Returns
347+
-------
348+
Dataframe of `return_type` type
349+
Dataframe with 9 rows and the following columns: `['nation', 'medal', 'count']`.
350+
If `indexed` is True, the 'nation' column is used as the index.
351+
"""
238352

239353
if indexed and return_type != "pandas":
240354
msg = "Cannot set index for backend different from pandas"
@@ -253,6 +367,25 @@ def medals_long(indexed=False, return_type="pandas"):
253367

254368

255369
def _get_dataset(d, return_type):
370+
"""
371+
Loads the dataset using the specified backend.
372+
373+
Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all
374+
have a `read_csv` function. Therefore we can dynamically load the library via
375+
`importlib.import_module` and then call `backend.read_csv(filepath)`.
376+
377+
Parameters
378+
----------
379+
d: str
380+
Name of the dataset to load.
381+
382+
return_type: {'pandas', 'polars', 'pyarrow'}
383+
Type of the resulting dataframe
384+
385+
Returns
386+
-------
387+
Dataframe of `return_type` type
388+
"""
256389
import os
257390
from importlib import import_module
258391

0 commit comments

Comments
 (0)