@@ -93,7 +93,7 @@ def election_geojson():
9393def carshare ():
9494 """
9595Each row represents the availability of car-sharing services near the centroid of a zone
96- in Montreal.
96+ in Montreal over a month-long period .
9797
9898Returns:
9999 A `pandas.DataFrame` with 249 rows and the following columns:
@@ -102,31 +102,78 @@ def carshare():
102102 return _get_dataset ("carshare" )
103103
104104
105- def timeseries ():
105+ def timeseries (indexed = False ):
106106 """
107107Each row in this wide dataset represents values from 6 random walk time-series. The
108108'day' column contains dates; it becomes the index only when `indexed` is True.
109109
110110Returns:
111111 A `pandas.DataFrame` with 100 rows and the following columns:
112- `['MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`.
112+ `['day', 'MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`.
113+ If `indexed` is True, the 'day' column is used as the index and the column index
114+ is named 'ticker'; otherwise 'day' remains an ordinary column.
113115"""
114- return _get_dataset ("timeseries" , index_col = 0 )
116+ df = _get_dataset ("timeseries" )
117+ if indexed :
118+ df = df .set_index ("day" )
119+ df .columns .name = "ticker"
120+ return df
115121
116122
117- def experiment ():
123+ def experiment (indexed = False ):
118124 """
119125Each row in this wide dataset represents the results of 100 simulated participants
120126on three hypothetical experiments, along with their gender and control/treatment group.
121127
128+
122129Returns:
123130 A `pandas.DataFrame` with 100 rows and the following columns:
124131 `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
132+ If `indexed` is True, the data frame index is named "participant"; no column is moved into the index.
133+ """
134+ df = _get_dataset ("experiment" )
135+ if indexed :
136+ df .index .name = "participant"
137+ return df
138+
139+
140+ def short_track_wide (indexed = False ):
141+ """
142+ This dataset represents the medal table for Olympic Short Track Speed Skating for the
143+ top three nations as of 2020, in wide form: one row per nation, one column per medal.
144+
145+ Returns:
146+ A `pandas.DataFrame` with 3 rows and the following columns:
147+ `['nation', 'gold', 'silver', 'bronze']`.
148+ If `indexed` is True, the 'nation' column is used as the index and the column index
149+ is named 'medal' (the row index keeps the name 'nation').
125150"""
126- return _get_dataset ("experiment" )
151+ df = _get_dataset ("short_track" )
152+ if indexed :
153+ df = df .set_index ("nation" )
154+ df .columns .name = "medal" # name the column axis; naming df.index here would overwrite "nation"
155+ return df
156+
157+
158+ def short_track_long (indexed = False ):
159+ """
160+ This dataset represents the medal table for Olympic Short Track Speed Skating for the
161+ top three nations as of 2020, melted to long form: one row per nation/medal pair.
162+
163+ Returns:
164+ A `pandas.DataFrame` with 9 rows and the following columns:
165+ `['nation', 'medal', 'count']`.
166+ If `indexed` is True, the 'nation' column is used as the index.
167+ """
168+ df = _get_dataset ("short_track" ).melt (
169+ id_vars = ["nation" ], value_name = "count" , var_name = "medal"
170+ )
171+ if indexed :
172+ df = df .set_index ("nation" )
173+ return df
127174
128175
129- def _get_dataset (d , index_col = None ):
176+ def _get_dataset (d ):
130177 import pandas
131178 import os
132179
@@ -136,6 +183,5 @@ def _get_dataset(d, index_col=None):
136183 "package_data" ,
137184 "datasets" ,
138185 d + ".csv.gz" ,
139- ),
140- index_col = index_col ,
186+ )
141187 )
0 commit comments