@@ -301,19 +301,24 @@ TODO Review exercises below --- are they all too hard for undergrads or should
301301we keep some of them?
302302
303303``` {code-cell} ipython3
304- def extract_wb(varlist=['NY.GDP.MKTP.CD'], c='all', s=1900, e=2021):
305- df = wb.download(indicator=varlist, country=c, start=s, end=e).stack().unstack(0).reset_index()
306- df = df.drop(['level_1'], axis=1).set_index(['year']).transpose()
def extract_wb(varlist=['NY.GDP.MKTP.CD'],
               c='all',
               s=1900,
               e=2021,
               varnames=None):
    """
    Download World Bank indicators and return them reshaped into a DataFrame.

    Parameters
    ----------
    varlist : list of str
        Indicator codes passed to ``wb.download`` as ``indicator``.
        (The default list is never mutated here, so sharing it is safe.)
    c : str or list-like
        Country selection passed to ``wb.download`` as ``country``.
        The special value ``"all_countries"`` is expanded to the ``iso3c``
        codes of every row of ``wb.get_countries()`` whose ``region`` is
        not ``'Aggregates'``; any other value is forwarded unchanged.
    s, e : int or str
        Start and end year passed to ``wb.download``.
    varnames : list of str, optional
        If given, assigned as the column labels of the result, after which
        the first row is dropped. The list length must match the number of
        columns produced by the reshape.

    Returns
    -------
    pandas.DataFrame
        The downloaded data, transposed so that the original columns
        become rows.
    """
    # Forward `c` as-is unless the caller asked for "countries only";
    # previously the download used a variable that was only defined in
    # the "all_countries" branch, so every other value of `c` failed.
    country = c
    if isinstance(c, str) and c == "all_countries":
        # keep countries only (no aggregated regions)
        countries = wb.get_countries()
        is_country = countries['region'] != 'Aggregates'
        country = countries[is_country]['iso3c'].values

    df = wb.download(indicator=varlist, country=country, start=s, end=e)
    df = df.stack().unstack(0).reset_index()
    df = df.drop(['level_1'], axis=1).transpose()

    if varnames is not None:
        df.columns = varnames
        # The first row after the transpose comes from the column created
        # by reset_index (presumably 'year'); it is not data, so drop it.
        # NOTE(review): assumes this drop belongs to the varnames branch --
        # confirm against the original indentation.
        df = df[1:]
    return df
308320```
309321
310- ``` {code-cell} ipython3
311- c='all'
312- s=1900
313- e=2021
314- wb.download(indicator=['NY.GDP.MKTP.CD'], country=c, start=s, end=e)
315- ```
316-
317322``` {code-cell} ipython3
318323def empirical_ccdf(data,
319324 ax,
@@ -376,19 +381,22 @@ def empirical_ccdf(data,
376381
377382``` {code-cell} ipython3
378383# get GDP and GDP per capita for all countries (aggregated regions excluded) in 2021
379- df_gdp1 = extract_wb(varlist=['NY.GDP.MKTP.CD'], s="2021", e="2021")[48:]
380- df_gdp2 = extract_wb(varlist=['NY.GDP.PCAP.CD'], s="2021", e="2021")[48:]
381384
382- # Keep the data for all countries only
383- df_gdp1 = df_gdp1[48:]
384- df_gdp2 = df_gdp2[48:]
# World Bank indicator code -> column label used in the plots below
indicator_labels = {
    'NY.GDP.MKTP.CD': 'GDP',
    'NY.GDP.PCAP.CD': 'GDP per capita',
}
variable_code = list(indicator_labels.keys())
variable_names = list(indicator_labels.values())

# Single-year download: start and end are both 2021
df_gdp1 = extract_wb(
    varlist=variable_code, c="all_countries", s="2021", e="2021", varnames=variable_names
)
385393```
386394
387395``` {code-cell} ipython3
388396fig, axes = plt.subplots(1, 2, figsize=(8.8, 3.6))
389397
390- empirical_ccdf(np.asarray(df_gdp1['2021'].dropna()), axes[0], add_reg_line=False, label='GDP')
391- empirical_ccdf(np.asarray(df_gdp2['2021'].dropna()), axes[1] , add_reg_line=False, label='GDP per capita' )
# One panel per variable: the empirical CCDF of that column, without a fitted line
for name, ax in zip(variable_names, axes):
    column_values = np.asarray(df_gdp1[name]).astype("float64")
    empirical_ccdf(column_values, ax, add_reg_line=False, label=name)
392400
393401plt.show()
394402```
0 commit comments