1+ """
2+ Functions that generate data sets used in examples
3+ """
14import numpy as np
25import pandas as pd
36from scipy .stats import dirichlet , gamma , norm , uniform
@@ -13,6 +16,15 @@ def _smoothed_gaussian_random_walk(
1316):
1417 """
1518 Generates Gaussian random walk data and applies LOWESS
19+
20+ :param gaussian_random_walk_mu:
21+ Mean of the random walk
22+ :param gaussian_random_walk_sigma:
23+ Standard deviation of the random walk
24+ :param N:
25+ Length of the random walk
26+ :param lowess_kwargs:
27+ Keyword argument dictionary passed to statsmodels lowess
1628 """
1729 x = np .arange (N )
1830 y = norm (gaussian_random_walk_mu , gaussian_random_walk_sigma ).rvs (N ).cumsum ()
@@ -29,13 +41,24 @@ def generate_synthetic_control_data(
2941 lowess_kwargs = default_lowess_kwargs ,
3042):
3143 """
44+ Generates data for synthetic control example.
45+
46+ :param N:
47+ Number of data points
48+ :param treatment_time:
49+ Index where treatment begins in the generated data frame
50+ :param grw_mu:
51+ Mean of Gaussian Random Walk
52+ :param grw_sigma:
53+ Standard deviation of Gaussian Random Walk
54+ :param lowess_kwargs:
55+ Keyword argument dictionary passed to statsmodels lowess
56+
3257 Example
3358 --------
34- >>> import pathlib
3559 >>> df, weightings_true = generate_synthetic_control_data(
3660 ... treatment_time=treatment_time
3761 ... )
38- >>> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
3962 """
4063
4164 # 1. Generate non-treated variables
@@ -74,7 +97,21 @@ def generate_synthetic_control_data(
7497def generate_time_series_data (
7598 N = 100 , treatment_time = 70 , beta_temp = - 1 , beta_linear = 0.5 , beta_intercept = 3
7699):
77- """ """
100+ """
101+ Generates interrupted time series example data
102+
103+ :param N:
104+ Length of the time series
105+ :param treatment_time:
106+ Index of when treatment begins
107+ :param beta_temp:
108+ The temperature coefficient
109+ :param beta_linear:
110+ The linear coefficient
111+ :param beta_intercept:
112+ The intercept
113+
114+ """
78115 x = np .arange (0 , 100 , 1 )
79116 df = pd .DataFrame (
80117 {
@@ -104,7 +141,9 @@ def generate_time_series_data(
104141
105142
106143def generate_time_series_data_seasonal (treatment_time ):
107- """ """
144+ """
145+ Generates 10 years of monthly data with seasonality
146+ """
108147 dates = pd .date_range (
109148 start = pd .to_datetime ("2010-01-01" ), end = pd .to_datetime ("2020-01-01" ), freq = "M"
110149 )
@@ -170,6 +209,7 @@ def generate_did():
170209 def outcome (
171210 t , control_intercept , treat_intercept_delta , trend , Δ , group , post_treatment
172211 ):
212+ """Compute the outcome of each unit"""
173213 return (
174214 control_intercept
175215 + (treat_intercept_delta * group )
@@ -214,9 +254,11 @@ def generate_regression_discontinuity_data(
214254 """
215255
def is_treated(x):
    """
    Flag which values of x are at or above the treatment threshold

    :param x:
        Values of the running variable
    """
    treated_mask = np.greater_equal(x, true_treatment_threshold)
    return treated_mask
218259
def impact(x):
    """
    Assign true_causal_impact to all treated entries, zero elsewhere

    :param x:
        Values of the running variable
    """
    effect = np.zeros(len(x))
    treated_mask = is_treated(x)
    effect[treated_mask] = true_causal_impact
    return effect
@@ -263,6 +305,10 @@ def generate_geolift_data():
263305 causal_impact = 0.2
264306
def create_series(n=52, amplitude=1, length_scale=2):
    """
    Returns numpy tile with generated seasonality data repeated over
    multiple years

    :param n:
        Passed to generate_seasonality as ``n``
    :param amplitude:
        Passed to generate_seasonality as ``amplitude``
    :param length_scale:
        Passed to generate_seasonality as ``length_scale``
    """
    # Forward the caller's length_scale; it was previously hard-coded to 2,
    # which silently ignored the argument.
    one_cycle = (
        generate_seasonality(n=n, amplitude=amplitude, length_scale=length_scale) + 3
    )
    return np.tile(one_cycle, n_years)
0 commit comments