Commit e64099d (1 parent: 7a5032e)

    run did sim with updated treatment assignment

25 files changed (+469, -541 lines)

doc/did/did_cs_multi.qmd (5 additions, 5 deletions)

@@ -24,7 +24,7 @@ init_notebook_mode(all_interactive=True)
 
 ## Coverage
 
-The simulations are based on the [make_did_cs_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_cs_CS2021.html)-DGP with $2000$ observations. Learners are both set to either boosting or a linear (logistic) model. Due to time constraints we only consider the following DGPs:
+The simulations are based on the [make_did_cs_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_cs_CS2021.html)-DGP with $1000$ observations. Learners are both set to either boosting or a linear (logistic) model. Due to time constraints we only consider the following DGPs:
 
 - Type 1: Linear outcome model and treatment assignment
 - Type 4: Nonlinear outcome model and treatment assignment
@@ -52,7 +52,7 @@ df = pd.read_csv("../../results/did/did_cs_multi_detailed.csv", index_col=None)
 assert df["repetition"].nunique() == 1
 n_rep = df["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_d0_t0", "Loss g_d0_t1", "Loss g_d1_t0", "Loss g_d1_t1", "Loss m"]
 ```
 
 ### Observational Score
@@ -127,7 +127,7 @@ df_group = pd.read_csv("../../results/did/did_cs_multi_group.csv", index_col=Non
 assert df_group["repetition"].nunique() == 1
 n_rep_group = df_group["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_d0_t0", "Loss g_d0_t1", "Loss g_d1_t0", "Loss g_d1_t1", "Loss m"]
 ```
 
 #### Observational Score
@@ -195,7 +195,7 @@ df_time = pd.read_csv("../../results/did/did_cs_multi_time.csv", index_col=None)
 assert df_time["repetition"].nunique() == 1
 n_rep_time = df_time["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_d0_t0", "Loss g_d0_t1", "Loss g_d1_t0", "Loss g_d1_t1", "Loss m"]
 ```
 
 #### Observational Score
@@ -263,7 +263,7 @@ df_es = pd.read_csv("../../results/did/did_cs_multi_eventstudy.csv", index_col=N
 assert df_es["repetition"].nunique() == 1
 n_rep_es = df_es["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_d0_t0", "Loss g_d0_t1", "Loss g_d1_t0", "Loss g_d1_t1", "Loss m"]
 ```
 
 #### Observational Score
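The `display_columns` edits above feed a simple column selection in the `.qmd` notebooks. A minimal sketch of that pattern, using a fabricated stand-in for the results CSV (the column subset and values here are hypothetical, not simulation output):

```python
import pandas as pd

# Hypothetical stand-in for results/did/did_cs_multi_detailed.csv
df = pd.DataFrame({
    "Learner g": ["LGBM Regr.", "Linear"],
    "Learner m": ["LGBM Clas.", "Logistic"],
    "DGP": [1, 4],
    "Coverage": [0.94, 0.91],
    "Loss m": [0.32, 0.35],
    "repetition": [1000, 1000],
})

# The notebooks guard against mixed runs: every row must come from the
# same number of Monte Carlo repetitions.
assert df["repetition"].nunique() == 1
n_rep = df["repetition"].unique()[0]

# Restrict the styled table to the reporting columns (subset shown here).
display_columns = ["Learner g", "Learner m", "DGP", "Coverage", "Loss m"]
table = df[display_columns]
```

Adding the new loss columns to `display_columns` is all it takes for them to appear in every rendered table, since the selection is reused across the detailed, group, time, and event-study sections.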

doc/did/did_pa_multi.qmd (6 additions, 10 deletions)

@@ -24,7 +24,7 @@ init_notebook_mode(all_interactive=True)
 
 ## Coverage
 
-The simulations are based on the the [make_did_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_CS2021.html)-DGP with $2000$ observations. Learners are both set to either boosting or a linear (logistic) model. Due to time constraints we only consider the following DGPs:
+The simulations are based on the the [make_did_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_CS2021.html)-DGP with $1000$ observations. Learners are both set to either boosting or a linear (logistic) model. Due to time constraints we only consider the following DGPs:
 
 - Type 1: Linear outcome model and treatment assignment
 - Type 4: Nonlinear outcome model and treatment assignment
@@ -52,7 +52,7 @@ df = pd.read_csv("../../results/did/did_pa_multi_detailed.csv", index_col=None)
 assert df["repetition"].nunique() == 1
 n_rep = df["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_control", "Loss g_treated", "Loss m"]
 ```
 
 ### Observational Score
@@ -127,7 +127,7 @@ df_group = pd.read_csv("../../results/did/did_pa_multi_group.csv", index_col=Non
 assert df_group["repetition"].nunique() == 1
 n_rep_group = df_group["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_control", "Loss g_treated", "Loss m"]
 ```
 
 #### Observational Score
@@ -195,7 +195,7 @@ df_time = pd.read_csv("../../results/did/did_pa_multi_time.csv", index_col=None)
 assert df_time["repetition"].nunique() == 1
 n_rep_time = df_time["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_control", "Loss g_treated", "Loss m"]
 ```
 
 #### Observational Score
@@ -263,7 +263,7 @@ df_es = pd.read_csv("../../results/did/did_pa_multi_eventstudy.csv", index_col=N
 assert df_es["repetition"].nunique() == 1
 n_rep_es = df_es["repetition"].unique()[0]
 
-display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"]
+display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage", "Loss g_control", "Loss g_treated", "Loss m"]
 ```
 
 #### Observational Score
@@ -324,11 +324,9 @@ generate_and_show_styled_table(
 
 ## Tuning
 
-The simulations are based on the the [make_did_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_CS2021.html)-DGP with $2000$ observations. Due to time constraints we only consider one learner, use in-sample normalization and the following DGPs:
+The simulations are based on the the [make_did_CS2021](https://docs.doubleml.org/stable/api/generated/doubleml.did.datasets.make_did_CS2021.html)-DGP with $1000$ observations. Due to time constraints we only consider one learner, use in-sample normalization and the following DGPs:
 
 - Type 1: Linear outcome model and treatment assignment
-- Type 2: Nonlinear outcome model and linear treatment assignment
-- Type 3: Linear outcome model and nonlinear treatment assignment
 - Type 4: Nonlinear outcome model and treatment assignment
 
 The non-uniform results (coverage, ci length and bias) refer to averaged values over all $ATTs$ (point-wise confidende intervals). This is only an example as the untuned version just relies on the default configuration.
@@ -389,8 +387,6 @@ These simulations test different types of aggregation, as described in [DiD User
 As before, we only consider one learner, use in-sample normalization and the following DGPs:
 
 - Type 1: Linear outcome model and treatment assignment
-- Type 2: Nonlinear outcome model and linear treatment assignment
-- Type 3: Linear outcome model and nonlinear treatment assignment
 - Type 4: Nonlinear outcome model and treatment assignment
 
 The non-uniform results (coverage, ci length and bias) refer to averaged values over all $ATTs$ (point-wise confidende intervals). This is only an example as the untuned version just relies on the default configuration.
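The docs above distinguish point-wise CIs (averaged over all ATTs) from uniform confidence bands, which DoubleML obtains via a multiplier bootstrap (`dml_model.bootstrap(n_rep_boot=2000)` in the runner scripts). A hedged numpy sketch of the max-|t| construction behind such bands, using simulated stand-in bootstrap statistics rather than real DoubleML output:

```python
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical setup: 6 ATT estimates with standard errors, plus bootstrap
# draws of the normalized statistics (stand-ins for multiplier-bootstrap output).
n_params, n_boot = 6, 2000
theta_hat = rng.normal(size=n_params)
se = np.full(n_params, 0.1)
boot_t = rng.normal(size=(n_boot, n_params))

level = 0.95
# Point-wise critical value: per-parameter quantile of the |t| draws.
crit_pw = np.quantile(np.abs(boot_t), level, axis=0)        # shape (n_params,)
# Uniform critical value: quantile of the max over all parameters.
crit_unif = np.quantile(np.abs(boot_t).max(axis=1), level)  # scalar

ci_pw = np.column_stack([theta_hat - crit_pw * se, theta_hat + crit_pw * se])
ci_unif = np.column_stack([theta_hat - crit_unif * se, theta_hat + crit_unif * se])

# The uniform band is at least as wide as every point-wise interval,
# which is why "Uniform CI Length" exceeds "CI Length" in the tables.
assert crit_unif >= crit_pw.max() - 1e-12
```

This also explains why the summary tables report both a per-ATT "Coverage"/"CI Length" pair and a separate "Uniform Coverage"/"Uniform CI Length" pair.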

monte-cover/src/montecover/did/did_cs_multi.py (11 additions, 0 deletions)

@@ -96,6 +96,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
         )
         dml_model.fit()
         dml_model.bootstrap(n_rep_boot=2000)
+        nuisance_loss = dml_model.nuisance_loss
 
         # Oracle values for this model
         oracle_thetas = np.full_like(dml_model.coef, np.nan)
@@ -143,6 +144,11 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
                     "Score": score,
                     "In-sample-norm.": in_sample_normalization,
                     "level": level,
+                    "Loss g_d0_t0": nuisance_loss["ml_g_d0_t0"].mean(),
+                    "Loss g_d1_t0": nuisance_loss["ml_g_d1_t0"].mean(),
+                    "Loss g_d0_t1": nuisance_loss["ml_g_d0_t1"].mean(),
+                    "Loss g_d1_t1": nuisance_loss["ml_g_d1_t1"].mean(),
+                    "Loss m": nuisance_loss["ml_m"].mean() if score == "observational" else np.nan,
                 }
             )
         for key, res in level_result.items():
@@ -168,6 +174,11 @@ def summarize_results(self):
             "Bias": "mean",
             "Uniform Coverage": "mean",
             "Uniform CI Length": "mean",
+            "Loss g_d0_t0": "mean",
+            "Loss g_d1_t0": "mean",
+            "Loss g_d0_t1": "mean",
+            "Loss g_d1_t1": "mean",
+            "Loss m": "mean",
             "repetition": "count",
         }
 
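The `summarize_results` additions fold the new per-repetition loss columns into the existing groupby aggregation. A sketch of that aggregation with fabricated rows (the column subset and values are illustrative, not simulation output):

```python
import numpy as np
import pandas as pd

# Hypothetical per-repetition results, mimicking the columns the commit adds.
results = pd.DataFrame({
    "Learner g": ["LGBM Regr."] * 4,
    "DGP": [1, 1, 4, 4],
    "Coverage": [1.0, 0.9, 0.8, 1.0],
    "Loss m": [0.30, 0.34, np.nan, np.nan],  # NaN when score != "observational"
    "repetition": [1, 2, 1, 2],
})

agg_dict = {
    "Coverage": "mean",
    "Loss m": "mean",       # new loss columns are averaged like the metrics
    "repetition": "count",  # number of completed repetitions per setting
}
summary = results.groupby(["Learner g", "DGP"]).agg(agg_dict).reset_index()
```

Because pandas skips NaN when averaging, the experimental-score rows (where "Loss m" is NaN by construction) simply yield NaN in the summary instead of distorting the observational-score averages.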

monte-cover/src/montecover/did/did_pa_multi.py (8 additions, 1 deletion)

@@ -94,6 +94,7 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
         )
         dml_model.fit()
         dml_model.bootstrap(n_rep_boot=2000)
+        nuisance_loss = dml_model.nuisance_loss
 
         # Oracle values for this model
         oracle_thetas = np.full_like(dml_model.coef, np.nan)
@@ -141,6 +142,9 @@ def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
                     "Score": score,
                     "In-sample-norm.": in_sample_normalization,
                     "level": level,
+                    "Loss g_control": nuisance_loss["ml_g0"].mean(),
+                    "Loss g_treated": nuisance_loss["ml_g1"].mean(),
+                    "Loss m": nuisance_loss["ml_m"].mean() if score == "observational" else np.nan,
                 }
             )
         for key, res in level_result.items():
@@ -166,6 +170,9 @@ def summarize_results(self):
             "Bias": "mean",
             "Uniform Coverage": "mean",
             "Uniform CI Length": "mean",
+            "Loss g_control": "mean",
+            "Loss g_treated": "mean",
+            "Loss m": "mean",
             "repetition": "count",
         }
 
@@ -180,7 +187,7 @@ def summarize_results(self):
 
     def _generate_dml_data(self, dgp_params) -> dml.data.DoubleMLPanelData:
         """Generate data for the simulation."""
-        data = make_did_CS2021(n_obs=dgp_params["n_obs"], dgp_type=dgp_params["DGP"], xi=dgp_params["xi"])
+        data = make_did_CS2021(n_obs=dgp_params["n_obs"], dgp_type=dgp_params["DGP"])
         dml_data = dml.data.DoubleMLPanelData(
             data,
             y_col="y",

results/did/did_cs_multi_config.yml (1 addition, 23 deletions)

@@ -9,7 +9,7 @@ dgp_parameters:
   - 4
   - 6
   n_obs:
-  - 2000
+  - 1000
   lambda_t:
   - 0.5
 learner_definitions:
@@ -19,30 +19,8 @@ learner_definitions:
     name: Logistic
   lgbmr: &id003
     name: LGBM Regr.
-    params:
-      n_estimators: 300
-      learning_rate: 0.03
-      num_leaves: 7
-      max_depth: 3
-      min_child_samples: 20
-      subsample: 0.8
-      colsample_bytree: 0.8
-      reg_alpha: 0.1
-      reg_lambda: 1.0
-      random_state: 42
   lgbmc: &id004
     name: LGBM Clas.
-    params:
-      n_estimators: 300
-      learning_rate: 0.03
-      num_leaves: 7
-      max_depth: 3
-      min_child_samples: 20
-      subsample: 0.8
-      colsample_bytree: 0.8
-      reg_alpha: 0.1
-      reg_lambda: 1.0
-      random_state: 42
 dml_parameters:
   learners:
     - ml_g: *id001
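The config relies on YAML anchors (`&id003`, `*id001`, etc.) to share learner definitions, so deleting the tuned `params:` block in one place reverts every entry that aliases it to the LightGBM defaults. A small PyYAML sketch of that mechanism (illustrative keys only, and it assumes PyYAML is available):

```python
import yaml

# Minimal config sketch mirroring the anchor/alias structure of
# did_cs_multi_config.yml; keys are illustrative, not the full file.
config_text = """
learner_definitions:
  lgbmr: &id003
    name: LGBM Regr.
  lgbmc: &id004
    name: LGBM Clas.
dml_parameters:
  learners:
    - ml_g: *id003
      ml_m: *id004
"""
config = yaml.safe_load(config_text)

# The alias *id003 resolves to the very same mapping object as the anchor,
# so editing the anchored definition edits every learner entry at once.
learner = config["dml_parameters"]["learners"][0]
```

With no `params:` key present, a runner that instantiates learners from these mappings falls back to the library's default hyperparameters.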
