Skip to content

Commit f55e349

Browse files
committed
Updated Python BCF propensity arguments
1 parent 1978369 commit f55e349

13 files changed

+86
-86
lines changed

demo/debug/bcf_pred_rmse.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,11 +51,11 @@
5151
bcf_model.sample(
5252
X_train=X_train,
5353
Z_train=Z_train,
54-
pi_train=pi_train,
54+
propensity_train=pi_train,
5555
y_train=y_train,
5656
X_test=X_test,
5757
Z_test=Z_test,
58-
pi_test=pi_test,
58+
propensity_test=pi_test,
5959
)
6060

6161
# Predict out of sample

demo/debug/bcf_predict_debug.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
bcf_model.sample(
4646
X_train=X_train,
4747
Z_train=Z_train,
48-
pi_train=pi_train,
48+
propensity_train=pi_train,
4949
y_train=y_train,
5050
num_gfr=10,
5151
num_burnin=0,
@@ -182,7 +182,7 @@
182182
bcf_model.sample(
183183
X_train=X_train,
184184
Z_train=Z_train,
185-
pi_train=pi_train,
185+
propensity_train=pi_train,
186186
y_train=y_train,
187187
rfx_group_ids_train=rfx_group_ids_train,
188188
num_gfr=10,

demo/debug/causal_inference_binary_outcome.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,8 @@ def g(x5):
101101

102102
# Run the sampler
103103
bcf_model = BCFModel()
104-
bcf_model.sample(X_train=X_train, Z_train=Z_train, y_train=y_train, pi_train=pi_train,
105-
X_test=X_test, Z_test=Z_test, pi_test=pi_test, num_gfr=num_gfr,
104+
bcf_model.sample(X_train=X_train, Z_train=Z_train, y_train=y_train, propensity_train=pi_train,
105+
X_test=X_test, Z_test=Z_test, propensity_test=pi_test, num_gfr=num_gfr,
106106
num_burnin=num_burnin, num_mcmc=num_mcmc, general_params=general_params,
107107
prognostic_forest_params=prognostic_forest_params,
108108
treatment_effect_forest_params=treatment_effect_forest_params)

demo/debug/causal_inference_feature_subsets.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
bcf_model_a = BCFModel()
4545
prog_forest_config_a = {"num_trees": 100}
4646
trt_forest_config_a = {"num_trees": 50}
47-
bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
47+
bcf_model_a.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
4848
"""
4949
timing_no_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
5050
print(f"Average runtime, without feature subsampling (p = {p:d}): {timing_no_subsampling:.2f}")
@@ -54,7 +54,7 @@
5454
bcf_model_b = BCFModel()
5555
prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
5656
trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5}
57-
bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
57+
bcf_model_b.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
5858
"""
5959
timing_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
6060
print(f"Average runtime, subsampling 5 out of {p:d} features: {timing_subsampling:.2f}")
@@ -63,11 +63,11 @@
6363
bcf_model_a = BCFModel()
6464
prog_forest_config_a = {"num_trees": 100}
6565
trt_forest_config_a = {"num_trees": 50}
66-
bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
66+
bcf_model_a.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
6767
bcf_model_b = BCFModel()
6868
prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
6969
trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5}
70-
bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
70+
bcf_model_b.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
7171
y_hat_test_a = np.squeeze(bcf_model_a.y_hat_test).mean(axis = 1)
7272
rmse_no_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_a,2)))
7373
print(f"Test set RMSE, no subsampling (p = {p:d}): {rmse_no_subsampling:.2f}")

demo/debug/gfr_ties_debug.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@
157157
xbcf_model.sample(
158158
X_train=X_train,
159159
Z_train=Z_train,
160-
pi_train=propensity_train,
160+
propensity_train=propensity_train,
161161
y_train=y_train,
162162
num_gfr=10,
163163
num_burnin=0,
@@ -182,7 +182,7 @@
182182
bcf_model.sample(
183183
X_train=X_train,
184184
Z_train=Z_train,
185-
pi_train=propensity_train,
185+
propensity_train=propensity_train,
186186
y_train=y_train,
187187
num_gfr=10,
188188
num_burnin=0,
@@ -237,7 +237,7 @@
237237
xbcf_model.sample(
238238
X_train=X_train,
239239
Z_train=Z_train,
240-
pi_train=propensity_train,
240+
propensity_train=propensity_train,
241241
y_train=y_train,
242242
num_gfr=10,
243243
num_burnin=0,
@@ -262,7 +262,7 @@
262262
bcf_model.sample(
263263
X_train=X_train,
264264
Z_train=Z_train,
265-
pi_train=propensity_train,
265+
propensity_train=propensity_train,
266266
y_train=y_train,
267267
num_gfr=10,
268268
num_burnin=0,

demo/notebooks/causal_inference.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -109,10 +109,10 @@
109109
" X_train=X_train,\n",
110110
" Z_train=Z_train,\n",
111111
" y_train=y_train,\n",
112-
" pi_train=pi_train,\n",
112+
" propensity_train=pi_train,\n",
113113
" X_test=X_test,\n",
114114
" Z_test=Z_test,\n",
115-
" pi_test=pi_test,\n",
115+
" propensity_test=pi_test,\n",
116116
" num_gfr=10,\n",
117117
" num_mcmc=100,\n",
118118
" general_params=general_params,\n",

demo/notebooks/causal_inference_feature_subsets.ipynb

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,10 @@
113113
" X_train=X_train,\n",
114114
" Z_train=Z_train,\n",
115115
" y_train=y_train,\n",
116-
" pi_train=pi_train,\n",
116+
" propensity_train=pi_train,\n",
117117
" X_test=X_test,\n",
118118
" Z_test=Z_test,\n",
119-
" pi_test=pi_test,\n",
119+
" propensity_test=pi_test,\n",
120120
" num_gfr=10,\n",
121121
" num_mcmc=100,\n",
122122
" general_params={\"keep_every\": 5},\n",
@@ -242,10 +242,10 @@
242242
" X_train=X_train,\n",
243243
" Z_train=Z_train,\n",
244244
" y_train=y_train,\n",
245-
" pi_train=pi_train,\n",
245+
" propensity_train=pi_train,\n",
246246
" X_test=X_test,\n",
247247
" Z_test=Z_test,\n",
248-
" pi_test=pi_test,\n",
248+
" propensity_test=pi_test,\n",
249249
" num_gfr=10,\n",
250250
" num_mcmc=100,\n",
251251
" treatment_effect_forest_params=tau_params,\n",

demo/notebooks/multivariate_treatment_causal_inference.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,10 +110,10 @@
110110
" X_train=X_train,\n",
111111
" Z_train=Z_train,\n",
112112
" y_train=y_train,\n",
113-
" pi_train=pi_train,\n",
113+
" propensity_train=pi_train,\n",
114114
" X_test=X_test,\n",
115115
" Z_test=Z_test,\n",
116-
" pi_test=pi_test,\n",
116+
" propensity_test=pi_test,\n",
117117
" num_gfr=10,\n",
118118
" num_mcmc=100,\n",
119119
")"

stochtree/bcf.py

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,12 @@ def sample(
8484
X_train: Union[pd.DataFrame, np.array],
8585
Z_train: np.array,
8686
y_train: np.array,
87-
pi_train: np.array = None,
87+
propensity_train: np.array = None,
8888
rfx_group_ids_train: np.array = None,
8989
rfx_basis_train: np.array = None,
9090
X_test: Union[pd.DataFrame, np.array] = None,
9191
Z_test: np.array = None,
92-
pi_test: np.array = None,
92+
propensity_test: np.array = None,
9393
rfx_group_ids_test: np.array = None,
9494
rfx_basis_test: np.array = None,
9595
num_gfr: int = 5,
@@ -114,7 +114,7 @@ def sample(
114114
Array of (continuous or binary; univariate or multivariate) treatment assignments.
115115
y_train : np.array
116116
Outcome to be modeled by the ensemble.
117-
pi_train : np.array
117+
propensity_train : np.array
118118
Optional vector of propensity scores. If not provided, this will be estimated from the data.
119119
rfx_group_ids_train : np.array, optional
120120
Optional group labels used for an additive random effects model.
@@ -125,7 +125,7 @@ def sample(
125125
Z_test : np.array, optional
126126
Optional test set of (continuous or binary) treatment assignments.
127127
Must be provided if `X_test` is provided.
128-
pi_test : np.array, optional
128+
propensity_test : np.array, optional
129129
Optional test set vector of propensity scores. If not provided (but `X_test` and `Z_test` are), this will be estimated from the data.
130130
rfx_group_ids_test : np.array, optional
131131
Optional test set group labels used for an additive random effects model. We do not currently support (but plan to in the near future),
@@ -541,9 +541,9 @@ def sample(
541541
raise ValueError("X_train must be a pandas dataframe or numpy array")
542542
if not isinstance(Z_train, np.ndarray):
543543
raise ValueError("Z_train must be a numpy array")
544-
if pi_train is not None:
545-
if not isinstance(pi_train, np.ndarray):
546-
raise ValueError("pi_train must be a numpy array")
544+
if propensity_train is not None:
545+
if not isinstance(propensity_train, np.ndarray):
546+
raise ValueError("propensity_train must be a numpy array")
547547
if not isinstance(y_train, np.ndarray):
548548
raise ValueError("y_train must be a numpy array")
549549
if X_test is not None:
@@ -554,9 +554,9 @@ def sample(
554554
if Z_test is not None:
555555
if not isinstance(Z_test, np.ndarray):
556556
raise ValueError("Z_test must be a numpy array")
557-
if pi_test is not None:
558-
if not isinstance(pi_test, np.ndarray):
559-
raise ValueError("pi_test must be a numpy array")
557+
if propensity_test is not None:
558+
if not isinstance(propensity_test, np.ndarray):
559+
raise ValueError("propensity_test must be a numpy array")
560560
if rfx_group_ids_train is not None:
561561
if not isinstance(rfx_group_ids_train, np.ndarray):
562562
raise ValueError("rfx_group_ids_train must be a numpy array")
@@ -585,9 +585,9 @@ def sample(
585585
if Z_train is not None:
586586
if Z_train.ndim == 1:
587587
Z_train = np.expand_dims(Z_train, 1)
588-
if pi_train is not None:
589-
if pi_train.ndim == 1:
590-
pi_train = np.expand_dims(pi_train, 1)
588+
if propensity_train is not None:
589+
if propensity_train.ndim == 1:
590+
propensity_train = np.expand_dims(propensity_train, 1)
591591
if y_train.ndim == 1:
592592
y_train = np.expand_dims(y_train, 1)
593593
if X_test is not None:
@@ -597,9 +597,9 @@ def sample(
597597
if Z_test is not None:
598598
if Z_test.ndim == 1:
599599
Z_test = np.expand_dims(Z_test, 1)
600-
if pi_test is not None:
601-
if pi_test.ndim == 1:
602-
pi_test = np.expand_dims(pi_test, 1)
600+
if propensity_test is not None:
601+
if propensity_test.ndim == 1:
602+
propensity_test = np.expand_dims(propensity_test, 1)
603603
if rfx_group_ids_train is not None:
604604
if rfx_group_ids_train.ndim != 1:
605605
rfx_group_ids_train = np.squeeze(rfx_group_ids_train)
@@ -631,17 +631,17 @@ def sample(
631631
raise ValueError("X_train and Z_train must have the same number of rows")
632632
if y_train.shape[0] != X_train.shape[0]:
633633
raise ValueError("X_train and y_train must have the same number of rows")
634-
if pi_train is not None:
635-
if pi_train.shape[0] != X_train.shape[0]:
634+
if propensity_train is not None:
635+
if propensity_train.shape[0] != X_train.shape[0]:
636636
raise ValueError(
637-
"X_train and pi_train must have the same number of rows"
637+
"X_train and propensity_train must have the same number of rows"
638638
)
639639
if X_test is not None and Z_test is not None:
640640
if X_test.shape[0] != Z_test.shape[0]:
641641
raise ValueError("X_test and Z_test must have the same number of rows")
642-
if X_test is not None and pi_test is not None:
643-
if X_test.shape[0] != pi_test.shape[0]:
644-
raise ValueError("X_test and pi_test must have the same number of rows")
642+
if X_test is not None and propensity_test is not None:
643+
if X_test.shape[0] != propensity_test.shape[0]:
644+
raise ValueError("X_test and propensity_test must have the same number of rows")
645645

646646
# Raise a warning if the data have ties and only GFR is being run
647647
if (num_gfr > 0) and (num_burnin == 0) and (num_mcmc == 0):
@@ -1311,10 +1311,10 @@ def sample(
13111311
sample_sigma2_leaf_tau = False
13121312

13131313
# Check if user has provided propensities that are needed in the model
1314-
if pi_train is None and propensity_covariate != "none":
1314+
if propensity_train is None and propensity_covariate != "none":
13151315
if self.multivariate_treatment:
13161316
raise ValueError(
1317-
"Propensities must be provided (via pi_train and / or pi_test parameters) or omitted by setting propensity_covariate = 'none' for multivariate treatments"
1317+
"Propensities must be provided (via propensity_train and / or propensity_test parameters) or omitted by setting propensity_covariate = 'none' for multivariate treatments"
13181318
)
13191319
else:
13201320
self.bart_propensity_model = BARTModel()
@@ -1330,10 +1330,10 @@ def sample(
13301330
num_burnin=num_burnin_propensity,
13311331
num_mcmc=num_mcmc_propensity,
13321332
)
1333-
pi_train = np.mean(
1333+
propensity_train = np.mean(
13341334
self.bart_propensity_model.y_hat_train, axis=1, keepdims=True
13351335
)
1336-
pi_test = np.mean(
1336+
propensity_test = np.mean(
13371337
self.bart_propensity_model.y_hat_test, axis=1, keepdims=True
13381338
)
13391339
else:
@@ -1344,7 +1344,7 @@ def sample(
13441344
num_burnin=num_burnin_propensity,
13451345
num_mcmc=num_mcmc_propensity,
13461346
)
1347-
pi_train = np.mean(
1347+
propensity_train = np.mean(
13481348
self.bart_propensity_model.y_hat_train, axis=1, keepdims=True
13491349
)
13501350
self.internal_propensity_model = True
@@ -1674,34 +1674,34 @@ def sample(
16741674
)
16751675
if propensity_covariate != "none":
16761676
feature_types = np.append(
1677-
feature_types, np.repeat(0, pi_train.shape[1])
1677+
feature_types, np.repeat(0, propensity_train.shape[1])
16781678
).astype("int")
1679-
X_train_processed = np.c_[X_train_processed, pi_train]
1679+
X_train_processed = np.c_[X_train_processed, propensity_train]
16801680
if self.has_test:
1681-
X_test_processed = np.c_[X_test_processed, pi_test]
1681+
X_test_processed = np.c_[X_test_processed, propensity_test]
16821682
if propensity_covariate == "prognostic":
16831683
variable_weights_mu = np.append(
1684-
variable_weights_mu, np.repeat(1 / num_cov_orig, pi_train.shape[1])
1684+
variable_weights_mu, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
16851685
)
16861686
variable_weights_tau = np.append(
1687-
variable_weights_tau, np.repeat(0.0, pi_train.shape[1])
1687+
variable_weights_tau, np.repeat(0.0, propensity_train.shape[1])
16881688
)
16891689
elif propensity_covariate == "treatment_effect":
16901690
variable_weights_mu = np.append(
1691-
variable_weights_mu, np.repeat(0.0, pi_train.shape[1])
1691+
variable_weights_mu, np.repeat(0.0, propensity_train.shape[1])
16921692
)
16931693
variable_weights_tau = np.append(
1694-
variable_weights_tau, np.repeat(1 / num_cov_orig, pi_train.shape[1])
1694+
variable_weights_tau, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
16951695
)
16961696
elif propensity_covariate == "both":
16971697
variable_weights_mu = np.append(
1698-
variable_weights_mu, np.repeat(1 / num_cov_orig, pi_train.shape[1])
1698+
variable_weights_mu, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
16991699
)
17001700
variable_weights_tau = np.append(
1701-
variable_weights_tau, np.repeat(1 / num_cov_orig, pi_train.shape[1])
1701+
variable_weights_tau, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
17021702
)
17031703
variable_weights_variance = np.append(
1704-
variable_weights_variance, np.repeat(0.0, pi_train.shape[1])
1704+
variable_weights_variance, np.repeat(0.0, propensity_train.shape[1])
17051705
)
17061706

17071707
# Renormalize variable weights

0 commit comments

Comments
 (0)