Updated Python BCF propensity arguments

andrewherren · andrewherren · commit f55e349a325b · 2025-11-20T01:30:14.000-06:00
diff --git a/demo/debug/bcf_pred_rmse.py b/demo/debug/bcf_pred_rmse.py
@@ -51,11 +51,11 @@
     bcf_model.sample(
         X_train=X_train,
         Z_train=Z_train,
-        pi_train=pi_train,
+        propensity_train=pi_train,
         y_train=y_train,
         X_test=X_test,
         Z_test=Z_test,
-        pi_test=pi_test,
+        propensity_test=pi_test,
     )
 
     # Predict out of sample
diff --git a/demo/debug/bcf_predict_debug.py b/demo/debug/bcf_predict_debug.py
@@ -45,7 +45,7 @@
 bcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=pi_train,
+    propensity_train=pi_train,
     y_train=y_train,
     num_gfr=10,
     num_burnin=0,
@@ -182,7 +182,7 @@
 bcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=pi_train,
+    propensity_train=pi_train,
     y_train=y_train,
     rfx_group_ids_train=rfx_group_ids_train,
     num_gfr=10,
diff --git a/demo/debug/causal_inference_binary_outcome.py b/demo/debug/causal_inference_binary_outcome.py
@@ -101,8 +101,8 @@ def g(x5):
 
 # Run the sampler
 bcf_model = BCFModel()
-bcf_model.sample(X_train=X_train, Z_train=Z_train, y_train=y_train, pi_train=pi_train, 
-                 X_test=X_test, Z_test=Z_test, pi_test=pi_test, num_gfr=num_gfr, 
+bcf_model.sample(X_train=X_train, Z_train=Z_train, y_train=y_train, propensity_train=pi_train, 
+                 X_test=X_test, Z_test=Z_test, propensity_test=pi_test, num_gfr=num_gfr, 
                  num_burnin=num_burnin, num_mcmc=num_mcmc, general_params=general_params, 
                  prognostic_forest_params=prognostic_forest_params, 
                  treatment_effect_forest_params=treatment_effect_forest_params)
diff --git a/demo/debug/causal_inference_feature_subsets.py b/demo/debug/causal_inference_feature_subsets.py
@@ -44,7 +44,7 @@
 bcf_model_a = BCFModel()
 prog_forest_config_a = {"num_trees": 100}
 trt_forest_config_a = {"num_trees": 50}
-bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
+bcf_model_a.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
 """
 timing_no_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
 print(f"Average runtime, without feature subsampling (p = {p:d}): {timing_no_subsampling:.2f}")
@@ -54,7 +54,7 @@
 bcf_model_b = BCFModel()
 prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
 trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5}
-bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
+bcf_model_b.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
 """
 timing_subsampling = timeit.timeit(stmt=s, number=5, globals=globals())
 print(f"Average runtime, subsampling 5 out of {p:d} features: {timing_subsampling:.2f}")
@@ -63,11 +63,11 @@
 bcf_model_a = BCFModel()
 prog_forest_config_a = {"num_trees": 100}
 trt_forest_config_a = {"num_trees": 50}
-bcf_model_a.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
+bcf_model_a.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_a, treatment_effect_forest_params=trt_forest_config_a)
 bcf_model_b = BCFModel()
 prog_forest_config_b = {"num_trees": 100, "num_features_subsample": 5}
 trt_forest_config_b = {"num_trees": 50, "num_features_subsample": 5}
-bcf_model_b.sample(X_train=X_train, Z_train=Z_train, pi_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, pi_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
+bcf_model_b.sample(X_train=X_train, Z_train=Z_train, propensity_train=pi_x_train, y_train=y_train, X_test=X_test, Z_test=Z_test, propensity_test=pi_x_test, num_gfr=100, num_mcmc=0, prognostic_forest_params=prog_forest_config_b, treatment_effect_forest_params=trt_forest_config_b)
 y_hat_test_a = np.squeeze(bcf_model_a.y_hat_test).mean(axis = 1)
 rmse_no_subsampling = np.sqrt(np.mean(np.power(y_test - y_hat_test_a,2)))
 print(f"Test set RMSE, no subsampling (p = {p:d}): {rmse_no_subsampling:.2f}")
diff --git a/demo/debug/gfr_ties_debug.py b/demo/debug/gfr_ties_debug.py
@@ -157,7 +157,7 @@
 xbcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=propensity_train,
+    propensity_train=propensity_train,
     y_train=y_train,
     num_gfr=10,
     num_burnin=0,
@@ -182,7 +182,7 @@
 bcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=propensity_train,
+    propensity_train=propensity_train,
     y_train=y_train,
     num_gfr=10,
     num_burnin=0,
@@ -237,7 +237,7 @@
 xbcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=propensity_train,
+    propensity_train=propensity_train,
     y_train=y_train,
     num_gfr=10,
     num_burnin=0,
@@ -262,7 +262,7 @@
 bcf_model.sample(
     X_train=X_train,
     Z_train=Z_train,
-    pi_train=propensity_train,
+    propensity_train=propensity_train,
     y_train=y_train,
     num_gfr=10,
     num_burnin=0,
diff --git a/demo/notebooks/causal_inference.ipynb b/demo/notebooks/causal_inference.ipynb
@@ -109,10 +109,10 @@
     "    X_train=X_train,\n",
     "    Z_train=Z_train,\n",
     "    y_train=y_train,\n",
-    "    pi_train=pi_train,\n",
+    "    propensity_train=pi_train,\n",
     "    X_test=X_test,\n",
     "    Z_test=Z_test,\n",
-    "    pi_test=pi_test,\n",
+    "    propensity_test=pi_test,\n",
     "    num_gfr=10,\n",
     "    num_mcmc=100,\n",
     "    general_params=general_params,\n",
diff --git a/demo/notebooks/causal_inference_feature_subsets.ipynb b/demo/notebooks/causal_inference_feature_subsets.ipynb
@@ -113,10 +113,10 @@
     "    X_train=X_train,\n",
     "    Z_train=Z_train,\n",
     "    y_train=y_train,\n",
-    "    pi_train=pi_train,\n",
+    "    propensity_train=pi_train,\n",
     "    X_test=X_test,\n",
     "    Z_test=Z_test,\n",
-    "    pi_test=pi_test,\n",
+    "    propensity_test=pi_test,\n",
     "    num_gfr=10,\n",
     "    num_mcmc=100,\n",
     "    general_params={\"keep_every\": 5},\n",
@@ -242,10 +242,10 @@
     "    X_train=X_train,\n",
     "    Z_train=Z_train,\n",
     "    y_train=y_train,\n",
-    "    pi_train=pi_train,\n",
+    "    propensity_train=pi_train,\n",
     "    X_test=X_test,\n",
     "    Z_test=Z_test,\n",
-    "    pi_test=pi_test,\n",
+    "    propensity_test=pi_test,\n",
     "    num_gfr=10,\n",
     "    num_mcmc=100,\n",
     "    treatment_effect_forest_params=tau_params,\n",
diff --git a/demo/notebooks/multivariate_treatment_causal_inference.ipynb b/demo/notebooks/multivariate_treatment_causal_inference.ipynb
@@ -110,10 +110,10 @@
     "    X_train=X_train,\n",
     "    Z_train=Z_train,\n",
     "    y_train=y_train,\n",
-    "    pi_train=pi_train,\n",
+    "    propensity_train=pi_train,\n",
     "    X_test=X_test,\n",
     "    Z_test=Z_test,\n",
-    "    pi_test=pi_test,\n",
+    "    propensity_test=pi_test,\n",
     "    num_gfr=10,\n",
     "    num_mcmc=100,\n",
     ")"
diff --git a/stochtree/bcf.py b/stochtree/bcf.py
@@ -84,12 +84,12 @@ def sample(
         X_train: Union[pd.DataFrame, np.array],
         Z_train: np.array,
         y_train: np.array,
-        pi_train: np.array = None,
+        propensity_train: np.array = None,
         rfx_group_ids_train: np.array = None,
         rfx_basis_train: np.array = None,
         X_test: Union[pd.DataFrame, np.array] = None,
         Z_test: np.array = None,
-        pi_test: np.array = None,
+        propensity_test: np.array = None,
         rfx_group_ids_test: np.array = None,
         rfx_basis_test: np.array = None,
         num_gfr: int = 5,
@@ -114,7 +114,7 @@ def sample(
             Array of (continuous or binary; univariate or multivariate) treatment assignments.
         y_train : np.array
             Outcome to be modeled by the ensemble.
-        pi_train : np.array
+        propensity_train : np.array, optional
             Optional vector of propensity scores. If not provided, this will be estimated from the data.
         rfx_group_ids_train : np.array, optional
             Optional group labels used for an additive random effects model.
@@ -125,7 +125,7 @@ def sample(
         Z_test : np.array, optional
             Optional test set of (continuous or binary) treatment assignments.
             Must be provided if `X_test` is provided.
-        pi_test : np.array, optional
+        propensity_test : np.array, optional
             Optional test set vector of propensity scores. If not provided (but `X_test` and `Z_test` are), this will be estimated from the data.
         rfx_group_ids_test : np.array, optional
             Optional test set group labels used for an additive random effects model. We do not currently support (but plan to in the near future),
@@ -541,9 +541,9 @@ def sample(
             raise ValueError("X_train must be a pandas dataframe or numpy array")
         if not isinstance(Z_train, np.ndarray):
             raise ValueError("Z_train must be a numpy array")
-        if pi_train is not None:
-            if not isinstance(pi_train, np.ndarray):
-                raise ValueError("pi_train must be a numpy array")
+        if propensity_train is not None:
+            if not isinstance(propensity_train, np.ndarray):
+                raise ValueError("propensity_train must be a numpy array")
         if not isinstance(y_train, np.ndarray):
             raise ValueError("y_train must be a numpy array")
         if X_test is not None:
@@ -554,9 +554,9 @@ def sample(
         if Z_test is not None:
             if not isinstance(Z_test, np.ndarray):
                 raise ValueError("Z_test must be a numpy array")
-        if pi_test is not None:
-            if not isinstance(pi_test, np.ndarray):
-                raise ValueError("pi_test must be a numpy array")
+        if propensity_test is not None:
+            if not isinstance(propensity_test, np.ndarray):
+                raise ValueError("propensity_test must be a numpy array")
         if rfx_group_ids_train is not None:
             if not isinstance(rfx_group_ids_train, np.ndarray):
                 raise ValueError("rfx_group_ids_train must be a numpy array")
@@ -585,9 +585,9 @@ def sample(
         if Z_train is not None:
             if Z_train.ndim == 1:
                 Z_train = np.expand_dims(Z_train, 1)
-        if pi_train is not None:
-            if pi_train.ndim == 1:
-                pi_train = np.expand_dims(pi_train, 1)
+        if propensity_train is not None:
+            if propensity_train.ndim == 1:
+                propensity_train = np.expand_dims(propensity_train, 1)
         if y_train.ndim == 1:
             y_train = np.expand_dims(y_train, 1)
         if X_test is not None:
@@ -597,9 +597,9 @@ def sample(
         if Z_test is not None:
             if Z_test.ndim == 1:
                 Z_test = np.expand_dims(Z_test, 1)
-        if pi_test is not None:
-            if pi_test.ndim == 1:
-                pi_test = np.expand_dims(pi_test, 1)
+        if propensity_test is not None:
+            if propensity_test.ndim == 1:
+                propensity_test = np.expand_dims(propensity_test, 1)
         if rfx_group_ids_train is not None:
             if rfx_group_ids_train.ndim != 1:
                 rfx_group_ids_train = np.squeeze(rfx_group_ids_train)
@@ -631,17 +631,17 @@ def sample(
             raise ValueError("X_train and Z_train must have the same number of rows")
         if y_train.shape[0] != X_train.shape[0]:
             raise ValueError("X_train and y_train must have the same number of rows")
-        if pi_train is not None:
-            if pi_train.shape[0] != X_train.shape[0]:
+        if propensity_train is not None:
+            if propensity_train.shape[0] != X_train.shape[0]:
                 raise ValueError(
-                    "X_train and pi_train must have the same number of rows"
+                    "X_train and propensity_train must have the same number of rows"
                 )
         if X_test is not None and Z_test is not None:
             if X_test.shape[0] != Z_test.shape[0]:
                 raise ValueError("X_test and Z_test must have the same number of rows")
-        if X_test is not None and pi_test is not None:
-            if X_test.shape[0] != pi_test.shape[0]:
-                raise ValueError("X_test and pi_test must have the same number of rows")
+        if X_test is not None and propensity_test is not None:
+            if X_test.shape[0] != propensity_test.shape[0]:
+                raise ValueError("X_test and propensity_test must have the same number of rows")
 
         # Raise a warning if the data have ties and only GFR is being run
         if (num_gfr > 0) and (num_burnin == 0) and (num_mcmc == 0):
@@ -1311,10 +1311,10 @@ def sample(
             sample_sigma2_leaf_tau = False
 
         # Check if user has provided propensities that are needed in the model
-        if pi_train is None and propensity_covariate != "none":
+        if propensity_train is None and propensity_covariate != "none":
             if self.multivariate_treatment:
                 raise ValueError(
-                    "Propensities must be provided (via pi_train and / or pi_test parameters) or omitted by setting propensity_covariate = 'none' for multivariate treatments"
+                    "Propensities must be provided (via propensity_train and / or propensity_test parameters) or omitted by setting propensity_covariate = 'none' for multivariate treatments"
                 )
             else:
                 self.bart_propensity_model = BARTModel()
@@ -1330,10 +1330,10 @@ def sample(
                         num_burnin=num_burnin_propensity,
                         num_mcmc=num_mcmc_propensity,
                     )
-                    pi_train = np.mean(
+                    propensity_train = np.mean(
                         self.bart_propensity_model.y_hat_train, axis=1, keepdims=True
                     )
-                    pi_test = np.mean(
+                    propensity_test = np.mean(
                         self.bart_propensity_model.y_hat_test, axis=1, keepdims=True
                     )
                 else:
@@ -1344,7 +1344,7 @@ def sample(
                         num_burnin=num_burnin_propensity,
                         num_mcmc=num_mcmc_propensity,
                     )
-                    pi_train = np.mean(
+                    propensity_train = np.mean(
                         self.bart_propensity_model.y_hat_train, axis=1, keepdims=True
                     )
                 self.internal_propensity_model = True
@@ -1674,34 +1674,34 @@ def sample(
             )
         if propensity_covariate != "none":
             feature_types = np.append(
-                feature_types, np.repeat(0, pi_train.shape[1])
+                feature_types, np.repeat(0, propensity_train.shape[1])
             ).astype("int")
-            X_train_processed = np.c_[X_train_processed, pi_train]
+            X_train_processed = np.c_[X_train_processed, propensity_train]
             if self.has_test:
-                X_test_processed = np.c_[X_test_processed, pi_test]
+                X_test_processed = np.c_[X_test_processed, propensity_test]
             if propensity_covariate == "prognostic":
                 variable_weights_mu = np.append(
-                    variable_weights_mu, np.repeat(1 / num_cov_orig, pi_train.shape[1])
+                    variable_weights_mu, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
                 )
                 variable_weights_tau = np.append(
-                    variable_weights_tau, np.repeat(0.0, pi_train.shape[1])
+                    variable_weights_tau, np.repeat(0.0, propensity_train.shape[1])
                 )
             elif propensity_covariate == "treatment_effect":
                 variable_weights_mu = np.append(
-                    variable_weights_mu, np.repeat(0.0, pi_train.shape[1])
+                    variable_weights_mu, np.repeat(0.0, propensity_train.shape[1])
                 )
                 variable_weights_tau = np.append(
-                    variable_weights_tau, np.repeat(1 / num_cov_orig, pi_train.shape[1])
+                    variable_weights_tau, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
                 )
             elif propensity_covariate == "both":
                 variable_weights_mu = np.append(
-                    variable_weights_mu, np.repeat(1 / num_cov_orig, pi_train.shape[1])
+                    variable_weights_mu, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
                 )
                 variable_weights_tau = np.append(
-                    variable_weights_tau, np.repeat(1 / num_cov_orig, pi_train.shape[1])
+                    variable_weights_tau, np.repeat(1 / num_cov_orig, propensity_train.shape[1])
                 )
         variable_weights_variance = np.append(
-            variable_weights_variance, np.repeat(0.0, pi_train.shape[1])
+            variable_weights_variance, np.repeat(0.0, propensity_train.shape[1])
         )
 
         # Renormalize variable weights
diff --git a/test/python/test_bcf.py b/test/python/test_bcf.py
diff --git a/test/python/test_json.py b/test/python/test_json.py
diff --git a/test/python/test_predict.py b/test/python/test_predict.py
diff --git a/tools/regression/bcf/individual_regression_test_bcf.py b/tools/regression/bcf/individual_regression_test_bcf.py