@@ -87,9 +87,20 @@ def _data_setter(self, X) -> None:
8787
8888 This method is used internally to register new data for the model for
8989 prediction.
90+
91+ NOTE: We are actively replacing `X`. Often, the new matrix will have a different
92+ number of rows than the original data. To keep the shapes consistent, we need to
93+ update every data node in the model to have the new number of rows. The values of
94+ `y` are not used, so we set them to 0. In our case, the only data nodes are `X`
95+ and `y`, but if future models have more data nodes, then we will need to update
96+ all of them - ideally programmatically.
9097 """
98+ new_no_of_observations = X .shape [0 ]
9199 with self :
92- pm .set_data ({"X" : X })
100+ pm .set_data (
101+ {"X" : X , "y" : np .zeros (new_no_of_observations )},
102+ coords = {"obs_ind" : np .arange (new_no_of_observations )},
103+ )
93104
94105 def fit (self , X , y , coords : Optional [Dict [str , Any ]] = None ) -> None :
95106 """Draw samples from posterior, prior predictive, and posterior predictive
@@ -117,7 +128,6 @@ def predict(self, X):
117128
118129 .. caution::
119130 Results in KeyError if model hasn't been fit.
120-
121131 """
122132
123133 # Ensure random_seed is used in sample_prior_predictive() and
@@ -206,7 +216,7 @@ class LinearRegression(PyMCModel):
206216 >>> lr = LinearRegression(sample_kwargs={"progressbar": False})
207217 >>> lr.fit(X, y, coords={
208218 ... 'coeffs': ['x', 'treated'],
209- ... 'obs_indx ': np.arange(rd.shape[0])
219+ ... 'obs_ind': np.arange(rd.shape[0])
210220 ... },
211221 ... )
212222 Inference data...
@@ -451,7 +461,7 @@ class PropensityScore(PyMCModel):
451461 >>> ps = PropensityScore(sample_kwargs={"progressbar": False})
452462 >>> ps.fit(X, t, coords={
453463 ... 'coeffs': ['age', 'race'],
454- ... 'obs_indx ': np.arange(df.shape[0])
464+ ... 'obs_ind': np.arange(df.shape[0])
455465 ... },
456466 ... )
457467 Inference...
0 commit comments