@@ -119,6 +119,126 @@ def intercept_update_step(self, y, Xw):
119119 return np .mean (Xw - y )
120120
121121
class WeightedQuadratic(BaseDatafit):
    r"""Weighted Quadratic datafit to handle sample weights.

    The datafit reads:

    .. math:: 1 / (2 xx \sum_(i=1)^(n_"samples") weights_i)
        \sum_(i=1)^(n_"samples") weights_i (y_i - (Xw)_i)^ 2

    Attributes
    ----------
    Xtwy : array, shape (n_features,)
        Pre-computed quantity used during the gradient evaluation.
        Equal to ``X.T @ (sample_weights * y)``.

    sample_weights : array, shape (n_samples,)
        Weights for each sample.

    Note
    ----
    The class is jit compiled at fit time using Numba compiler.
    This allows for faster computations.
    """

    def __init__(self, sample_weights):
        self.sample_weights = sample_weights

    def get_spec(self):
        # Attribute names here must match exactly what ``initialize`` /
        # ``initialize_sparse`` set, otherwise jitclass compilation fails.
        spec = (
            ('Xtwy', float64[:]),
            ('sample_weights', float64[:]),
        )
        return spec

    def params_to_dict(self):
        return {'sample_weights': self.sample_weights}

    def get_lipschitz(self, X, y):
        """Per-coordinate Lipschitz constants: weighted column norms / total weight."""
        n_features = X.shape[1]
        lipschitz = np.zeros(n_features, dtype=X.dtype)
        w_sum = self.sample_weights.sum()

        for j in range(n_features):
            lipschitz[j] = (self.sample_weights * X[:, j] ** 2).sum() / w_sum

        return lipschitz

    def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
        """Sparse (CSC arrays) variant of ``get_lipschitz``."""
        n_features = len(X_indptr) - 1
        lipschitz = np.zeros(n_features, dtype=X_data.dtype)
        w_sum = self.sample_weights.sum()

        for j in range(n_features):
            nrm2 = 0.
            for idx in range(X_indptr[j], X_indptr[j + 1]):
                nrm2 += self.sample_weights[X_indices[idx]] * X_data[idx] ** 2

            lipschitz[j] = nrm2 / w_sum

        return lipschitz

    def initialize(self, X, y):
        """Pre-compute ``X.T @ (sample_weights * y)`` for gradient evaluations."""
        self.Xtwy = X.T @ (self.sample_weights * y)

    def initialize_sparse(self, X_data, X_indptr, X_indices, y):
        """Sparse (CSC arrays) variant of ``initialize``."""
        # BUG FIX: this previously stored the result in ``self.Xty`` while the
        # spec, docstring, and dense path all use ``Xtwy`` — an undeclared
        # attribute under the Numba jitclass spec. Renamed for consistency.
        n_features = len(X_indptr) - 1
        self.Xtwy = np.zeros(n_features, dtype=X_data.dtype)

        for j in range(n_features):
            # float accumulator (was ``0``) to keep Numba type unification stable
            xtwy = 0.
            for idx in range(X_indptr[j], X_indptr[j + 1]):
                xtwy += (X_data[idx] * self.sample_weights[X_indices[idx]]
                         * y[X_indices[idx]])
            self.Xtwy[j] = xtwy

    def get_global_lipschitz(self, X, y):
        """Global Lipschitz constant: squared spectral norm of weighted X / total weight."""
        w_sum = self.sample_weights.sum()
        return norm(X.T @ np.sqrt(self.sample_weights), ord=2) ** 2 / w_sum

    def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
        """Sparse (CSC arrays) variant of ``get_global_lipschitz``."""
        return spectral_norm(
            X_data * np.sqrt(self.sample_weights[X_indices]),
            X_indptr, X_indices, len(y)) ** 2 / self.sample_weights.sum()

    def value(self, y, w, Xw):
        """Weighted least-squares objective value."""
        w_sum = self.sample_weights.sum()
        return np.sum(self.sample_weights * (y - Xw) ** 2) / (2 * w_sum)

    def gradient_scalar(self, X, y, w, Xw, j):
        """Gradient of the datafit w.r.t. coordinate ``j`` (dense X)."""
        return (X[:, j] @ (self.sample_weights * (Xw - y))) / self.sample_weights.sum()

    def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
        """Gradient of the datafit w.r.t. coordinate ``j`` (sparse CSC arrays)."""
        XjTXw = 0.
        for i in range(X_indptr[j], X_indptr[j + 1]):
            XjTXw += X_data[i] * self.sample_weights[X_indices[i]] * Xw[X_indices[i]]
        # BUG FIX: was ``self.Xty`` — attribute is declared/initialized as ``Xtwy``
        return (XjTXw - self.Xtwy[j]) / self.sample_weights.sum()

    def gradient(self, X, y, Xw):
        """Full gradient of the datafit (dense X)."""
        return X.T @ (self.sample_weights * (Xw - y)) / self.sample_weights.sum()

    def raw_grad(self, y, Xw):
        """Per-sample gradient of the datafit w.r.t. ``Xw``."""
        return (self.sample_weights * (Xw - y)) / self.sample_weights.sum()

    def raw_hessian(self, y, Xw):
        """Per-sample (diagonal) Hessian of the datafit w.r.t. ``Xw``."""
        return self.sample_weights / self.sample_weights.sum()

    def full_grad_sparse(self, X_data, X_indptr, X_indices, y, Xw):
        """Full gradient of the datafit (sparse CSC arrays)."""
        n_features = X_indptr.shape[0] - 1
        grad = np.zeros(n_features, dtype=Xw.dtype)

        for j in range(n_features):
            XjTXw = 0.
            for i in range(X_indptr[j], X_indptr[j + 1]):
                XjTXw += (X_data[i] * self.sample_weights[X_indices[i]]
                          * Xw[X_indices[i]])
            # BUG FIX: was ``self.Xty`` — attribute is declared/initialized as ``Xtwy``
            grad[j] = (XjTXw - self.Xtwy[j]) / self.sample_weights.sum()
        return grad

    def intercept_update_step(self, y, Xw):
        """Weighted mean of the residual, used to update the intercept."""
        return np.sum(self.sample_weights * (Xw - y)) / self.sample_weights.sum()
240+
241+
122242@njit
123243def sigmoid (x ):
124244 """Vectorwise sigmoid."""
0 commit comments