import numpy as np
from numba import njit
from skglm.solvers.base import BaseSolver
+from skglm.solvers.common import construct_grad


@njit
@@ -23,37 +24,35 @@ def __init__(self, max_iter=100, tol=1e-4, fit_intercept=False, warm_start=False
        self.opt_freq = opt_freq
        self.verbose = verbose

-    def solve(self, X, y, penalty, w_init=None, weights=None):
-        # needs a quadratic datafit, but works with L1, WeightedL1, SLOPE
+    def solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None):
        n_samples, n_features = X.shape
        all_features = np.arange(n_features)
        t_new = 1

        w = w_init.copy() if w_init is not None else np.zeros(n_features)
        z = w_init.copy() if w_init is not None else np.zeros(n_features)
-        weights = weights if weights is not None else np.ones(n_features)
+        Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples)

-        # FISTA with Gram update
-        G = X.T @ X
-        Xty = X.T @ y
+        # line search?
+        # lipschitz = np.max(datafit.lipschitz)
        lipschitz = np.linalg.norm(X, ord=2) ** 2 / n_samples

        for n_iter in range(self.max_iter):
            t_old = t_new
            t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
            w_old = w.copy()
-            grad = (G @ z - Xty) / n_samples
+            grad = construct_grad(X, y, z, X @ z, datafit, all_features)
            z -= grad / lipschitz
            w = _prox_vec(w, z, penalty, lipschitz)
+            Xw = X @ w
            z = w + (t_old - 1.) / t_new * (w - w_old)

            if n_iter % self.opt_freq == 0:
                opt = penalty.subdiff_distance(w, grad, all_features)
                stop_crit = np.max(opt)

                if self.verbose:
-                    p_obj = (np.sum((y - X @ w) ** 2) / (2 * n_samples)
-                             + penalty.value(w))
+                    p_obj = datafit.value(y, w, Xw) + penalty.value(w)
                    print(
                        f"Iteration {n_iter + 1}: {p_obj:.10f}, "
                        f"stopping crit: {stop_crit:.2e}"
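To check the update scheme in isolation, here is a self-contained FISTA iteration for the Lasso (quadratic datafit, L1 penalty) in plain numpy; `soft_threshold` and `fista_lasso` are illustrative names, not skglm API:

import numpy as np


def soft_threshold(x, tau):
    # Prox of tau * ||.||_1, applied element-wise.
    return np.sign(x) * np.maximum(np.abs(x) - tau, 0.)


def fista_lasso(X, y, alpha, max_iter=100):
    # Minimizes ||y - X w||^2 / (2 * n_samples) + alpha * ||w||_1.
    n_samples, n_features = X.shape
    # Same spectral-norm Lipschitz bound as in the diff above.
    lipschitz = np.linalg.norm(X, ord=2) ** 2 / n_samples
    w = np.zeros(n_features)
    z = w.copy()
    t_new = 1.
    for _ in range(max_iter):
        t_old = t_new
        t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
        w_old = w.copy()
        # Gradient of the quadratic datafit at the extrapolated point z.
        grad = X.T @ (X @ z - y) / n_samples
        # Forward-backward step: gradient step, then prox of the L1 penalty.
        w = soft_threshold(z - grad / lipschitz, alpha / lipschitz)
        # Nesterov extrapolation, as in the solve() loop above.
        z = w + (t_old - 1.) / t_new * (w - w_old)
    return w

As in the diff, the momentum uses the classical sequence t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2, which gives FISTA its O(1/k^2) convergence rate over plain proximal gradient descent.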