Skip to content

Commit 1d047e4

Browse files
committed
nonmonotone PANOCplus
1 parent b34bf64 commit 1d047e4

File tree

5 files changed

+109
-41
lines changed

5 files changed

+109
-41
lines changed

src/algorithms/panocplus.jl

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ See also: [`PANOCplus`](@ref).
3232
- `minimum_gamma=1e-7`: lower bound to `gamma` in case `adaptive == true`.
3333
- `max_backtracks=20`: maximum number of line-search backtracks.
3434
- `directions=LBFGS(5)`: strategy to use to compute line-search directions.
35+
- `monotonicity=1`: parameter controlling the averaging scheme for nonmonotone linesearch; monotonicity ∈ (0,1], monotone scheme by default.
3536
3637
# References
3738
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
@@ -49,6 +50,7 @@ Base.@kwdef struct PANOCplusIteration{R,Tx,Tf,TA,Tg,TLf,Tgamma,D}
4950
minimum_gamma::R = real(eltype(x0))(1e-7)
5051
max_backtracks::Int = 20
5152
directions::D = LBFGS(5)
53+
monotonicity::R = real(eltype(x0))(1)
5254
end
5355

5456
Base.IteratorSize(::Type{<:PANOCplusIteration}) = Base.IsInfinite()
@@ -65,6 +67,7 @@ Base.@kwdef mutable struct PANOCplusState{R,Tx,TAx,TH}
6567
g_z::R # value of nonsmooth term (at z)
6668
res::Tx # fixed-point residual at iterate (= x - z)
6769
H::TH # variable metric
70+
merit::R = zero(gamma)
6871
tau::R = zero(gamma)
6972
x_prev::Tx = similar(x)
7073
res_prev::Tx = similar(x)
@@ -125,6 +128,8 @@ function Base.iterate(iter::PANOCplusIteration{R}) where {R}
125128
state.grad_f_Az = grad_f_Az
126129
end
127130
mul!(state.At_grad_f_Az, adjoint(iter.A), state.grad_f_Az)
131+
# initialize merit
132+
state.merit = f_model(iter, state) + state.g_z
128133
return state, state
129134
end
130135

@@ -170,9 +175,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
170175
state.x_prev .= state.x
171176
state.res_prev .= state.res
172177

173-
# compute FBE
174-
FBE_x = f_model(iter, state) + state.g_z
175-
178+
# retrieve merit and set threshold
179+
FBE_x = state.merit
176180
sigma = iter.beta * (0.5 / state.gamma) * (1 - iter.alpha)
177181
tol = 10 * eps(R) * (1 + abs(FBE_x))
178182
threshold = FBE_x - sigma * norm(state.res)^2 + tol
@@ -226,6 +230,8 @@ function Base.iterate(iter::PANOCplusIteration{R}, state::PANOCplusState) where
226230

227231
FBE_x_new = f_Az_upp + state.g_z
228232
if FBE_x_new <= threshold || tau_backtracks >= iter.max_backtracks
233+
# update merit with averaging rule
234+
state.merit = (1 - iter.monotonicity) * state.merit + iter.monotonicity * FBE_x_new
229235
break
230236
end
231237
state.tau = tau_backtracks >= iter.max_backtracks - 1 ? R(0) : state.tau / 2
@@ -280,13 +286,13 @@ See also: [`PANOCplusIteration`](@ref), [`IterativeAlgorithm`](@ref).
280286
1. De Marchi, Themelis, "Proximal Gradient Algorithms under Local Lipschitz Gradient Continuity", Journal of Optimization Theory and Applications, vol. 194, no. 3, pp. 771-794 (2022).
281287
"""
282288
PANOCplus(;
283-
maxit = 1_000,
284-
tol = 1e-8,
285-
stop = (iter, state) -> default_stopping_criterion(tol, iter, state),
286-
solution = default_solution,
287-
verbose = false,
288-
freq = 10,
289-
display = default_display,
289+
maxit=1_000,
290+
tol=1e-8,
291+
stop=(iter, state) -> default_stopping_criterion(tol, iter, state),
292+
solution=default_solution,
293+
verbose=false,
294+
freq=10,
295+
display=default_display,
290296
kwargs...,
291297
) = IterativeAlgorithm(
292298
PANOCplusIteration;

test/problems/test_lasso_small.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,17 @@ using ProximalAlgorithms:
202202
@test x0 == x0_backup
203203
end
204204

205+
@testset "PANOCplus (adaptive step, nonmonotone)" begin
206+
x0 = zeros(T, n)
207+
x0_backup = copy(x0)
208+
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity=R(0.1))
209+
x, it = @inferred solver(x0 = x0, f = f_autodiff, A = A, g = g)
210+
@test eltype(x) == T
211+
@test norm(x - x_star, Inf) <= TOL
212+
@test it < 40
213+
@test x0 == x0_backup
214+
end
215+
205216
@testset "DouglasRachford" begin
206217
x0 = zeros(T, n)
207218
x0_backup = copy(x0)

test/problems/test_lasso_small_strongly_convex.jl

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -54,26 +54,26 @@ using ProximalAlgorithms
5454
x0_backup = copy(x0)
5555

5656
@testset "SFISTA" begin
57-
solver = ProximalAlgorithms.SFISTA(tol = TOL)
58-
y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf, mf = mf)
57+
solver = ProximalAlgorithms.SFISTA(tol=TOL)
58+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf)
5959
@test eltype(y) == T
6060
@test norm(y - x_star) <= TOL
6161
@test it < 40
6262
@test x0 == x0_backup
6363
end
6464

6565
@testset "ForwardBackward" begin
66-
solver = ProximalAlgorithms.ForwardBackward(tol = TOL)
67-
y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
66+
solver = ProximalAlgorithms.ForwardBackward(tol=TOL)
67+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
6868
@test eltype(y) == T
6969
@test norm(y - x_star, Inf) <= TOL
7070
@test it < 110
7171
@test x0 == x0_backup
7272
end
7373

7474
@testset "ForwardBackward (adaptive step)" begin
75-
solver = ProximalAlgorithms.ForwardBackward(tol = TOL, adaptive = true)
76-
y, it = solver(x0 = x0, f = fA_autodiff, g = g)
75+
solver = ProximalAlgorithms.ForwardBackward(tol=TOL, adaptive=true)
76+
y, it = solver(x0=x0, f=fA_autodiff, g=g)
7777
@test eltype(y) == T
7878
@test norm(y - x_star, Inf) <= TOL
7979
@test it < 300
@@ -82,29 +82,29 @@ using ProximalAlgorithms
8282

8383
@testset "ForwardBackward (adaptive step, regret)" begin
8484
solver = ProximalAlgorithms.ForwardBackward(
85-
tol = TOL,
86-
adaptive = true,
87-
increase_gamma = T(1.01),
85+
tol=TOL,
86+
adaptive=true,
87+
increase_gamma=T(1.01),
8888
)
89-
y, it = solver(x0 = x0, f = fA_autodiff, g = g)
89+
y, it = solver(x0=x0, f=fA_autodiff, g=g)
9090
@test eltype(y) == T
9191
@test norm(y - x_star, Inf) <= TOL
9292
@test it < 80
9393
@test x0 == x0_backup
9494
end
9595

9696
@testset "FastForwardBackward" begin
97-
solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
98-
y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf, mf = mf)
97+
solver = ProximalAlgorithms.FastForwardBackward(tol=TOL)
98+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf, mf=mf)
9999
@test eltype(y) == T
100100
@test norm(y - x_star, Inf) <= TOL
101101
@test it < 35
102102
@test x0 == x0_backup
103103
end
104104

105105
@testset "FastForwardBackward (adaptive step)" begin
106-
solver = ProximalAlgorithms.FastForwardBackward(tol = TOL, adaptive = true)
107-
y, it = solver(x0 = x0, f = fA_autodiff, g = g)
106+
solver = ProximalAlgorithms.FastForwardBackward(tol=TOL, adaptive=true)
107+
y, it = solver(x0=x0, f=fA_autodiff, g=g)
108108
@test eltype(y) == T
109109
@test norm(y - x_star, Inf) <= TOL
110110
@test it < 100
@@ -113,26 +113,26 @@ using ProximalAlgorithms
113113

114114
@testset "FastForwardBackward (adaptive step, regret)" begin
115115
solver = ProximalAlgorithms.FastForwardBackward(
116-
tol = TOL,
117-
adaptive = true,
118-
increase_gamma = T(1.01),
116+
tol=TOL,
117+
adaptive=true,
118+
increase_gamma=T(1.01),
119119
)
120-
y, it = solver(x0 = x0, f = fA_autodiff, g = g)
120+
y, it = solver(x0=x0, f=fA_autodiff, g=g)
121121
@test eltype(y) == T
122122
@test norm(y - x_star, Inf) <= TOL
123123
@test it < 100
124124
@test x0 == x0_backup
125125
end
126126

127127
@testset "FastForwardBackward (custom extrapolation)" begin
128-
solver = ProximalAlgorithms.FastForwardBackward(tol = TOL)
128+
solver = ProximalAlgorithms.FastForwardBackward(tol=TOL)
129129
y, it = solver(
130-
x0 = x0,
131-
f = fA_autodiff,
132-
g = g,
133-
gamma = 1 / Lf,
134-
mf = mf,
135-
extrapolation_sequence = ProximalAlgorithms.ConstantNesterovSequence(
130+
x0=x0,
131+
f=fA_autodiff,
132+
g=g,
133+
gamma=1 / Lf,
134+
mf=mf,
135+
extrapolation_sequence=ProximalAlgorithms.ConstantNesterovSequence(
136136
mf,
137137
1 / Lf,
138138
),
@@ -144,26 +144,35 @@ using ProximalAlgorithms
144144
end
145145

146146
@testset "DRLS" begin
147-
solver = ProximalAlgorithms.DRLS(tol = TOL)
148-
v, it = solver(x0 = x0, f = fA_prox, g = g, mf = mf)
147+
solver = ProximalAlgorithms.DRLS(tol=TOL)
148+
v, it = solver(x0=x0, f=fA_prox, g=g, mf=mf)
149149
@test eltype(v) == T
150150
@test norm(v - x_star, Inf) <= TOL
151151
@test it < 14
152152
@test x0 == x0_backup
153153
end
154154

155155
@testset "PANOC" begin
156-
solver = ProximalAlgorithms.PANOC(tol = TOL)
157-
y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
156+
solver = ProximalAlgorithms.PANOC(tol=TOL)
157+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
158158
@test eltype(y) == T
159159
@test norm(y - x_star, Inf) <= TOL
160160
@test it < 45
161161
@test x0 == x0_backup
162162
end
163163

164164
@testset "PANOCplus" begin
165-
solver = ProximalAlgorithms.PANOCplus(tol = TOL)
166-
y, it = solver(x0 = x0, f = fA_autodiff, g = g, Lf = Lf)
165+
solver = ProximalAlgorithms.PANOCplus(tol=TOL)
166+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
167+
@test eltype(y) == T
168+
@test norm(y - x_star, Inf) <= TOL
169+
@test it < 45
170+
@test x0 == x0_backup
171+
end
172+
173+
@testset "PANOCplus (nonmonotone)" begin
174+
solver = ProximalAlgorithms.PANOCplus(tol=TOL, monotonicity=T(0.1))
175+
y, it = solver(x0=x0, f=fA_autodiff, g=g, Lf=Lf)
167176
@test eltype(y) == T
168177
@test norm(y - x_star, Inf) <= TOL
169178
@test it < 45

test/problems/test_nonconvex_qp.jl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ using Test
4545
@test x0 == x0_backup
4646
end
4747

48+
@testset "PANOCplus (nonmonotone)" begin
49+
x0 = zeros(T, n)
50+
x0_backup = copy(x0)
51+
solver = ProximalAlgorithms.PANOCplus(tol = TOL, monotonicity=T(0.1))
52+
x, it = solver(x0 = x0, f = f, g = g)
53+
z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
54+
@test norm(x - z, Inf) / gamma <= TOL
55+
@test x0 == x0_backup
56+
end
57+
4858
@testset "ZeroFPR" begin
4959
x0 = zeros(T, n)
5060
x0_backup = copy(x0)
@@ -112,6 +122,16 @@ end
112122
@test x0 == x0_backup
113123
end
114124

125+
@testset "PANOCplus (nonmonotone)" begin
126+
x0 = zeros(T, n)
127+
x0_backup = copy(x0)
128+
solver = ProximalAlgorithms.PANOCplus(tol = TOL, monotonicity=T(0.1))
129+
x, it = solver(x0 = x0, f = f, g = g)
130+
z = min.(upp, max.(low, x .- gamma .* (Q * x + q)))
131+
@test norm(x - z, Inf) / gamma <= TOL
132+
@test x0 == x0_backup
133+
end
134+
115135
@testset "ZeroFPR" begin
116136
x0 = zeros(T, n)
117137
x0_backup = copy(x0)

test/problems/test_sparse_logistic_small.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,26 @@ using LinearAlgebra
120120
@test x0 == x0_backup
121121
end
122122

123+
@testset "PANOCplus (adaptive step, nonmonotone)" begin
124+
x0 = zeros(T, n)
125+
x0_backup = copy(x0)
126+
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity=R(0.9))
127+
x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
128+
@test eltype(x) == T
129+
@test norm(x - x_star, Inf) <= 1e-4
130+
@test it < 50
131+
@test x0 == x0_backup
132+
end
133+
134+
@testset "PANOCplus (adaptive step, very nonmonotone)" begin
135+
x0 = zeros(T, n)
136+
x0_backup = copy(x0)
137+
solver = ProximalAlgorithms.PANOCplus(adaptive = true, tol = TOL, monotonicity=R(0.1))
138+
x, it = solver(x0 = x0, f = f_autodiff, A = A, g = g)
139+
@test eltype(x) == T
140+
@test norm(x - x_star, Inf) <= 1e-4
141+
@test it < 110
142+
@test x0 == x0_backup
143+
end
144+
123145
end

0 commit comments

Comments (0)