DFSane implementation

Deltadahl · Deltadahl · commit 7fcca42ecba2 · 2023-02-06T19:51:25.000+01:00
diff --git a/src/ad.jl b/src/ad.jl
@@ -31,16 +31,16 @@ end
 function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:Dual{T, V, P}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion, SimpleDFSane}, # TODO make this to one variable
+                         alg::AbstractSimpleNonlinearSolveAlgorithm,
                          args...; kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
                                     retcode = sol.retcode)
 end
-function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, # TODO make this to one variable
+function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
                                                 iip,
                                                 <:AbstractArray{<:Dual{T, V, P}}},
-                         alg::Union{SimpleNewtonRaphson, SimpleTrustRegion, SimpleDFSane}, args...;
+                         alg::AbstractSimpleNonlinearSolveAlgorithm, args...;
                          kwargs...) where {iip, T, V, P}
     sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
     return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
diff --git a/src/dfsane.jl b/src/dfsane.jl
@@ -1,55 +1,78 @@
 """
 ```julia
-SimpleDFSane(; σ_min::Real = 1e-10, σ_0::Real = 1.0, M::Int = 10,
-            γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
-            nexp::Int = 2)
+SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+             M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+             nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
 ```
 
-A low-overhead implementation of the df-sane method. For more information, see [1].
-References:
-    W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without gradient information for solving large-scale nonlinear systems of equations, Mathematics of Computation, 75, 1429-1448.
-### Keyword Arguments
-
+A low-overhead implementation of the df-sane method for solving large-scale nonlinear
+systems of equations. For in depth information about all the parameters and the algorithm,
+see the paper: [W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without
+gradient information for solving large-scale nonlinear systems of equations, Mathematics of
+Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_Spectral_Residual_Method_without_Gradient_Information_for_Solving_Large-Scale_Nonlinear_Systems_of_Equations)
 
-- `σ_min`: the minimum value of `σ_k`. Defaults to `1e-10`. # TODO write about this...
-- `σ_0`: the initial value of `σ_k`. Defaults to `1.0`. # TODO write about this...
-- `M`: the value of `M` in the paper. Defaults to `10`. # TODO write about this...
-- `γ`: the value of `γ` in the paper. Defaults to `1e-4`. # TODO write about this...
-- `τ_min`: the minimum value of `τ_k`. Defaults to `0.1`. # TODO write about this...
-- `τ_max`: the maximum value of `τ_k`. Defaults to `0.5`. # TODO write about this...
-- `nexp`: the value of `nexp` in the paper. Defaults to `2`. # TODO write about this...
+### Keyword Arguments
 
+- `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm. Defaults to `1e-10`.
+- `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm. Defaults to `1e10`.
+- `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
+  size in the algorithm.. Defaults to `1.0`.
+- `M`: The monotonicity of the algorithm is determined by a this positive integer.
+  A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
+  of the function `f`. However, higher values allow for more flexibility in this reduction.
+  Despite this, the algorithm still ensures global convergence through the use of a
+  non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
+  condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
+  for a higher value of `M`. The default setting is 10.
+- `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
+  will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
+- `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
+  parameter is the minimum value of that factor. Defaults to `0.1`.
+- `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
+  parameter is the maximum value of that factor. Defaults to `0.5`.
+- `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
+  `nexp ∈ {1,2}`. Defaults to `2`.
+- `η_strategy`:  function to determine the parameter `η_k`, which enables growth
+  of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
+  ``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
+  `F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
+  ``||F||^2 / k^2``.
 """
 struct SimpleDFSane{T} <: AbstractSimpleNonlinearSolveAlgorithm
     σ_min::T
-    σ_0::T
+    σ_max::T
+    σ_1::T
     M::Int
     γ::T
     τ_min::T
     τ_max::T
     nexp::Int
+    η_strategy::Function
 
-    function SimpleDFSane(; σ_min::Real = 1e-10, σ_0::Real = 1.0, M::Int = 10,
-                          γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
-                          nexp::Int = 2)
-        new{typeof(σ_min)}(σ_min, σ_0, M, γ, τ_min, τ_max, nexp)
+    function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
+                          M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
+                          nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
+        new{typeof(σ_min)}(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp, η_strategy)
     end
 end
 
-function SciMLBase.__solve(prob::NonlinearProblem,
-                           alg::SimpleDFSane, args...; abstol = nothing,
-                           reltol = nothing,
-                           maxiters = 1000, kwargs...)
+function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane,
+                           args...; abstol = nothing, reltol = nothing, maxiters = 1000,
+                           kwargs...)
     f = Base.Fix2(prob.f, prob.p)
     x = float(prob.u0)
     T = eltype(x)
     σ_min = float(alg.σ_min)
-    σ_k = float(alg.σ_0)
+    σ_max = float(alg.σ_max)
+    σ_k = float(alg.σ_1)
     M = alg.M
     γ = float(alg.γ)
     τ_min = float(alg.τ_min)
     τ_max = float(alg.τ_max)
     nexp = alg.nexp
+    η_strategy = alg.η_strategy
 
     if SciMLBase.isinplace(prob)
         error("SimpleDFSane currently only supports out-of-place nonlinear problems")
@@ -66,70 +89,67 @@ function SciMLBase.__solve(prob::NonlinearProblem,
     end
 
     f_k, F_k = ff(x)
-    α_0 = convert(T, 1.0)
-    f_0 = f_k
-    prev_fs = fill(f_k, M)
+    α_1 = convert(T, 1.0)
+    f_1 = f_k
+    history_f_k = fill(f_k, M)
 
     for k in 1:maxiters
         iszero(F_k) &&
             return SciMLBase.build_solution(prob, alg, x, F_k;
                                             retcode = ReturnCode.Success)
 
-        # Control spectral parameter
-        if abs(σ_k) > 1 / σ_min
-            σ_k = 1 / σ_min * sign(σ_k)
+        # Spectral parameter range check
+        if abs(σ_k) > σ_max
+            σ_k = sign(σ_k) * σ_max
         elseif abs(σ_k) < σ_min
-            σ_k = σ_min
+            σ_k = sign(σ_k) * σ_min
         end
 
         # Line search direction
         d = -σ_k * F_k
 
-        # Nonmonotone line search
-        η = f_0 / k^2
-
-        f_bar = maximum(prev_fs)
-        α_p = α_0
-        α_m = α_0
-        xp = x + α_p * d
-        fp, Fp = ff(xp)
+        η = η_strategy(f_1, k, x, F_k)
+        f̄ = maximum(history_f_k)
+        α_p = α_1
+        α_m = α_1
+        x_new = x + α_p * d
+        f_new, F_new = ff(x_new)
         while true
-            if fp ≤ f_bar + η - γ * α_p^2 * f_k
+            if f_new ≤ f̄ + η - γ * α_p^2 * f_k
                 break
             end
 
-            α_tp = α_p^2 * f_k / (fp + (2 * α_p - 1) * f_k)
-            xp = x - α_m * d
-            fp, Fp = ff(xp)
+            α_tp = α_p^2 * f_k / (f_new + (2 * α_p - 1) * f_k)
+            x_new = x - α_m * d
+            f_new, F_new = ff(x_new)
 
-            if fp ≤ f_bar + η - γ * α_m^2 * f_k
+            if f_new ≤ f̄ + η - γ * α_m^2 * f_k
                 break
             end
 
-            α_tm = α_m^2 * f_k / (fp + (2 * α_m - 1) * f_k)
+            α_tm = α_m^2 * f_k / (f_new + (2 * α_m - 1) * f_k)
             α_p = min(τ_max * α_p, max(α_tp, τ_min * α_p))
             α_m = min(τ_max * α_m, max(α_tm, τ_min * α_m))
-            xp = x + α_p * d
-            fp, Fp = ff(xp)
+            x_new = x + α_p * d
+            f_new, F_new = ff(x_new)
         end
 
-        if isapprox(xp, x, atol = atol, rtol = rtol)
-            return SciMLBase.build_solution(prob, alg, xp, Fp;
+        if isapprox(x_new, x, atol = atol, rtol = rtol)
+            return SciMLBase.build_solution(prob, alg, x_new, F_new;
                                             retcode = ReturnCode.Success)
         end
         # Update spectral parameter
-        s_k = xp - x
-        y_k = Fp - F_k
-        σ_k = dot(s_k, s_k) / dot(s_k, y_k)
+        s_k = x_new - x
+        y_k = F_new - F_k
+        σ_k = (s_k' * s_k) / (s_k' * y_k)
 
         # Take step
-        x = xp
-        F_k = Fp
-        f_k = fp
+        x = x_new
+        F_k = F_new
+        f_k = f_new
 
         # Store function value
-        idx_to_replace = k % M + 1
-        prev_fs[idx_to_replace] = fp
+        history_f_k[k % M + 1] = f_new
     end
     return SciMLBase.build_solution(prob, alg, x, F_k; retcode = ReturnCode.MaxIters)
 end
diff --git a/test/basictests.jl b/test/basictests.jl
@@ -160,7 +160,8 @@ for alg in [Bisection(), Falsi(), Ridder(), Brent()]
     @test ForwardDiff.jacobian(g, p) ≈ ForwardDiff.jacobian(t, p)
 end
 
-for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(), SimpleDFSane())
+for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
+            SimpleDFSane())
     global g, p
     g = function (p)
         probN = NonlinearProblem{false}(f, 0.5, p)
@@ -313,45 +314,57 @@ for options in list_of_options
     @test all(abs.(f(u, p)) .< 1e-10)
 end
 
-# # Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
-# u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
-# global g, f
-# f = (u, p) -> 0.010000000000000002 .+
-#               10.000000000000002 ./ (1 .+
-#                (0.21640425613334457 .+
-#                 216.40425613334457 ./ (1 .+
-#                  (0.21640425613334457 .+
-#                   216.40425613334457 ./
-#                   (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
-#               0.0011552453009332421u .- p
-# g = function (p)
-#     probN = NonlinearProblem{false}(f, u0, p)
-#     sol = solve(probN, SimpleDFSane())
-#     return sol.u
-# end
-# p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-# u = g(p)
-# f(u, p)
-# @test all(abs.(f(u, p)) .< 1e-10)
-
-# # Test kwars in `SimpleDFSane`
-
-
-# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
-#                       shrink_threshold, expand_threshold, shrink_factor,
-#                       expand_factor, max_shrink_times)
-# for options in list_of_options
-#     local probN, sol, alg
-#     alg = SimpleDFSane(max_trust_radius = options[1],
-#                             initial_trust_radius = options[2],
-#                             step_threshold = options[3],
-#                             shrink_threshold = options[4],
-#                             expand_threshold = options[5],
-#                             shrink_factor = options[6],
-#                             expand_factor = options[7],
-#                             max_shrink_times = options[8])
-
-#     probN = NonlinearProblem(f, u0, p)
-#     sol = solve(probN, alg)
-#     @test all(abs.(f(u, p)) .< 1e-10)
-# end
+# Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
+u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+global g, f
+f = (u, p) -> 0.010000000000000002 .+
+              10.000000000000002 ./ (1 .+
+               (0.21640425613334457 .+
+                216.40425613334457 ./ (1 .+
+                 (0.21640425613334457 .+
+                  216.40425613334457 ./
+                  (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
+              0.0011552453009332421u .- p
+g = function (p)
+    probN = NonlinearProblem{false}(f, u0, p)
+    sol = solve(probN, SimpleDFSane())
+    return sol.u
+end
+p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+u = g(p)
+f(u, p)
+@test all(abs.(f(u, p)) .< 1e-10)
+
+# Test kwars in `SimpleDFSane`
+σ_min = [1e-10, 1e-5, 1e-4]
+σ_max = [1e10, 1e5, 1e4]
+σ_1 = [1.0, 0.5, 2.0]
+M = [10, 1, 100]
+γ = [1e-4, 1e-3, 1e-5]
+τ_min = [0.1, 0.2, 0.3]
+τ_max = [0.5, 0.8, 0.9]
+nexp = [2, 1, 2]
+η_strategy = [
+    (f_1, k, x, F) -> f_1 / k^2,
+    (f_1, k, x, F) -> f_1 / k^3,
+    (f_1, k, x, F) -> f_1 / k^4,
+]
+
+list_of_options = zip(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp,
+                      η_strategy)
+for options in list_of_options
+    local probN, sol, alg
+    alg = SimpleDFSane(σ_min = options[1],
+                       σ_max = options[2],
+                       σ_1 = options[3],
+                       M = options[4],
+                       γ = options[5],
+                       τ_min = options[6],
+                       τ_max = options[7],
+                       nexp = options[8],
+                       η_strategy = options[9])
+
+    probN = NonlinearProblem(f, u0, p)
+    sol = solve(probN, alg)
+    @test all(abs.(f(u, p)) .< 1e-10)
+end