Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

Commit 7fcca42

Browse files
committed
DFSane implementation
1 parent 9312115 commit 7fcca42

File tree

3 files changed

+137
-104
lines changed

3 files changed

+137
-104
lines changed

src/ad.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,16 @@ end
3131
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
3232
iip,
3333
<:Dual{T, V, P}},
34-
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion, SimpleDFSane}, # TODO make this to one variable
34+
alg::AbstractSimpleNonlinearSolveAlgorithm,
3535
args...; kwargs...) where {iip, T, V, P}
3636
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
3737
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;
3838
retcode = sol.retcode)
3939
end
40-
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector}, # TODO make this to one variable
40+
function SciMLBase.solve(prob::NonlinearProblem{<:Union{Number, StaticArraysCore.SVector},
4141
iip,
4242
<:AbstractArray{<:Dual{T, V, P}}},
43-
alg::Union{SimpleNewtonRaphson, SimpleTrustRegion, SimpleDFSane}, args...;
43+
alg::AbstractSimpleNonlinearSolveAlgorithm, args...;
4444
kwargs...) where {iip, T, V, P}
4545
sol, partials = scalar_nlsolve_ad(prob, alg, args...; kwargs...)
4646
return SciMLBase.build_solution(prob, alg, Dual{T, V, P}(sol.u, partials), sol.resid;

src/dfsane.jl

Lines changed: 78 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,78 @@
11
"""
22
```julia
3-
SimpleDFSane(; σ_min::Real = 1e-10, σ_0::Real = 1.0, M::Int = 10,
4-
γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
5-
nexp::Int = 2)
3+
SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
4+
M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
5+
nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
66
```
77
8-
A low-overhead implementation of the df-sane method. For more information, see [1].
9-
References:
10-
W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without gradient information for solving large-scale nonlinear systems of equations, Mathematics of Computation, 75, 1429-1448.
11-
### Keyword Arguments
12-
8+
A low-overhead implementation of the df-sane method for solving large-scale nonlinear
9+
systems of equations. For in depth information about all the parameters and the algorithm,
10+
see the paper: [W LaCruz, JM Martinez, and M Raydan (2006), Spectral residual mathod without
11+
gradient information for solving large-scale nonlinear systems of equations, Mathematics of
12+
Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_Spectral_Residual_Method_without_Gradient_Information_for_Solving_Large-Scale_Nonlinear_Systems_of_Equations)
1313
14-
- `σ_min`: the minimum value of `σ_k`. Defaults to `1e-10`. # TODO write about this...
15-
- `σ_0`: the initial value of `σ_k`. Defaults to `1.0`. # TODO write about this...
16-
- `M`: the value of `M` in the paper. Defaults to `10`. # TODO write about this...
17-
- `γ`: the value of `γ` in the paper. Defaults to `1e-4`. # TODO write about this...
18-
- `τ_min`: the minimum value of `τ_k`. Defaults to `0.1`. # TODO write about this...
19-
- `τ_max`: the maximum value of `τ_k`. Defaults to `0.5`. # TODO write about this...
20-
- `nexp`: the value of `nexp` in the paper. Defaults to `2`. # TODO write about this...
14+
### Keyword Arguments
2115
16+
- `σ_min`: the minimum value of the spectral coefficient `σ_k` which is related to the step
17+
size in the algorithm. Defaults to `1e-10`.
18+
- `σ_max`: the maximum value of the spectral coefficient `σ_k` which is related to the step
19+
size in the algorithm. Defaults to `1e10`.
20+
- `σ_1`: the initial value of the spectral coefficient `σ_k` which is related to the step
21+
size in the algorithm.. Defaults to `1.0`.
22+
- `M`: The monotonicity of the algorithm is determined by a this positive integer.
23+
A value of 1 for `M` would result in strict monotonicity in the decrease of the L2-norm
24+
of the function `f`. However, higher values allow for more flexibility in this reduction.
25+
Despite this, the algorithm still ensures global convergence through the use of a
26+
non-monotone line-search algorithm that adheres to the Grippo-Lampariello-Lucidi
27+
condition. Values in the range of 5 to 20 are usually sufficient, but some cases may call
28+
for a higher value of `M`. The default setting is 10.
29+
- `γ`: a parameter that influences if a proposed step will be accepted. Higher value of `γ`
30+
will make the algorithm more restrictive in accepting steps. Defaults to `1e-4`.
31+
- `τ_min`: if a step is rejected the new step size will get multiplied by factor, and this
32+
parameter is the minimum value of that factor. Defaults to `0.1`.
33+
- `τ_max`: if a step is rejected the new step size will get multiplied by factor, and this
34+
parameter is the maximum value of that factor. Defaults to `0.5`.
35+
- `nexp`: the exponent of the loss, i.e. ``f_k=||F(x_k)||^{nexp}``. The paper uses
36+
`nexp ∈ {1,2}`. Defaults to `2`.
37+
- `η_strategy`: function to determine the parameter `η_k`, which enables growth
38+
of ``||F||^2``. Called as ``η_k = η_strategy(f_1, k, x, F)`` with `f_1` initialized as
39+
``f_1=||F(x_1)||^{nexp}``, `k` is the iteration number, `x` is the current `x`-value and
40+
`F` the current residual. Should satisfy ``η_k > 0`` and ``∑ₖ ηₖ < ∞``. Defaults to
41+
``||F||^2 / k^2``.
2242
"""
2343
struct SimpleDFSane{T} <: AbstractSimpleNonlinearSolveAlgorithm
2444
σ_min::T
25-
σ_0::T
45+
σ_max::T
46+
σ_1::T
2647
M::Int
2748
γ::T
2849
τ_min::T
2950
τ_max::T
3051
nexp::Int
52+
η_strategy::Function
3153

32-
function SimpleDFSane(; σ_min::Real = 1e-10, σ_0::Real = 1.0, M::Int = 10,
33-
γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
34-
nexp::Int = 2)
35-
new{typeof(σ_min)}(σ_min, σ_0, M, γ, τ_min, τ_max, nexp)
54+
function SimpleDFSane(; σ_min::Real = 1e-10, σ_max::Real = 1e10, σ_1::Real = 1.0,
55+
M::Int = 10, γ::Real = 1e-4, τ_min::Real = 0.1, τ_max::Real = 0.5,
56+
nexp::Int = 2, η_strategy::Function = (f_1, k, x, F) -> f_1 / k^2)
57+
new{typeof(σ_min)}(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp, η_strategy)
3658
end
3759
end
3860

39-
function SciMLBase.__solve(prob::NonlinearProblem,
40-
alg::SimpleDFSane, args...; abstol = nothing,
41-
reltol = nothing,
42-
maxiters = 1000, kwargs...)
61+
function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane,
62+
args...; abstol = nothing, reltol = nothing, maxiters = 1000,
63+
kwargs...)
4364
f = Base.Fix2(prob.f, prob.p)
4465
x = float(prob.u0)
4566
T = eltype(x)
4667
σ_min = float(alg.σ_min)
47-
σ_k = float(alg.σ_0)
68+
σ_max = float(alg.σ_max)
69+
σ_k = float(alg.σ_1)
4870
M = alg.M
4971
γ = float(alg.γ)
5072
τ_min = float(alg.τ_min)
5173
τ_max = float(alg.τ_max)
5274
nexp = alg.nexp
75+
η_strategy = alg.η_strategy
5376

5477
if SciMLBase.isinplace(prob)
5578
error("SimpleDFSane currently only supports out-of-place nonlinear problems")
@@ -66,70 +89,67 @@ function SciMLBase.__solve(prob::NonlinearProblem,
6689
end
6790

6891
f_k, F_k = ff(x)
69-
α_0 = convert(T, 1.0)
70-
f_0 = f_k
71-
prev_fs = fill(f_k, M)
92+
α_1 = convert(T, 1.0)
93+
f_1 = f_k
94+
history_f_k = fill(f_k, M)
7295

7396
for k in 1:maxiters
7497
iszero(F_k) &&
7598
return SciMLBase.build_solution(prob, alg, x, F_k;
7699
retcode = ReturnCode.Success)
77100

78-
# Control spectral parameter
79-
if abs(σ_k) > 1 / σ_min
80-
σ_k = 1 / σ_min * sign(σ_k)
101+
# Spectral parameter range check
102+
if abs(σ_k) > σ_max
103+
σ_k = sign(σ_k) * σ_max
81104
elseif abs(σ_k) < σ_min
82-
σ_k = σ_min
105+
σ_k = sign(σ_k) * σ_min
83106
end
84107

85108
# Line search direction
86109
d = -σ_k * F_k
87110

88-
# Nonmonotone line search
89-
η = f_0 / k^2
90-
91-
f_bar = maximum(prev_fs)
92-
α_p = α_0
93-
α_m = α_0
94-
xp = x + α_p * d
95-
fp, Fp = ff(xp)
111+
η = η_strategy(f_1, k, x, F_k)
112+
= maximum(history_f_k)
113+
α_p = α_1
114+
α_m = α_1
115+
x_new = x + α_p * d
116+
f_new, F_new = ff(x_new)
96117
while true
97-
if fp f_bar + η - γ * α_p^2 * f_k
118+
if f_new + η - γ * α_p^2 * f_k
98119
break
99120
end
100121

101-
α_tp = α_p^2 * f_k / (fp + (2 * α_p - 1) * f_k)
102-
xp = x - α_m * d
103-
fp, Fp = ff(xp)
122+
α_tp = α_p^2 * f_k / (f_new + (2 * α_p - 1) * f_k)
123+
x_new = x - α_m * d
124+
f_new, F_new = ff(x_new)
104125

105-
if fp f_bar + η - γ * α_m^2 * f_k
126+
if f_new + η - γ * α_m^2 * f_k
106127
break
107128
end
108129

109-
α_tm = α_m^2 * f_k / (fp + (2 * α_m - 1) * f_k)
130+
α_tm = α_m^2 * f_k / (f_new + (2 * α_m - 1) * f_k)
110131
α_p = min(τ_max * α_p, max(α_tp, τ_min * α_p))
111132
α_m = min(τ_max * α_m, max(α_tm, τ_min * α_m))
112-
xp = x + α_p * d
113-
fp, Fp = ff(xp)
133+
x_new = x + α_p * d
134+
f_new, F_new = ff(x_new)
114135
end
115136

116-
if isapprox(xp, x, atol = atol, rtol = rtol)
117-
return SciMLBase.build_solution(prob, alg, xp, Fp;
137+
if isapprox(x_new, x, atol = atol, rtol = rtol)
138+
return SciMLBase.build_solution(prob, alg, x_new, F_new;
118139
retcode = ReturnCode.Success)
119140
end
120141
# Update spectral parameter
121-
s_k = xp - x
122-
y_k = Fp - F_k
123-
σ_k = dot(s_k, s_k) / dot(s_k, y_k)
142+
s_k = x_new - x
143+
y_k = F_new - F_k
144+
σ_k = (s_k' * s_k) / (s_k' * y_k)
124145

125146
# Take step
126-
x = xp
127-
F_k = Fp
128-
f_k = fp
147+
x = x_new
148+
F_k = F_new
149+
f_k = f_new
129150

130151
# Store function value
131-
idx_to_replace = k % M + 1
132-
prev_fs[idx_to_replace] = fp
152+
history_f_k[k % M + 1] = f_new
133153
end
134154
return SciMLBase.build_solution(prob, alg, x, F_k; retcode = ReturnCode.MaxIters)
135155
end

test/basictests.jl

Lines changed: 56 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ for alg in [Bisection(), Falsi(), Ridder(), Brent()]
160160
@test ForwardDiff.jacobian(g, p) ForwardDiff.jacobian(t, p)
161161
end
162162

163-
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(), SimpleDFSane())
163+
for alg in (SimpleNewtonRaphson(), Broyden(), Klement(), SimpleTrustRegion(),
164+
SimpleDFSane())
164165
global g, p
165166
g = function (p)
166167
probN = NonlinearProblem{false}(f, 0.5, p)
@@ -313,45 +314,57 @@ for options in list_of_options
313314
@test all(abs.(f(u, p)) .< 1e-10)
314315
end
315316

316-
# # Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
317-
# u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
318-
# global g, f
319-
# f = (u, p) -> 0.010000000000000002 .+
320-
# 10.000000000000002 ./ (1 .+
321-
# (0.21640425613334457 .+
322-
# 216.40425613334457 ./ (1 .+
323-
# (0.21640425613334457 .+
324-
# 216.40425613334457 ./
325-
# (1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
326-
# 0.0011552453009332421u .- p
327-
# g = function (p)
328-
# probN = NonlinearProblem{false}(f, u0, p)
329-
# sol = solve(probN, SimpleDFSane())
330-
# return sol.u
331-
# end
332-
# p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
333-
# u = g(p)
334-
# f(u, p)
335-
# @test all(abs.(f(u, p)) .< 1e-10)
336-
337-
# # Test kwars in `SimpleDFSane`
338-
339-
340-
# list_of_options = zip(max_trust_radius, initial_trust_radius, step_threshold,
341-
# shrink_threshold, expand_threshold, shrink_factor,
342-
# expand_factor, max_shrink_times)
343-
# for options in list_of_options
344-
# local probN, sol, alg
345-
# alg = SimpleDFSane(max_trust_radius = options[1],
346-
# initial_trust_radius = options[2],
347-
# step_threshold = options[3],
348-
# shrink_threshold = options[4],
349-
# expand_threshold = options[5],
350-
# shrink_factor = options[6],
351-
# expand_factor = options[7],
352-
# max_shrink_times = options[8])
353-
354-
# probN = NonlinearProblem(f, u0, p)
355-
# sol = solve(probN, alg)
356-
# @test all(abs.(f(u, p)) .< 1e-10)
357-
# end
317+
# Test that `SimpleDFSane` passes a test that `SimpleNewtonRaphson` fails on.
318+
u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
319+
global g, f
320+
f = (u, p) -> 0.010000000000000002 .+
321+
10.000000000000002 ./ (1 .+
322+
(0.21640425613334457 .+
323+
216.40425613334457 ./ (1 .+
324+
(0.21640425613334457 .+
325+
216.40425613334457 ./
326+
(1 .+ 0.0006250000000000001(u .^ 2.0))) .^ 2.0)) .^ 2.0) .-
327+
0.0011552453009332421u .- p
328+
g = function (p)
329+
probN = NonlinearProblem{false}(f, u0, p)
330+
sol = solve(probN, SimpleDFSane())
331+
return sol.u
332+
end
333+
p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
334+
u = g(p)
335+
f(u, p)
336+
@test all(abs.(f(u, p)) .< 1e-10)
337+
338+
# Test kwars in `SimpleDFSane`
339+
σ_min = [1e-10, 1e-5, 1e-4]
340+
σ_max = [1e10, 1e5, 1e4]
341+
σ_1 = [1.0, 0.5, 2.0]
342+
M = [10, 1, 100]
343+
γ = [1e-4, 1e-3, 1e-5]
344+
τ_min = [0.1, 0.2, 0.3]
345+
τ_max = [0.5, 0.8, 0.9]
346+
nexp = [2, 1, 2]
347+
η_strategy = [
348+
(f_1, k, x, F) -> f_1 / k^2,
349+
(f_1, k, x, F) -> f_1 / k^3,
350+
(f_1, k, x, F) -> f_1 / k^4,
351+
]
352+
353+
list_of_options = zip(σ_min, σ_max, σ_1, M, γ, τ_min, τ_max, nexp,
354+
η_strategy)
355+
for options in list_of_options
356+
local probN, sol, alg
357+
alg = SimpleDFSane(σ_min = options[1],
358+
σ_max = options[2],
359+
σ_1 = options[3],
360+
M = options[4],
361+
γ = options[5],
362+
τ_min = options[6],
363+
τ_max = options[7],
364+
nexp = options[8],
365+
η_strategy = options[9])
366+
367+
probN = NonlinearProblem(f, u0, p)
368+
sol = solve(probN, alg)
369+
@test all(abs.(f(u, p)) .< 1e-10)
370+
end

0 commit comments

Comments
 (0)