Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

Commit e56a659

Browse files
committed
Add alpha scaling
1 parent c320b75 commit e56a659

File tree

4 files changed

+69
-37
lines changed

4 files changed

+69
-37
lines changed

src/linesearch.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,10 @@ function (cache::StaticLiFukushimaLineSearchCache)(u, δu)
112112
# Early Terminate based on Eq. 2.7
113113
fx_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u, cache.p))
114114
du_norm = NONLINEARSOLVE_DEFAULT_NORM(δu)
115-
fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ cache.λ₀ .* δu, cache.p))
115+
fxλ_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ δu, cache.p))
116116
fxλ_norm cache.ρ * fx_norm - cache.σ₂ * du_norm^2 && return T(true)
117117

118118
λ₂, λ₁ = cache.λ₀, cache.λ₀
119-
fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))
120119

121120
for i in 1:(cache.maxiters)
122121
fxλp_norm = NONLINEARSOLVE_DEFAULT_NORM(cache.f(u .+ λ₂ .* δu, cache.p))

src/nlsolve/broyden.jl

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
"""
2-
SimpleBroyden(; linesearch = Val(false))
2+
SimpleBroyden(; linesearch = Val(false), alpha = nothing)
33
44
A low-overhead implementation of Broyden. This method is non-allocating on scalar
55
and static array problems.
66
7-
If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
8-
no line search is used. For advanced customization of the line search, use the
9-
[`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
7+
### Keyword Arguments
8+
9+
* `linesearch`: If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch`
10+
[1] line search else no line search is used. For advanced customization of the line
11+
search, use the [`Broyden`](@ref) algorithm in `NonlinearSolve.jl`.
12+
* `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
13+
will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
1014
1115
### References
1216
1317
[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
1418
of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
1519
(2000): 181-201.
1620
"""
17-
struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm end
21+
@concrete struct SimpleBroyden{linesearch} <: AbstractSimpleNonlinearSolveAlgorithm
22+
alpha
23+
end
1824

19-
SimpleBroyden(; linesearch = Val(false)) = SimpleBroyden{_unwrap_val(linesearch)}()
25+
function SimpleBroyden(; linesearch = Val(false), alpha = nothing)
26+
return SimpleBroyden{_unwrap_val(linesearch)}(alpha)
27+
end
2028

2129
__get_linesearch(::SimpleBroyden{LS}) where {LS} = Val(LS)
2230

@@ -25,13 +33,22 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
2533
termination_condition = nothing, kwargs...)
2634
x = __maybe_unaliased(prob.u0, alias_u0)
2735
fx = _get_fx(prob, x)
36+
T = promote_type(eltype(x), eltype(fx))
2837

2938
@bb xo = copy(x)
3039
@bb δx = copy(x)
3140
@bb δf = copy(fx)
3241
@bb fprev = copy(fx)
3342

34-
J⁻¹ = __init_identity_jacobian(fx, x)
43+
if alg.alpha === nothing
44+
fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
45+
x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
46+
init_α = ifelse(fx_norm 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
47+
else
48+
init_α = inv(alg.alpha)
49+
end
50+
51+
J⁻¹ = __init_identity_jacobian(fx, x, init_α)
3552
@bb J⁻¹δf = copy(x)
3653
@bb xᵀJ⁻¹ = copy(x)
3754
@bb δJ⁻¹n = copy(x)
@@ -47,7 +64,7 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
4764
@bb δx = J⁻¹ × vec(fprev)
4865
@bb δx .*= -1
4966

50-
α = ls_cache === nothing ? true : ls_cache(x, δx)
67+
α = ls_cache === nothing ? true : ls_cache(xo, δx)
5168
@bb @. x = xo + α * δx
5269
fx = __eval_f(prob, fx, x)
5370
@bb @. δf = fx - fprev

src/nlsolve/lbroyden.jl

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
11
"""
2-
SimpleLimitedMemoryBroyden(; threshold::Int = 27, linesearch = Val(false))
3-
SimpleLimitedMemoryBroyden(; threshold::Val = Val(27), linesearch = Val(false))
2+
SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
3+
linesearch = Val(false), alpha = nothing)
44
55
A limited memory implementation of Broyden. This method applies the L-BFGS scheme to
66
Broyden's method.
77
88
If the threshold is larger than the problem size, then this method will use `SimpleBroyden`.
99
10-
If `linesearch` is `Val(true)`, then we use the `LiFukushimaLineSearch` [1] line search else
11-
no line search is used. For advanced customization of the line search, use the
12-
[`LimitedMemoryBroyden`](@ref) algorithm in `NonlinearSolve.jl`.
10+
### Keyword Arguments:
11+
12+
- `linesearch`: If `linesearch` is `Val(true)`, then we use the
13+
`LiFukushimaLineSearch` [1] line search else no line search is used. For advanced
14+
customization of the line search, use the [`LimitedMemoryBroyden`](@ref) algorithm in
15+
`NonlinearSolve.jl`.
16+
- `alpha`: Scale the initial jacobian initialization with `alpha`. If it is `nothing`, we
17+
will compute the scaling using `2 * norm(fu) / max(norm(u), true)`.
1318
1419
### References
1520
1621
[1] Li, Dong-Hui, and Masao Fukushima. "A derivative-free line search and global convergence
1722
of Broyden-like method for nonlinear equations." Optimization methods and software 13.3
1823
(2000): 181-201.
1924
"""
20-
struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
21-
AbstractSimpleNonlinearSolveAlgorithm end
25+
@concrete struct SimpleLimitedMemoryBroyden{threshold, linesearch} <:
26+
AbstractSimpleNonlinearSolveAlgorithm
27+
alpha
28+
end
2229

2330
__get_threshold(::SimpleLimitedMemoryBroyden{threshold}) where {threshold} = Val(threshold)
2431
__use_linesearch(::SimpleLimitedMemoryBroyden{Th, LS}) where {Th, LS} = Val(LS)
2532

2633
function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27),
27-
linesearch = Val(false))
28-
return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}()
34+
linesearch = Val(false), alpha = nothing)
35+
return SimpleLimitedMemoryBroyden{_unwrap_val(threshold), _unwrap_val(linesearch)}(alpha)
2936
end
3037

38+
# Don't resolve the `abstol` and `reltol` here
3139
function SciMLBase.solve(prob::NonlinearProblem{<:Union{<:Number, <:SArray}},
3240
alg::SimpleLimitedMemoryBroyden, args...; kwargs...)
33-
# Don't resolve the `abstol` and `reltol` here
3441
return SciMLBase.__solve(prob, alg, args...; kwargs...)
3542
end
3643

@@ -133,8 +140,17 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
133140
ls_cache = __use_linesearch(alg) === Val(true) ?
134141
LiFukushimaLineSearch()(prob, fx, x) : nothing
135142

143+
T = promote_type(eltype(x), eltype(fx))
144+
if alg.alpha === nothing
145+
fx_norm = NONLINEARSOLVE_DEFAULT_NORM(fx)
146+
x_norm = NONLINEARSOLVE_DEFAULT_NORM(x)
147+
init_α = ifelse(fx_norm 1e-5, max(x_norm, T(true)) / (2 * fx_norm), T(true))
148+
else
149+
init_α = inv(alg.alpha)
150+
end
151+
136152
converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
137-
threshold, ls_cache)
153+
threshold, ls_cache, init_α)
138154

139155
converged &&
140156
return build_solution(prob, alg, res.x, res.fx; retcode = ReturnCode.Success)
@@ -150,16 +166,16 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
150166
maximum(abs, fx) abstol &&
151167
return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)
152168

153-
vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx)))
154-
mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf)))
169+
vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx), init_α))
170+
mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf), init_α))
155171

156172
d = dot(vᵀ, δf)
157173
δx = @. (δx - mvec) / d
158174

159175
U = Base.setindex(U, vec(δx), mod1(i, _unwrap_val(threshold)))
160176
Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, _unwrap_val(threshold)))
161177

162-
δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
178+
δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx), init_α))
163179

164180
xo = x
165181
fo = fx
@@ -169,7 +185,7 @@ function __static_solve(prob::NonlinearProblem{<:SArray}, alg::SimpleLimitedMemo
169185
end
170186

171187
@generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
172-
Vᵀ, ::Val{threshold}, ls_cache) where {threshold}
188+
Vᵀ, ::Val{threshold}, ls_cache, init_α) where {threshold}
173189
calls = []
174190
for i in 1:threshold
175191
static_idx, static_idx_p1 = Val(i - 1), Val(i)
@@ -185,8 +201,8 @@ end
185201
_U = __first_n_getindex(U, $(static_idx))
186202
_Vᵀ = __first_n_getindex(Vᵀ, $(static_idx))
187203

188-
vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx)))
189-
mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf)))
204+
vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx), init_α))
205+
mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf), init_α))
190206

191207
d = dot(vᵀ, δf)
192208
δx = @. (δx - mvec) / d
@@ -196,7 +212,7 @@ end
196212

197213
_U = __first_n_getindex(U, $(static_idx_p1))
198214
_Vᵀ = __first_n_getindex(Vᵀ, $(static_idx_p1))
199-
δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx)))
215+
δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx), init_α))
200216

201217
xo = x
202218
fo = fx
@@ -226,8 +242,8 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
226242
return y
227243
end
228244

229-
@inline _rmatvec!!(::Nothing, Vᵀ, x) = -x
230-
@inline _rmatvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, U), Vᵀ) .- x
245+
@inline _rmatvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
246+
@inline _rmatvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, U), Vᵀ) .- x .* init_α
231247

232248
function _matvec!!(y, Vᵀx, U, Vᵀ, x)
233249
# (-I + UVᵀ) × x
@@ -244,8 +260,8 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x)
244260
return y
245261
end
246262

247-
@inline _matvec!!(::Nothing, Vᵀ, x) = -x
248-
@inline _matvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, Vᵀ), U) .- x
263+
@inline _matvec!!(::Nothing, Vᵀ, x, init_α) = -x .* init_α
264+
@inline _matvec!!(U, Vᵀ, x, init_α) = __mapTdot(__mapdot(x, Vᵀ), U) .- x .* init_α
249265

250266
function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
251267
return map(Base.Fix1(dot, x), Y)

src/utils.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,12 @@ function compute_jacobian_and_hessian(ad::AutoFiniteDiff, prob, fx, x)
214214
end
215215
end
216216

217-
__init_identity_jacobian(u::Number, _) = one(u)
217+
__init_identity_jacobian(u::Number, fu, α = true) = oftype(u, α)
218218
__init_identity_jacobian!!(J::Number) = one(J)
219-
function __init_identity_jacobian(u, fu)
219+
function __init_identity_jacobian(u, fu, α = true)
220220
J = similar(u, promote_type(eltype(u), eltype(fu)), length(fu), length(u))
221221
fill!(J, zero(eltype(J)))
222-
J[diagind(J)] .= one(eltype(J))
222+
J[diagind(J)] .= eltype(J))
223223
return J
224224
end
225225
function __init_identity_jacobian!!(J)
@@ -231,9 +231,9 @@ function __init_identity_jacobian!!(J::AbstractVector)
231231
fill!(J, one(eltype(J)))
232232
return J
233233
end
234-
function __init_identity_jacobian(u::StaticArray, fu)
234+
function __init_identity_jacobian(u::StaticArray, fu, α = true)
235235
S1, S2 = length(fu), length(u)
236-
J = SMatrix{S1, S2, eltype(u)}(I)
236+
J = SMatrix{S1, S2, eltype(u)}(I * α)
237237
return J
238238
end
239239
function __init_identity_jacobian!!(J::SMatrix{S1, S2}) where {S1, S2}

0 commit comments

Comments
 (0)