
Commit 60a5fef

[WIP] Do not (mis)use objective as state
1 parent 9290b55 · commit 60a5fef

33 files changed: +674 −684 lines

Project.toml

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ ExplicitImports = "1.13.2"
 FillArrays = "0.6.2, 0.7, 0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
 ForwardDiff = "0.10, 1"
 JET = "0.9, 0.10"
-LineSearches = "7.4.0"
+LineSearches = "7.5.1"
 LinearAlgebra = "<0.0.1, 1.6"
 MathOptInterface = "1.17"
 Measurements = "2.14.1"

src/Manifolds.jl

Lines changed: 1 addition & 10 deletions

@@ -28,16 +28,7 @@ end
 # TODO: is it safe here to call retract! and change x?
 function NLSolversBase.value!(obj::ManifoldObjective, x)
     xin = retract(obj.manifold, x)
-    value!(obj.inner_obj, xin)
-end
-function NLSolversBase.value(obj::ManifoldObjective)
-    value(obj.inner_obj)
-end
-function NLSolversBase.gradient(obj::ManifoldObjective)
-    gradient(obj.inner_obj)
-end
-function NLSolversBase.gradient(obj::ManifoldObjective, i::Int)
-    gradient(obj.inner_obj, i)
+    return value!(obj.inner_obj, xin)
 end
 function NLSolversBase.gradient!(obj::ManifoldObjective, x)
     xin = retract(obj.manifold, x)
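The change above is the commit's core pattern in miniature: the mutating evaluation call now returns the value it computes, and the no-argument cache accessors (`value(obj)`, `gradient(obj)` on `ManifoldObjective`) are deleted so callers cannot read stale state back out of the objective. A minimal sketch of the calling convention this enforces, using a plain NLSolversBase objective (it assumes, as the new method above does, that `value!` returns the computed value):

    using NLSolversBase

    f(x) = sum(abs2, x)
    obj = OnceDifferentiable(f, zeros(2); autodiff = :forward)

    # old style: evaluate, then read the result back from the objective's cache
    value!(obj, [1.0, 2.0])
    f_old = value(obj)        # correct only while nothing else touches the cache

    # new style: consume the return value of the evaluation itself
    f_new = value!(obj, [1.0, 2.0])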

src/Optim.jl

Lines changed: 0 additions & 3 deletions

@@ -41,10 +41,7 @@ using NLSolversBase:
     TwiceDifferentiableConstraints,
     nconstraints,
     nconstraints_x,
-    hessian,
     hessian!,
-    hessian!!,
-    hv_product,
     hv_product!

 # var for NelderMead

src/multivariate/optimize/optimize.jl

Lines changed: 42 additions & 45 deletions

@@ -1,37 +1,36 @@
-update_g!(d, state, method) = nothing
-function update_g!(d, state, method::FirstOrderOptimizer)
-    # Update the function value and gradient
-    value_gradient!(d, state.x)
-    project_tangent!(method.manifold, gradient(d), state.x)
+function update_fg!(d, state, ::ZerothOrderOptimizer)
+    f_x = value!(d, state.x)
+    state.f_x = f_x
+    return nothing
 end
-function update_g!(d, state, method::Newton)
-    # Update the function value and gradient
-    value_gradient!(d, state.x)
-end
-update_fg!(d, state, method) = nothing
-update_fg!(d, state, method::ZerothOrderOptimizer) = value!(d, state.x)
-function update_fg!(d, state, method::FirstOrderOptimizer)
-    value_gradient!(d, state.x)
-    project_tangent!(method.manifold, gradient(d), state.x)
-end
-function update_fg!(d, state, method::Newton)
-    value_gradient!(d, state.x)
+function update_fg!(d, state, method)
+    f_x, g_x = value_gradient!(d, state.x)
+    if hasproperty(method, :manifold)
+        project_tangent!(method.manifold, g_x, state.x)
+    end
+    state.f_x = f_x
+    copyto!(state.g_x, g_x)
+    return nothing
 end

 # Update the Hessian
-update_h!(d, state, method) = nothing
-update_h!(d, state, method::SecondOrderOptimizer) = hessian!(d, state.x)
+update_h!(d, state, ::Union{ZerothOrderOptimizer,FirstOrderOptimizer}) = nothing
+function update_h!(d, state, ::SecondOrderOptimizer)
+    H_x = hessian!(d, state.x)
+    copyto!(state.H_x, H_x)
+    return nothing
+end

 after_while!(d, state, method, options) = nothing

-function initial_convergence(d, state, method::AbstractOptimizer, initial_x, options)
-    gradient!(d, initial_x)
-    stopped = !isfinite(value(d)) || any(!isfinite, gradient(d))
-    g_residual(d, state) <= options.g_abstol, stopped
+function initial_convergence(state::AbstractOptimizerState, options::Options)
+    stopped = !isfinite(state.f_x) || any(!isfinite, state.g_x)
+    return g_residual(state) <= options.g_abstol, stopped
 end
-function initial_convergence(d, state, method::ZerothOrderOptimizer, initial_x, options)
+function initial_convergence(::ZerothOrderState, ::Options)
     false, false
 end
+
 function optimize(
     d::D,
     initial_x::Tx,
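With the hunk above, the separate `update_g!`/`update_fg!` overloads for each optimizer family collapse into one generic `update_fg!` that stores the evaluation in the state (`state.f_x`, `state.g_x`) instead of leaving it implicit in `d`. Note the runtime `hasproperty(method, :manifold)` check standing in for dispatch on `FirstOrderOptimizer`: any method struct carrying a `manifold` field gets the tangent projection, the rest skip it. A quick hedged illustration (field layouts as in current Optim.jl; worth re-checking against the release you use):

    using Optim

    hasproperty(GradientDescent(), :manifold)   # true  -> gradient gets projected
    hasproperty(Newton(), :manifold)            # false -> projection is skipped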
@@ -41,7 +40,7 @@ function optimize(
 ) where {D<:AbstractObjective,M<:AbstractOptimizer,Tx<:AbstractArray,T,TCallback}

     t0 = time() # Initial time stamp used to control early stopping by options.time_limit
-    tr = OptimizationTrace{typeof(value(d)),typeof(method)}()
+    tr = OptimizationTrace{typeof(state.f_x),typeof(method)}()
     tracing =
         options.store_trace ||
         options.show_trace ||
@@ -51,7 +50,7 @@ function optimize(
     f_limit_reached, g_limit_reached, h_limit_reached = false, false, false
     x_converged, f_converged, f_increased, counter_f_tol = false, false, false, 0

-    g_converged, stopped = initial_convergence(d, state, method, initial_x, options)
+    g_converged, stopped = initial_convergence(state, options)
     converged = g_converged || stopped
     # prepare iteration counter (used to make "initial state" trace entry)
     iteration = 0
@@ -66,9 +65,7 @@ function optimize(
         if !ls_success
             break # it returns true if it's forced by something in update! to stop (eg dx_dg == 0.0 in BFGS, or linesearch errors)
         end
-        if !(method isa NewtonTrustRegion)
-            update_g!(d, state, method) # TODO: Should this be `update_fg!`?
-        end
+        update_fg!(d, state, method)
         x_converged, f_converged, g_converged, f_increased =
             assess_convergence(state, d, options)
         # For some problems it may be useful to require `f_converged` to be hit multiple times
@@ -113,11 +110,11 @@ function optimize(
             end
         end

-        if g_calls(d) > 0 && !all(isfinite, gradient(d))
+        if hasproperty(state, :g_x) && !all(isfinite, state.g_x)
             options.show_warnings && @warn "Terminated early due to NaN in gradient."
             break
         end
-        if h_calls(d) > 0 && !(d isa TwiceDifferentiableHV) && !all(isfinite, hessian(d))
+        if hasproperty(state, :H_x) && !all(isfinite, state.H_x)
             options.show_warnings && @warn "Terminated early due to NaN in Hessian."
             break
         end
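The early-exit NaN guards above now interrogate the state rather than the objective's call counters: a zeroth-order state simply has no `g_x` or `H_x` field, so `hasproperty` returns `false` and `&&` short-circuits before the field is ever read, mirroring the old `g_calls(d) > 0` test without consulting `d`. A self-contained illustration with a hypothetical state type (not one of the package's):

    struct ToyZerothOrderState{Tx,T}
        x::Tx
        f_x::T          # deliberately no g_x or H_x fields
    end

    s = ToyZerothOrderState([1.0, 2.0], 3.0)
    hasproperty(s, :g_x) && !all(isfinite, s.g_x)   # false; s.g_x is never accessed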
@@ -127,7 +124,7 @@ function optimize(

     # we can just check minimum, as we've earlier enforced same types/eltypes
     # in variables besides the option settings
-    Tf = typeof(value(d))
+    Tf = typeof(state.f_x)
     f_incr_pick = f_increased && !options.allow_f_increases
     stopped_by = (; x_converged, f_converged, g_converged,
         f_limit_reached = f_limit_reached,
@@ -141,7 +138,7 @@ function optimize(
     )

     termination_code =
-        _termination_code(d, g_residual(d, state), state, stopped_by, options)
+        _termination_code(d, g_residual(state), state, stopped_by, options)

     return MultivariateOptimizationResults{
         typeof(method),
@@ -154,18 +151,18 @@ function optimize(
         method,
         initial_x,
         pick_best_x(f_incr_pick, state),
-        pick_best_f(f_incr_pick, state, d),
+        pick_best_f(f_incr_pick, state),
         iteration,
         Tf(options.x_abstol),
         Tf(options.x_reltol),
         x_abschange(state),
         x_relchange(state),
         Tf(options.f_abstol),
         Tf(options.f_reltol),
-        f_abschange(d, state),
-        f_relchange(d, state),
+        f_abschange(state),
+        f_relchange(state),
         Tf(options.g_abstol),
-        g_residual(d, state),
+        g_residual(state),
         tr,
         f_calls(d),
         g_calls(d),
@@ -186,13 +183,13 @@ function _termination_code(d, gres, state, stopped_by, options)
     elseif (iszero(options.x_abstol) && x_abschange(state) <= options.x_abstol) ||
            (iszero(options.x_reltol) && x_relchange(state) <= options.x_reltol)
         TerminationCode.NoXChange
-    elseif (iszero(options.f_abstol) && f_abschange(d, state) <= options.f_abstol) ||
-           (iszero(options.f_reltol) && f_relchange(d, state) <= options.f_reltol)
+    elseif (iszero(options.f_abstol) && f_abschange(state) <= options.f_abstol) ||
+           (iszero(options.f_reltol) && f_relchange(state) <= options.f_reltol)
         TerminationCode.NoObjectiveChange
     elseif x_abschange(state) <= options.x_abstol || x_relchange(state) <= options.x_reltol
         TerminationCode.SmallXChange
-    elseif f_abschange(d, state) <= options.f_abstol ||
-           f_relchange(d, state) <= options.f_reltol
+    elseif f_abschange(state) <= options.f_abstol ||
+           f_relchange(state) <= options.f_reltol
         TerminationCode.SmallObjectiveChange
     elseif stopped_by.ls_failed
         TerminationCode.FailedLinesearch
@@ -210,11 +207,11 @@ function _termination_code(d, gres, state, stopped_by, options)
         TerminationCode.HessianCalls
     elseif stopped_by.f_increased
         TerminationCode.ObjectiveIncreased
-    elseif f_calls(d) > 0 && !isfinite(value(d))
-        TerminationCode.GradientNotFinite
-    elseif g_calls(d) > 0 && !all(isfinite, gradient(d))
+    elseif !isfinite(state.f_x)
+        TerminationCode.ObjectiveNotFinite
+    elseif hasproperty(state, :g_x) && !all(isfinite, state.g_x)
         TerminationCode.GradientNotFinite
-    elseif h_calls(d) > 0 && !(d isa TwiceDifferentiableHV) && !all(isfinite, hessian(d))
+    elseif hasproperty(state, :H_x) && !all(isfinite, state.H_x)
         TerminationCode.HessianNotFinite
     else
         TerminationCode.NotImplemented

src/multivariate/solvers/constrained/fminbox.jl

Lines changed: 21 additions & 42 deletions

@@ -1,5 +1,5 @@
 using NLSolversBase:
-    value, value!, value!!, gradient, gradient!, value_gradient!, value_gradient!!
+    value, value!, gradient, gradient!, value_gradient!
 ####### FIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIX THE MIDDLE OF BOX CASE THAT WAS THERE
 mutable struct BarrierWrapper{TO,TB,Tm,TF,TDF} <: AbstractObjective
     obj::TO
@@ -69,29 +69,6 @@ function _barrier_term_gradient(x::T, l, u) where {T}
 end

 # Wrappers
-function NLSolversBase.value!!(bw::BarrierWrapper, x)
-    bw.Fb = _barrier_value(bw.b, x)
-    if in_box(bw, x)
-        F = value!!(bw.obj, x)
-        bw.Ftotal = muladd(bw.mu, bw.Fb, F)
-    else
-        bw.Ftotal = bw.mu * bw.Fb
-    end
-    return bw.Ftotal
-end
-function NLSolversBase.value_gradient!!(bw::BarrierWrapper, x)
-    bw.Fb = _barrier_value(bw.b, x)
-    bw.DFb .= _barrier_term_gradient.(x, bw.b.lower, bw.b.upper)
-    if in_box(bw, x)
-        F, DF = value_gradient!!(bw.obj, x)
-        bw.Ftotal = muladd(bw.mu, bw.Fb, F)
-        bw.DFtotal .= muladd.(bw.mu, bw.DFb, DF)
-    else
-        bw.Ftotal = bw.mu * bw.Fb
-        bw.DFtotal .= bw.mu .* bw.DFb
-    end
-    return bw.Ftotal, bw.DFtotal
-end
 function NLSolversBase.value_gradient!(bb::BarrierWrapper, x)
     bb.DFb .= _barrier_term_gradient.(x, bb.b.lower, bb.b.upper)
     bb.Fb = _barrier_value(bb.b, x)
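With the force-reevaluating `!!` wrappers on `BarrierWrapper` gone, callers go through the single-bang variant and consume its returned pair directly, matching the `f_x, g_x = value_gradient!(d, state.x)` call in optimize.jl above. A runnable sketch with a plain NLSolversBase objective (it assumes `value_gradient!` returns the `(value, gradient)` pair, which the new code above relies on):

    using NLSolversBase

    rosenbrock(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
    od = OnceDifferentiable(rosenbrock, zeros(2); autodiff = :forward)

    # one evaluation, both results kept; no second read from od's cache
    F, DF = value_gradient!(od, [0.5, 0.5])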
@@ -115,7 +92,6 @@ function NLSolversBase.value!(obj::BarrierWrapper, x)
     end
     return obj.Ftotal
 end
-NLSolversBase.value(obj::BarrierWrapper) = obj.Ftotal
 function NLSolversBase.value(obj::BarrierWrapper, x)
     Fb = _barrier_value(obj.b, x)
     if in_box(obj, x)
@@ -134,11 +110,10 @@ function NLSolversBase.gradient!(obj::BarrierWrapper, x)
     end
     return obj.DFtotal
 end
-NLSolversBase.gradient(obj::BarrierWrapper) = obj.DFtotal

 # this mutates mu but not the gradients
 # Super unsafe in that it depends on x_df being correct!
-function initial_mu(obj::BarrierWrapper, F)
+function initial_mu!(obj::BarrierWrapper, g_x, F)
     T = typeof(obj.Fb) # this will not work if F is real, G is complex
     gbarrier = map(
         x ->
@@ -148,8 +123,9 @@
     )

     # obj.mu = initial_mu(gradient(obj.obj), gradient(obj.b, obj.DFb, obj.obj.x_df), T(F.mufactor), T(F.mu0))
-    obj.mu = initial_mu(gradient(obj.obj), gbarrier, T(F.mufactor), T(F.mu0))
+    return obj.mu = initial_mu(g_x, gbarrier, T(F.mufactor), T(F.mu0))
 end
+
 # Attempt to compute a reasonable default mu: at the starting
 # position, the gradient of the input function should dominate the
 # gradient of the barrier.
@@ -279,6 +255,13 @@ barrier_method(
     precondprep,
 ) = m # use `m` as is

+struct BoxState{T,Tx} <: ZerothOrderState
+    x::Tx
+    f_x::T
+    x_previous::Tx
+    f_x_previous::T
+end
+
 function optimize(
     f,
     l::AbstractArray,
@@ -483,14 +466,14 @@ function optimize(
     # barrier-aware optimization method instance (precondition relevance)
     _optimizer = barrier_method(F.method, P, (P, x) -> F.precondprep(P, x, l, u, dfbox))

+    # we wait until state has been initialized to set the initial mu because we need the gradient of the objective
     state = initial_state(_optimizer, options, dfbox, x)
-    # we wait until state has been initialized to set the initial mu because
-    # we need the gradient of the objective and initial_state will value_gradient!!
-    # the objective, so that forces an evaluation
-    if F.method isa NelderMead
+    g_x = if hasproperty(state, :g_x)
+        state.g_x
+    else
         gradient!(dfbox, x)
     end
-    dfbox.mu = initial_mu(dfbox, F)
+    initial_mu!(dfbox, g_x, F)
     if F.method isa NelderMead
         for i = 1:length(state.f_simplex)
             x = state.simplex[i]
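The hunk above also shows the new calling convention around `initial_mu!`: rather than reading `gradient(obj.obj)` out of the inner objective's cache, the caller supplies the gradient it already holds, taking it from the state when the state carries one and evaluating only as a fallback. A toy stand-in for that retrieval logic (types and `fallback_gradient` are hypothetical; the latter plays the role of `gradient!(dfbox, x)`):

    struct StateWithG
        g_x::Vector{Float64}        # e.g. a first-order state
    end
    struct StateWithoutG end        # e.g. a NelderMead state

    fallback_gradient() = [0.1, -0.2]   # stands in for gradient!(dfbox, x)

    grad_for_mu(state) = hasproperty(state, :g_x) ? state.g_x : fallback_gradient()

    grad_for_mu(StateWithG([1.0, 2.0]))   # the state's own gradient
    grad_for_mu(StateWithoutG())          # falls back to an explicit evaluation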
@@ -514,10 +497,9 @@ function optimize(
         iteration = 1

         # define the function (dfbox) to optimize by the inner optimizer
-
         xold = copy(x)
+        fval0 = state.f_x
         _time = time()
-        fval0 = dfbox.obj.F

         # Optimize with current setting of mu
         if show_trace > 0
@@ -530,9 +512,7 @@ function optimize(
             println("(numbers below include barrier contribution)")
         end

-        # we need to update the +mu*barrier_grad part. Since we're using the
-        # value_gradient! not !! as in initial_state, we won't make a superfluous
-        # evaluation
+        # we need to update the +mu*barrier_grad part.
        if !(F.method isa NelderMead)
            value_gradient!(dfbox, x)
            reset!(_optimizer, state, dfbox, x)
@@ -681,14 +661,13 @@ function optimize(
         f_converged = _f_converged,
         g_converged = _g_converged,
     )
-    box_state = (; x, x_previous = xold, f_x_previous = fval0)
-    termination_code = _termination_code(df, g_residual(g), box_state, stopped_by, options)
+    termination_code = _termination_code(df, g_residual(g), BoxState(minimizer(results), minimum(results), xold, fval0), stopped_by, options)

     return MultivariateOptimizationResults(
         F,
         initial_x,
         minimizer(results),
-        df.f(minimizer(results)),
+        minimum(results),
         iteration,
         results.x_abstol,
         results.x_reltol,
@@ -699,7 +678,7 @@ function optimize(
         f_abschange(minimum(results), fval0),
         f_relchange(minimum(results), fval0),
         results.g_abstol,
-        g_residual(g, Inf),
+        g_residual(g),
         results.trace,
         results.f_calls,
         results.g_calls,
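The new `BoxState` gives `_termination_code` a concrete state object whose fields feed the generic convergence helpers (`x_abschange`, `f_abschange`, and friends), replacing the ad-hoc NamedTuple that was passed before. A minimal construction sketch using the struct exactly as defined above (values are placeholders):

    x_final = [1.0, 2.0]
    s = BoxState(x_final, 0.5, copy(x_final), 0.75)   # x, f_x, x_previous, f_x_previous

    # s has no g_x or H_x field, so hasproperty(s, :g_x) is false and the
    # gradient/Hessian finiteness branches in _termination_code are skipped.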
