Bug fixes

avik-pal · avik-pal · commit 58f41f76b544 · 2023-12-04T15:20:23.000-05:00
diff --git a/src/nlsolve/broyden.jl b/src/nlsolve/broyden.jl
@@ -7,9 +7,9 @@ and static array problems.
 struct SimpleBroyden <: AbstractSimpleNonlinearSolveAlgorithm end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleBroyden, args...;
-        abstol = nothing, reltol = nothing, maxiters = 1000,
+        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
-    @bb x = copy(float(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
 
     @bb xo = copy(x)
diff --git a/src/nlsolve/dfsane.jl b/src/nlsolve/dfsane.jl
@@ -51,9 +51,9 @@ Computation, 75, 1429-1448.](https://www.researchgate.net/publication/220576479_
 end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
-        abstol = nothing, reltol = nothing, maxiters = 1000,
+        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
-    x = float(copy(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
     T = eltype(x)
 
@@ -76,6 +76,7 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
     history_f_k = fill(fx_norm, M)
 
     # Generate the cache
+    @bb x_cache = similar(x)
     @bb d = copy(x)
     @bb xo = copy(x)
     @bb δx = copy(x)
@@ -89,38 +90,40 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleDFSane, args...;
         # Line search direction
         @bb @. d = -σ_k * fx
 
-        η = η_strategy(f_1, k, x, fx)
+        η = η_strategy(f_1, k + 1, x, fx)
         f_bar = maximum(history_f_k)
         α_p = α_1
         α_m = α_1
 
-        @bb @. x += α_p * d
+        @bb @. x_cache = x + α_p * d
 
-        fx = __eval_f(prob, fx, x)
+        fx = __eval_f(prob, fx, x_cache)
         fx_norm_new = NONLINEARSOLVE_DEFAULT_NORM(fx)^nexp
 
         while k < maxiters
             Bool(fx_norm_new ≤ (f_bar + η - γ * α_p^2 * fx_norm)) && break
 
-            α_p = α_p^2 * fx_norm / (fx_norm_new + (T(2) * α_p - T(1)) * fx_norm)
-            @bb @. x -= α_m * d
+            α_tp = α_p^2 * fx_norm / (fx_norm_new + (T(2) * α_p - T(1)) * fx_norm)
+            @bb @. x_cache = x - α_m * d
 
-            fx = __eval_f(prob, fx, x)
+            fx = __eval_f(prob, fx, x_cache)
             fx_norm_new = NONLINEARSOLVE_DEFAULT_NORM(fx)^nexp
 
             Bool(fx_norm_new ≤ (f_bar + η - γ * α_m^2 * fx_norm)) && break
 
             α_tm = α_m^2 * fx_norm / (fx_norm_new + (T(2) * α_m - T(1)) * fx_norm)
-            α_p = clamp(α_p, τ_min * α_p, τ_max * α_p)
+            α_p = clamp(α_tp, τ_min * α_p, τ_max * α_p)
             α_m = clamp(α_tm, τ_min * α_m, τ_max * α_m)
-            @bb @. x += α_p * d
+            @bb @. x_cache = x + α_p * d
 
-            fx = __eval_f(prob, fx, x)
+            fx = __eval_f(prob, fx, x_cache)
             fx_norm_new = NONLINEARSOLVE_DEFAULT_NORM(fx)^nexp
 
             k += 1
         end
 
+        @bb copyto!(x, x_cache)
+
         tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
         tc_sol !== nothing && return tc_sol
 
diff --git a/src/nlsolve/halley.jl b/src/nlsolve/halley.jl
@@ -24,12 +24,12 @@ A low-overhead implementation of Halley's Method.
 end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleHalley, args...;
-        abstol = nothing, reltol = nothing, maxiters = 1000,
+        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
     isinplace(prob) &&
         error("SimpleHalley currently only supports out-of-place nonlinear problems")
 
-    x = copy(float(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
     T = eltype(x)
 
diff --git a/src/nlsolve/klement.jl b/src/nlsolve/klement.jl
@@ -7,9 +7,9 @@ method is non-allocating on scalar and static array problems.
 struct SimpleKlement <: AbstractSimpleNonlinearSolveAlgorithm end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleKlement, args...;
-        abstol = nothing, reltol = nothing, maxiters = 1000,
+        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
-    @bb x = copy(float(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     T = eltype(x)
     fx = _get_fx(prob, x)
 
@@ -21,13 +21,12 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleKlement, args...;
     @bb δx = copy(x)
     @bb fprev = copy(fx)
     @bb xo = copy(x)
-    @bb δf = copy(fx)
     @bb d = copy(x)
 
     J = __init_identity_jacobian(fx, x)
-    @bb J_cache = copy(J)
-    @bb δx² = copy(x)
-    @bb J_cache2 = copy(J)
+    @bb J_cache = similar(J)
+    @bb δx² = similar(x)
+    @bb J_cache2 = similar(J)
     @bb F = copy(J)
 
     for _ in 1:maxiters
@@ -67,23 +66,18 @@ function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleKlement, args...;
         tc_sol !== nothing && return tc_sol
 
         @bb δx .*= -1
-        @bb @. δf = fx - fprev
-
-        # Prevent division by 0
+        @bb J_cache .= J' .^ 2
         @bb @. δx² = δx^2
-        @bb @. J_cache = J^2
-        @bb d = transpose(J_cache) × vec(δx²)
-        @bb @. d = max(d, singular_tol)
-
+        @bb d = J_cache × vec(δx²)
         @bb δx² = J × vec(δx)
-        @bb @. δf = (δf - δx²) / d
-
-        _vδf, _vδx = _vec(δf), _vec(δx)
-        @bb J_cache = _vδf × transpose(_vδx)
+        @bb @. fprev = (fx - fprev - δx²) / ifelse(iszero(d), singular_tol, d)
+        @bb J_cache = vec(fprev) × transpose(_vec(δx))
         @bb @. J_cache *= J
         @bb J_cache2 = J_cache × J
-
         @bb @. J += J_cache2
+
+        @bb copyto!(fprev, fx)
+        @bb copyto!(xo, x)
     end
 
     return build_solution(prob, alg, x, fx; retcode = ReturnCode.MaxIters)
diff --git a/src/nlsolve/lbroyden.jl b/src/nlsolve/lbroyden.jl
@@ -22,9 +22,9 @@ function SimpleLimitedMemoryBroyden(; threshold::Union{Val, Int} = Val(27))
 end
 
 @views function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleLimitedMemoryBroyden,
-        args...; abstol = nothing, reltol = nothing, maxiters = 1000,
+        args...; abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
-    @bb x = copy(float(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     threshold = __get_threshold(alg)
     η = min(SciMLBase._unwrap_val(threshold), maxiters)
 
diff --git a/src/nlsolve/raphson.jl b/src/nlsolve/raphson.jl
@@ -24,8 +24,8 @@ const SimpleGaussNewton = SimpleNewtonRaphson
 
 function SciMLBase.__solve(prob::Union{NonlinearProblem, NonlinearLeastSquaresProblem},
         alg::SimpleNewtonRaphson, args...; abstol = nothing, reltol = nothing,
-        maxiters = 1000, termination_condition = nothing, kwargs...)
-    @bb x = copy(float(prob.u0))
+        maxiters = 1000, termination_condition = nothing, alias_u0 = false, kwargs...)
+    x = __maybe_unaliased(prob.u0, alias_u0)
     fx = _get_fx(prob, x)
     @bb xo = copy(x)
     J, jac_cache = jacobian_cache(alg.autodiff, prob.f, fx, x, prob.p)
@@ -37,9 +37,7 @@ function SciMLBase.__solve(prob::Union{NonlinearProblem, NonlinearLeastSquaresPr
         fx, dfx = value_and_jacobian(alg.autodiff, prob.f, fx, x, prob.p, jac_cache; J)
 
         if i == 1
-            if iszero(fx)
-                return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)
-            end
+            iszero(fx) && build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)
         else
             # Termination Checks
             tc_sol = check_termination(tc_cache, fx, x, xo, prob, alg)
diff --git a/src/nlsolve/trustRegion.jl b/src/nlsolve/trustRegion.jl
@@ -49,9 +49,9 @@ scalar and static array problems.
 end
 
 function SciMLBase.__solve(prob::NonlinearProblem, alg::SimpleTrustRegion, args...;
-        abstol = nothing, reltol = nothing, maxiters = 1000,
+        abstol = nothing, reltol = nothing, maxiters = 1000, alias_u0 = false,
         termination_condition = nothing, kwargs...)
-    @bb x = copy(float(prob.u0))
+    x = __maybe_unaliased(prob.u0, alias_u0)
     T = eltype(real(x))
     Δₘₐₓ = T(alg.max_trust_radius)
     Δ = T(alg.initial_trust_radius)
diff --git a/src/utils.jl b/src/utils.jl
@@ -335,3 +335,11 @@ end
 
 @inline __eval_f(prob, fx, x) = isinplace(prob) ? (prob.f(fx, x, prob.p); fx) :
                                 prob.f(x, prob.p)
+
+# Unalias
+@inline __maybe_unaliased(x::Union{Number, SArray}, ::Bool) = x
+@inline function __maybe_unaliased(x::AbstractArray, alias::Bool)
+    # Spend time coping iff we will mutate the array
+    (alias || !ArrayInterface.can_setindex(typeof(x))) && return x
+    return deepcopy(x)
+end
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
@@ -61,7 +61,7 @@ end
     alg_ops = (SimpleDFSane(),)
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 15, 16, 17, 21, 22]
+    broken_tests[alg_ops[1]] = [1, 2, 3, 4, 5, 6, 11, 21]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
@@ -82,7 +82,7 @@ end
     alg_ops = (SimpleKlement(),)
 
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
-    broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 19, 21, 22]
+    broken_tests[alg_ops[1]] = [1, 2, 4, 5, 6, 7, 11, 13, 22]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end