JuliaNLSolvers · rsenne · Jul 7, 2025 · Jul 7, 2025 · Jul 8, 2025 · Jul 8, 2025
diff --git a/Project.toml b/Project.toml
@@ -65,7 +65,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
 ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Aqua", "Distributions", "ExplicitImports", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "LineSearches", "NLSolversBase", "PositiveFactorizations", "ReverseDiff", "ADTypes"]
+test = ["Test", "Aqua", "Distributions", "ExplicitImports", "JET", "MathOptInterface", "Measurements", "OptimTestProblems", "Random", "RecursiveArrayTools", "StableRNGs", "LineSearches", "NLSolversBase", "PositiveFactorizations", "ReverseDiff", "ADTypes", "StaticArrays", "BlockArrays"]
diff --git a/src/multivariate/solvers/second_order/newton.jl b/src/multivariate/solvers/second_order/newton.jl
@@ -1,37 +1,41 @@
-struct Newton{IL,L} <: SecondOrderOptimizer
+struct Newton{IL,L,S} <: SecondOrderOptimizer
     alphaguess!::IL
     linesearch!::L
+    solve!::S   # mutating solver: (d, state, method) i.e., writes state.s (and maybe state.F)
 end
 
+
 """
 # Newton
 ## Constructor
 ```julia
 Newton(; alphaguess = LineSearches.InitialStatic(),
-linesearch = LineSearches.HagerZhang())
+       linesearch = LineSearches.HagerZhang(),
+       solve = default_newton_solve)
 ```
-
 ## Description
-The `Newton` method implements Newton's method for optimizing a function. We use
-a special factorization from the package `PositiveFactorizations.jl` to ensure
+The `Newton` method implements Newton's method for optimizing a function. 
+
+The `solve` function should take (H, g) and return s such that H*s = -g.
+Defaults to a robust solver that handles dense or sparse matrices.
+If the matrix is not an `AbstractSparseMatrix`, we use a special factorization from the package `PositiveFactorizations.jl` to ensure
 that each search direction is a direction of descent. See Wright and Nocedal and
 Wright (ch. 6, 1999) for a discussion of Newton's method in practice.
 
 ## References
  - Nocedal, J. and S. J. Wright (1999), Numerical optimization. Springer Science 35.67-68: 7.
 """
 function Newton(;
-    alphaguess = LineSearches.InitialStatic(), # Good default for Newton
+    alphaguess = LineSearches.InitialStatic(),
     linesearch = LineSearches.HagerZhang(),
-)    # Good default for Newton
-    Newton(_alphaguess(alphaguess), linesearch)
+    solve = default_newton_solve!
+)
+    Newton(_alphaguess(alphaguess), linesearch, solve)
 end
 
-Base.summary(io::IO, ::Newton) = print(io, "Newton's Method")
-
 mutable struct NewtonState{Tx,T,F<:Cholesky} <: AbstractOptimizerState
     x::Tx
-    x_previous::Tx
+    x_previous::Tx  
     f_x_previous::T
     F::F
     s::Tx
@@ -40,51 +44,56 @@ end
 
 function initial_state(method::Newton, options, d, initial_x)
     T = eltype(initial_x)
-    n = length(initial_x)
-    # Maintain current gradient in gr
-    s = similar(initial_x)
 
     value_gradient!!(d, initial_x)
     hessian!!(d, initial_x)
 
     NewtonState(
-        copy(initial_x), # Maintain current state in state.x
-        copy(initial_x), # Maintain previous state in state.x_previous
-        T(NaN), # Store previous f in state.f_x_previous
-        Cholesky(similar(d.H, T, 0, 0), :U, 0),
-        similar(initial_x), # Maintain current search direction in state.s
+        copy(initial_x),                  # x
+        copy(initial_x),                  # x_previous
+        T(NaN),                           # f_x_previous
+        Cholesky(similar(d.H, T, 0, 0),   # F
+                :U, 0),
+        similar(initial_x),               # s
         @initial_linesearch()...,
     )
 end
 
-function update_state!(d, state::NewtonState, method::Newton)
-    # Search direction is always the negative gradient divided by
-    # a matrix encoding the absolute values of the curvatures
-    # represented by H. It deviates from the usual "add a scaled
-    # identity matrix" version of the modified Newton method. More
-    # information can be found in the discussion at issue #153.
+# Default solver that handles common matrix types intelligently
+function default_newton_solve!(d, state::NewtonState, method::Newton)
+    H = NLSolversBase.hessian(d)
+    g = gradient(d)
     T = eltype(state.x)
 
-    if typeof(NLSolversBase.hessian(d)) <: AbstractSparseMatrix
-        state.s .= .-(NLSolversBase.hessian(d) \ convert(Vector{T}, gradient(d)))
+    if H isa AbstractSparseMatrix
+        state.s .= .-(H \ convert(Vector{T}, gradient(d)))
     else
-        state.F = cholesky!(Positive, NLSolversBase.hessian(d))
-        if typeof(gradient(d)) <: Array
-            # is this actually StridedArray?
-            ldiv!(state.s, state.F, -gradient(d))
-        else
-            # not Array, we can't do inplace ldiv
-            gv = Vector{T}(undef, length(gradient(d)))
-            copyto!(gv, -gradient(d))
+        # Use PositiveFactorizations for robustness on dense matrices
+         # Search direction is always the negative gradient divided by
+         # a matrix encoding the absolute values of the curvatures
+         # represented by H. It deviates from the usual "add a scaled
+         # identity matrix" version of the modified Newton method. More
+         # information can be found in the discussion at issue #153.
+         state.F = cholesky!(Positive, H)
+         if g isa StridedArray
+            ldiv!(state.s, state.F, g)
+            state.s .= .-state.s
+         else
+            gv = Vector{T}(undef, length(g))
+            gv .= .-g
             copyto!(state.s, state.F \ gv)
-        end
+         end
     end
-    # Determine the distance of movement along the search line
-    lssuccess = perform_linesearch!(state, method, d)
+end
+
+Base.summary(io::IO, ::Newton) = print(io, "Newton's Method")
 
-    # Update current position # x = x + alpha * s
+function update_state!(d, state::NewtonState, method::Newton)
+    method.solve!(d, state, method)  # should mutate state.s (and maybe state.F)
+
+    lssuccess = perform_linesearch!(state, method, d)
     @. state.x = state.x + state.alpha * state.s
-    return !lssuccess # break on linesearch error
+    return !lssuccess
 end
 
 function trace!(tr, d, state::NewtonState, iteration::Integer, method::Newton, options::Options, curr_time = time())