Non allocating version of LBroyden for StaticArrays

avik-pal · avik-pal · commit c071dd339c07 · 2024-01-13T01:12:47.000-05:00
diff --git a/src/nlsolve/lbroyden.jl b/src/nlsolve/lbroyden.jl
@@ -36,7 +36,7 @@ end
 
     fx = _get_fx(prob, x)
 
-    U, Vᵀ = __init_low_rank_jacobian(x, fx, threshold)
+    U, Vᵀ = __init_low_rank_jacobian(x, fx, x isa StaticArray ? threshold : Val(η))
 
     abstol, reltol, tc_cache = init_termination_cache(abstol, reltol, fx, x,
         termination_condition)
@@ -48,7 +48,7 @@ end
     @bb δf = copy(fx)
 
     @bb vᵀ_cache = copy(x)
-    Tcache = __lbroyden_threshold_cache(x, threshold)
+    Tcache = __lbroyden_threshold_cache(x, x isa StaticArray ? threshold : Val(η))
     @bb mat_cache = copy(x)
 
     for i in 1:maxiters
@@ -83,6 +83,105 @@ end
     return build_solution(prob, alg, x, fx; retcode = ReturnCode.MaxIters)
 end
 
+# Non-allocating StaticArrays version of SimpleLimitedMemoryBroyden is actually quite
+# finicky, so we'll implement it separately from the generic version
+# We make an exception here and don't support termination conditions
+@views function SciMLBase.__solve(prob::NonlinearProblem{<:SArray},
+        alg::SimpleLimitedMemoryBroyden, args...; abstol = nothing,
+        termination_condition = nothing,
+        maxiters = 1000, kwargs...)
+    if termination_condition !== nothing &&
+       !(termination_condition isa AbsNormTerminationMode)
+        error("SimpleLimitedMemoryBroyden with StaticArrays does not support termination \
+               conditions!")
+    end
+
+    x = prob.u0
+    fx = _get_fx(prob, x)
+    threshold = __get_threshold(alg)
+
+    U, Vᵀ = __init_low_rank_jacobian(x, fx, threshold)
+
+    abstol = DiffEqBase._get_tolerance(abstol, eltype(x))
+
+    xo, δx, fo, δf = x, -fx, fx, fx
+
+    converged, res = __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U, Vᵀ,
+        threshold)
+
+    converged &&
+        return build_solution(prob, alg, res.x, res.fx; retcode = ReturnCode.Success)
+
+    xo, fo, δx = res.x, res.fx, res.δx
+
+    for i in 1:(maxiters - SciMLBase._unwrap_val(threshold))
+        x = xo .+ δx
+        fx = prob.f(x, prob.p)
+        δf = fx - fo
+
+        maximum(abs, fx) ≤ abstol &&
+            return build_solution(prob, alg, x, fx; retcode = ReturnCode.Success)
+
+        vᵀ = _restructure(x, _rmatvec!!(U, Vᵀ, vec(δx)))
+        mvec = _restructure(x, _matvec!!(U, Vᵀ, vec(δf)))
+
+        d = dot(vᵀ, δf)
+        δx = @. (δx - mvec) / d
+
+        U = Base.setindex(U, vec(δx), mod1(i, SciMLBase._unwrap_val(threshold)))
+        Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), mod1(i, SciMLBase._unwrap_val(threshold)))
+
+        δx = -_restructure(fx, _matvec!!(U, Vᵀ, vec(fx)))
+
+        xo = x
+        fo = fx
+    end
+
+    return build_solution(prob, alg, xo, fo; retcode = ReturnCode.MaxIters)
+end
+
+@generated function __unrolled_lbroyden_initial_iterations(prob, xo, fo, δx, abstol, U,
+        Vᵀ, ::Val{threshold}) where {threshold}
+    calls = []
+    for i in 1:threshold
+        static_idx, static_idx_p1 = Val(i - 1), Val(i)
+        push!(calls,
+            quote
+                x = xo .+ δx
+                fx = prob.f(x, prob.p)
+                δf = fx - fo
+
+                maximum(abs, fx) ≤ abstol && return true, (; x, fx, δx)
+
+                _U = __first_n_getindex(U, $(static_idx))
+                _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx))
+
+                vᵀ = _restructure(x, _rmatvec!!(_U, _Vᵀ, vec(δx)))
+                mvec = _restructure(x, _matvec!!(_U, _Vᵀ, vec(δf)))
+
+                d = dot(vᵀ, δf)
+                δx = @. (δx - mvec) / d
+
+                U = Base.setindex(U, vec(δx), $(i))
+                Vᵀ = Base.setindex(Vᵀ, vec(vᵀ), $(i))
+
+                _U = __first_n_getindex(U, $(static_idx_p1))
+                _Vᵀ = __first_n_getindex(Vᵀ, $(static_idx_p1))
+                δx = -_restructure(fx, _matvec!!(_U, _Vᵀ, vec(fx)))
+
+                xo = x
+                fo = fx
+            end)
+    end
+    push!(calls, quote
+        # Termination Check
+        maximum(abs, fx) ≤ abstol && return true, (; x, fx, δx)
+
+        return false, (; x, fx, δx)
+    end)
+    return Expr(:block, calls...)
+end
+
 function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
     # xᵀ × (-I + UVᵀ)
     η = size(U, 2)
@@ -98,6 +197,9 @@ function _rmatvec!!(y, xᵀU, U, Vᵀ, x)
     return y
 end
 
+@inline _rmatvec!!(::Nothing, Vᵀ, x) = -x
+@inline _rmatvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, U), Vᵀ) .- x
+
 function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     # (-I + UVᵀ) × x
     η = size(U, 2)
@@ -113,7 +215,57 @@ function _matvec!!(y, Vᵀx, U, Vᵀ, x)
     return y
 end
 
+@inline _matvec!!(::Nothing, Vᵀ, x) = -x
+@inline _matvec!!(U, Vᵀ, x) = __mapTdot(__mapdot(x, Vᵀ), U) .- x
+
+function __mapdot(x::SVector{S1}, Y::SVector{S2, <:SVector{S1}}) where {S1, S2}
+    return map(Base.Fix1(dot, x), Y)
+end
+@generated function __mapTdot(x::SVector{S1}, Y::SVector{S1, <:SVector{S2}}) where {S1, S2}
+    calls = []
+    syms = [gensym("m$(i)") for i in 1:length(Y)]
+    for i in 1:length(Y)
+        push!(calls, :($(syms[i]) = x[$(i)] .* Y[$i]))
+    end
+    push!(calls, :(return .+($(syms...))))
+    return Expr(:block, calls...)
+end
+
+@generated function __first_n_getindex(x::SVector{L, T}, ::Val{N}) where {L, T, N}
+    @assert N ≤ L
+    getcalls = ntuple(i -> :(x[$i]), N)
+    N == 0 && return :(return nothing)
+    return :(return SVector{$N, $T}(($(getcalls...))))
+end
+
 __lbroyden_threshold_cache(x, ::Val{threshold}) where {threshold} = similar(x, threshold)
-function __lbroyden_threshold_cache(x::SArray, ::Val{threshold}) where {threshold}
-    return SArray{Tuple{threshold}, eltype(x)}(ntuple(_ -> zero(eltype(x)), threshold))
+function __lbroyden_threshold_cache(x::StaticArray, ::Val{threshold}) where {threshold}
+    return zeros(MArray{Tuple{threshold}, eltype(x)})
+end
+__lbroyden_threshold_cache(x::SArray, ::Val{threshold}) where {threshold} = nothing
+
+function __init_low_rank_jacobian(u::StaticArray{S1, T1}, fu::StaticArray{S2, T2},
+        ::Val{threshold}) where {S1, S2, T1, T2, threshold}
+    T = promote_type(T1, T2)
+    fuSize, uSize = Size(fu), Size(u)
+    Vᵀ = MArray{Tuple{threshold, prod(uSize)}, T}(undef)
+    U = MArray{Tuple{prod(fuSize), threshold}, T}(undef)
+    return U, Vᵀ
+end
+@generated function __init_low_rank_jacobian(u::SArray{S1, T1}, fu::SArray{S2, T2},
+        ::Val{threshold}) where {S1, S2, T1, T2, threshold}
+    T = promote_type(T1, T2)
+    Lfu, Lu = prod(Size(fu)), prod(Size(u))
+    inner_inits_Vᵀ = [zeros(SVector{Lu, T}) for i in 1:threshold]
+    inner_inits_U = [zeros(SVector{Lfu, T}) for i in 1:threshold]
+    return quote
+        Vᵀ = SVector($(inner_inits_Vᵀ...))
+        U = SVector($(inner_inits_U...))
+        return U, Vᵀ
+    end
+end
+function __init_low_rank_jacobian(u, fu, ::Val{threshold}) where {threshold}
+    Vᵀ = similar(u, threshold, length(u))
+    U = similar(u, length(fu), threshold)
+    return U, Vᵀ
 end
diff --git a/src/utils.jl b/src/utils.jl
@@ -243,20 +243,6 @@ function __init_identity_jacobian!!(J::SVector{S1}) where {S1}
     return ones(SVector{S1, eltype(J)})
 end
 
-function __init_low_rank_jacobian(u::StaticArray{S1, T1}, fu::StaticArray{S2, T2},
-        ::Val{threshold}) where {S1, S2, T1, T2, threshold}
-    T = promote_type(T1, T2)
-    fuSize, uSize = Size(fu), Size(u)
-    Vᵀ = MArray{Tuple{threshold, prod(uSize)}, T}(undef)
-    U = MArray{Tuple{prod(fuSize), threshold}, T}(undef)
-    return U, Vᵀ
-end
-function __init_low_rank_jacobian(u, fu, ::Val{threshold}) where {threshold}
-    Vᵀ = similar(u, threshold, length(u))
-    U = similar(u, length(fu), threshold)
-    return U, Vᵀ
-end
-
 @inline _vec(v) = vec(v)
 @inline _vec(v::Number) = v
 @inline _vec(v::AbstractVector) = v