From 5bcde5a95f19910ddecbd7e9fd82972e0993fe6d Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 14:05:49 -0400 Subject: [PATCH 1/2] lbfgs: optimize memory accesses in multiply --- src/lbfgs.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 0460d9b..1fe1e00 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -187,14 +187,12 @@ function LBFGSOperator(T::Type, n::I; kwargs...) where {I <: Integer} data.scaling && (q ./= data.scaling_factor) # B = B₀ + Σᵢ (bᵢbᵢ' - aᵢaᵢ'). - for i = 1:(data.mem) + @inbounds for i = 1:(data.mem) k = mod(data.insert + i - 2, data.mem) + 1 if data.ys[k] != 0 ax = dot(data.a[k], x) bx = dot(data.b[k], x) - for j ∈ eachindex(q) - q[j] += bx * data.b[k][j] - ax * data.a[k][j] - end + @. q += bx .* data.b[k] - ax .* data.a[k] end end if β == zero(T2) From 623203a41053388f1f1cd58afadaa710574baee9 Mon Sep 17 00:00:00 2001 From: Maxence Gollier Date: Thu, 16 Oct 2025 14:16:30 -0400 Subject: [PATCH 2/2] lbfgs: optimize memory accesses --- src/lbfgs.jl | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/lbfgs.jl b/src/lbfgs.jl index 1fe1e00..0f18abf 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -124,27 +124,23 @@ function InverseLBFGSOperator(T::Type, n::I; kwargs...) where {I <: Integer} q = data.Ax # tmp vector q .= x - for i = 1:(data.mem) + @inbounds for i = 1:(data.mem) k = mod(data.insert - i - 1, data.mem) + 1 if data.ys[k] != 0 αk = dot(data.s[k], q) / data.ys[k] data.α[k] = αk - for j ∈ eachindex(q) - q[j] -= αk * data.y[k][j] - end + @. q -= αk * data.y[k] end end data.scaling && (q .*= data.scaling_factor) - for i = 1:(data.mem) + @inbounds for i = 1:(data.mem) k = mod(data.insert + i - 2, data.mem) + 1 if data.ys[k] != 0 αk = data.α[k] β = αk - dot(data.y[k], q) / data.ys[k] - for j ∈ eachindex(q) - q[j] += β * data.s[k][j] - end + @. q += β * data.s[k] end end if βm == zero(T2) @@ -227,12 +223,12 @@ function push_common!( if !op.inverse @. data.b[insert] = y / sqrt(ys) - for i = 1:(data.mem) + @inbounds for i = 1:(data.mem) k = mod(insert + i - 1, data.mem) + 1 if data.ys[k] != 0 @. data.a[k] = data.s[k] / data.scaling_factor # B₀ = I / γ. - for j = 1:(i - 1) + @inbounds for j = 1:(i - 1) l = mod(insert + j - 1, data.mem) + 1 if data.ys[l] != 0 data.a[k] .+= dot(data.b[l], data.s[k]) .* data.b[l] @@ -379,12 +375,10 @@ function diag!(op::LBFGSOperator{T}, d) where {T} fill!(d, 1) data.scaling && (d ./= data.scaling_factor) - for i = 1:(data.mem) + @inbounds for i = 1:(data.mem) k = mod(data.insert + i - 2, data.mem) + 1 if data.ys[k] != 0 - for j = 1:(op.nrow) - d[j] = d[j] + data.b[k][j]^2 - data.a[k][j]^2 - end + @. d += data.b[k].^2 - data.a[k].^2 end end return d