Commit e0c9ef4

fix: CrossEntropy is now functional 🎉

1 parent c373425

File tree: 9 files changed, +127 -133 lines


lib/DataDrivenLux/src/DataDrivenLux.jl

Lines changed: 4 additions & 3 deletions
```diff
@@ -8,12 +8,11 @@ using DataDrivenDiffEq: AbstractBasis, AbstractDataDrivenAlgorithm,
                         ABSTRACT_CONT_PROB, ABSTRACT_DISCRETE_PROB,
                         InternalDataDrivenProblem, is_implicit, is_controlled
 
-using DocStringExtensions: DocStringExtensions, FIELDS, TYPEDEF
+using DocStringExtensions: DocStringExtensions, FIELDS, TYPEDEF, SIGNATURES
 using CommonSolve: CommonSolve, solve!
 using ConcreteStructs: @concrete
 using Setfield: Setfield, @set!
 
-# TODO: Get rid of Optim and Optimisers in favor of Optimization.jl
 using Optim: Optim, LBFGS
 using Optimisers: Optimisers, ADAM
@@ -93,6 +92,8 @@ export SearchCache
 include("algorithms/rewards.jl")
 export RelativeReward, AbsoluteReward
 
+include("algorithms/common.jl")
+
 include("algorithms/randomsearch.jl")
 export RandomSearch
 
@@ -104,4 +105,4 @@ export CrossEntropy
 
 include("solve.jl")
 
-end # module DataDrivenLux
+end
```
lib/DataDrivenLux/src/algorithms/common.jl

Lines changed: 19 additions & 0 deletions
```diff
@@ -0,0 +1,19 @@
+@kwdef @concrete struct CommonAlgOptions
+    populationsize::Int = 100
+    functions = (sin, exp, cos, log, +, -, /, *)
+    arities = (1, 1, 1, 1, 2, 2, 2, 2)
+    n_layers::Int = 1
+    skip::Bool = true
+    simplex <: AbstractSimplex = Softmax()
+    loss = aicc
+    keep <: Union{Real, Int} = 0.1
+    use_protected::Bool = true
+    distributed::Bool = false
+    threaded::Bool = false
+    rng <: AbstractRNG = Random.default_rng()
+    optimizer = LBFGS()
+    optim_options <: Optim.Options = Optim.Options()
+    optimiser <: Union{Nothing, Optimisers.AbstractRule} = nothing
+    observed <: Union{ObservedModel, Nothing} = nothing
+    alpha::Real = 0.999f0
+end
```
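The new `CommonAlgOptions` struct pools the keyword options that were previously duplicated across the algorithm structs. It combines `Base.@kwdef` (keyword constructor with defaults) with ConcreteStructs' `@concrete`, which turns each `field <: AbstractType` annotation into a concrete type parameter instead of an abstractly typed field. A minimal, self-contained sketch of that pattern (toy struct and field names, not the package code):

```julia
using ConcreteStructs: @concrete
using Random: AbstractRNG, default_rng

# @kwdef supplies the keyword constructor with defaults; @concrete
# replaces each `field <: AbstractType` annotation with a concrete
# type parameter, so field accesses stay type-stable.
Base.@kwdef @concrete struct ToyOptions
    populationsize::Int = 100
    rng <: AbstractRNG = default_rng()
    alpha::Real = 0.999f0
end

opts = ToyOptions(populationsize = 50)  # rng and alpha keep their defaults
```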
lib/DataDrivenLux/src/algorithms/crossentropy.jl

Lines changed: 23 additions & 49 deletions
```diff
@@ -1,54 +1,29 @@
-"""
-$(TYPEDEF)
+@concrete struct CrossEntropy <: AbstractDAGSRAlgorithm
+    options <: CommonAlgOptions
+end
 
-Uses the crossentropy method for discrete optimization to search the space of possible solutions.
+"""
+$(SIGNATURES)
 
-# Fields
-$(FIELDS)
+Uses the crossentropy method for discrete optimization to search the space of possible
+solutions.
 """
-@kwdef struct CrossEntropy{F, A, L, O} <: AbstractDAGSRAlgorithm
-    "The number of candidates to track"
-    populationsize::Int = 100
-    "The functions to include in the search"
-    functions::F = (sin, exp, cos, log, +, -, /, *)
-    "The arities of the functions"
-    arities::A = (1, 1, 1, 1, 2, 2, 2, 2)
-    "The number of layers"
-    n_layers::Int = 1
-    "Include skip layers"
-    skip::Bool = true
-    "Evaluation function to sort the samples"
-    loss::L = aicc
-    "The number of candidates to keep in each iteration"
-    keep::Union{Real, Int} = 0.1
-    "Use protected operators"
-    use_protected::Bool = true
-    "Use distributed optimization and resampling"
-    distributed::Bool = false
-    "Use threaded optimization and resampling - not implemented right now."
-    threaded::Bool = false
-    "Random seed"
-    rng::AbstractRNG = Random.default_rng()
-    "Optim optimiser"
-    optimizer::O = LBFGS()
-    "Optim options"
-    optim_options::Optim.Options = Optim.Options()
-    "Observed model - if `nothing` is used, a normally distributed additive error with fixed variance is assumed."
-    observed::Union{ObservedModel, Nothing} = nothing
-    "Field for possible optimiser - no use for CrossEntropy"
-    optimiser::Nothing = nothing
-    "Update parameter for smoothness"
-    alpha::Real = 0.999f0
+function CrossEntropy(; populationsize = 100, functions = (sin, exp, cos, log, +, -, /, *),
+        arities = (1, 1, 1, 1, 2, 2, 2, 2), n_layers = 1, skip = true, loss = aicc,
+        keep = 0.1, use_protected = true, distributed = false, threaded = false,
+        rng = Random.default_rng(), optimizer = LBFGS(), optim_options = Optim.Options(),
+        observed = nothing, alpha = 0.999f0)
+    return CrossEntropy(CommonAlgOptions(;
+        populationsize, functions, arities, n_layers, skip, simplex = DirectSimplex(), loss,
+        keep, use_protected, distributed, threaded, rng, optimizer,
+        optim_options, optimiser = nothing, observed, alpha))
 end
 
-Base.print(io::IO, ::CrossEntropy) = print(io, "CrossEntropy")
+Base.print(io::IO, ::CrossEntropy) = print(io, "CrossEntropy()")
 Base.summary(io::IO, x::CrossEntropy) = print(io, x)
 
 function init_model(x::CrossEntropy, basis::Basis, dataset::Dataset, intervals)
-    (; n_layers, arities, functions, use_protected, skip) = x
-
-    # We enforce the direct simplex here!
-    simplex = DirectSimplex()
+    (; n_layers, arities, functions, use_protected, skip) = x.options
 
     # Get the parameter mapping
     variable_mask = map(enumerate(equations(basis))) do (i, eq)
@@ -63,15 +38,14 @@ function init_model(x::CrossEntropy, basis::Basis, dataset::Dataset, intervals)
     end
 
     return LayeredDAG(length(basis), size(dataset.y, 1), n_layers, arities, functions;
-        skip = skip, input_functions = variable_mask, simplex = simplex)
+        skip, input_functions = variable_mask, x.options.simplex)
 end
 
 function update_parameters!(cache::SearchCache{<:CrossEntropy})
-    (; candidates, keeps, p, alg) = cache
-    (; alpha) = alg
-    p̄ = mean(map(candidates[keeps]) do candidate
-        return ComponentVector(get_configuration(candidate.model.model, p, candidate.st))
+    p̄ = mean(map(cache.candidates[cache.keeps]) do candidate
+        return ComponentVector(get_configuration(candidate.model.model, cache.p, candidate.st))
     end)
-    cache.p .= alpha * p + (one(alpha) - alpha) .* p̄
+    alpha = cache.alg.options.alpha
+    @. cache.p = alpha * cache.p + (true - alpha) * p̄
    return
 end
```
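The rewritten `update_parameters!` is the cross-entropy smoothing step: the sampling parameters `cache.p` move toward p̄, the mean configuration of the kept (elite) candidates, with mixing weight `alpha`. A numeric sketch with plain vectors in place of the package's `ComponentVector`s (names here are illustrative):

```julia
using Statistics: mean

p     = [0.2, 0.5, 0.3]                     # current sampling parameters
elite = [[0.1, 0.6, 0.3], [0.3, 0.4, 0.3]]  # configurations of the kept candidates
p̄     = mean(elite)                         # elementwise elite mean
α     = 0.999f0
# `true - α` equals 1 - α but, as a Bool, avoids promoting the eltype
@. p = α * p + (true - α) * p̄
```

With `alpha` close to one, the distribution drifts slowly toward the elite mean, which stabilizes the search at the cost of slower convergence.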

lib/DataDrivenLux/src/algorithms/randomsearch.jl

Lines changed: 32 additions & 32 deletions
```diff
@@ -8,38 +8,38 @@ symbolic regression problem.
 $(FIELDS)
 """
 @kwdef struct RandomSearch{F, A, L, O} <: AbstractDAGSRAlgorithm
-    "The number of candidates to track"
-    populationsize::Int = 100
-    "The functions to include in the search"
-    functions::F = (sin, exp, cos, log, +, -, /, *)
-    "The arities of the functions"
-    arities::A = (1, 1, 1, 1, 2, 2, 2, 2)
-    "The number of layers"
-    n_layers::Int = 1
-    "Include skip layers"
-    skip::Bool = true
-    "Simplex mapping"
-    simplex::AbstractSimplex = Softmax()
-    "Evaluation function to sort the samples"
-    loss::L = aicc
-    "The number of candidates to keep in each iteration"
-    keep::Union{Real, Int} = 0.1
-    "Use protected operators"
-    use_protected::Bool = true
-    "Use distributed optimization and resampling"
-    distributed::Bool = false
-    "Use threaded optimization and resampling - not implemented right now."
-    threaded::Bool = false
-    "Random seed"
-    rng::AbstractRNG = Random.default_rng()
-    "Optim optimiser"
-    optimizer::O = LBFGS()
-    "Optim options"
-    optim_options::Optim.Options = Optim.Options()
-    "Observed model - if `nothing` is used, a normally distributed additive error with fixed variance is assumed."
-    observed::Union{ObservedModel, Nothing} = nothing
-    "Field for possible optimiser - no use for RandomSearch"
-    optimiser::Nothing = nothing
+    # "The number of candidates to track"
+    # populationsize::Int = 100
+    # "The functions to include in the search"
+    # functions::F = (sin, exp, cos, log, +, -, /, *)
+    # "The arities of the functions"
+    # arities::A = (1, 1, 1, 1, 2, 2, 2, 2)
+    # "The number of layers"
+    # n_layers::Int = 1
+    # "Include skip layers"
+    # skip::Bool = true
+    # "Simplex mapping"
+    # simplex::AbstractSimplex = Softmax()
+    # "Evaluation function to sort the samples"
+    # loss::L = aicc
+    # "The number of candidates to keep in each iteration"
+    # keep::Union{Real, Int} = 0.1
+    # "Use protected operators"
+    # use_protected::Bool = true
+    # "Use distributed optimization and resampling"
+    # distributed::Bool = false
+    # "Use threaded optimization and resampling - not implemented right now."
+    # threaded::Bool = false
+    # "Random seed"
+    # rng::AbstractRNG = Random.default_rng()
+    # "Optim optimiser"
+    # optimizer::O = LBFGS()
+    # "Optim options"
+    # optim_options::Optim.Options = Optim.Options()
+    # "Observed model - if `nothing` is used, a normally distributed additive error with fixed variance is assumed."
+    # observed::Union{ObservedModel, Nothing} = nothing
+    # "Field for possible optimiser - no use for RandomSearch"
+    # optimiser::Nothing = nothing
 end
 
 Base.print(io::IO, ::RandomSearch) = print(io, "RandomSearch")
```

lib/DataDrivenLux/src/algorithms/reinforce.jl

Lines changed: 34 additions & 34 deletions
```diff
@@ -10,40 +10,40 @@ $(FIELDS)
 @kwdef struct Reinforce{F, A, L, O, R} <: AbstractDAGSRAlgorithm
     "Reward function which should convert the loss to a reward."
     reward::R = RelativeReward(false)
-    "The number of candidates to track"
-    populationsize::Int = 100
-    "The functions to include in the search"
-    functions::F = (sin, exp, cos, log, +, -, /, *)
-    "The arities of the functions"
-    arities::A = (1, 1, 1, 1, 2, 2, 2, 2)
-    "The number of layers"
-    n_layers::Int = 1
-    "Include skip layers"
-    skip::Bool = true
-    "Simplex mapping"
-    simplex::AbstractSimplex = Softmax()
-    "Evaluation function to sort the samples"
-    loss::L = aicc
-    "The number of candidates to keep in each iteration"
-    keep::Union{Real, Int} = 0.1
-    "Use protected operators"
-    use_protected::Bool = true
-    "Use distributed optimization and resampling"
-    distributed::Bool = false
-    "Use threaded optimization and resampling - not implemented right now."
-    threaded::Bool = false
-    "Random seed"
-    rng::AbstractRNG = Random.default_rng()
-    "Optim optimiser"
-    optimizer::O = LBFGS()
-    "Optim options"
-    optim_options::Optim.Options = Optim.Options()
-    "Observed model - if `nothing` is used, a normally distributed additive error with fixed variance is assumed."
-    observed::Union{ObservedModel, Nothing} = nothing
-    "AD Backend"
-    ad_backend::AD.AbstractBackend = AD.ForwardDiffBackend()
-    "Optimiser"
-    optimiser::Optimisers.AbstractRule = ADAM()
+    # "The number of candidates to track"
+    # populationsize::Int = 100
+    # "The functions to include in the search"
+    # functions::F = (sin, exp, cos, log, +, -, /, *)
+    # "The arities of the functions"
+    # arities::A = (1, 1, 1, 1, 2, 2, 2, 2)
+    # "The number of layers"
+    # n_layers::Int = 1
+    # "Include skip layers"
+    # skip::Bool = true
+    # "Simplex mapping"
+    # simplex::AbstractSimplex = Softmax()
+    # "Evaluation function to sort the samples"
+    # loss::L = aicc
+    # "The number of candidates to keep in each iteration"
+    # keep::Union{Real, Int} = 0.1
+    # "Use protected operators"
+    # use_protected::Bool = true
+    # "Use distributed optimization and resampling"
+    # distributed::Bool = false
+    # "Use threaded optimization and resampling - not implemented right now."
+    # threaded::Bool = false
+    # "Random seed"
+    # rng::AbstractRNG = Random.default_rng()
+    # "Optim optimiser"
+    # optimizer::O = LBFGS()
+    # "Optim options"
+    # optim_options::Optim.Options = Optim.Options()
+    # "Observed model - if `nothing` is used, a normally distributed additive error with fixed variance is assumed."
+    # observed::Union{ObservedModel, Nothing} = nothing
+    # "AD Backend"
+    # ad_backend::AD.AbstractBackend = AD.ForwardDiffBackend()
+    # "Optimiser"
+    # optimiser::Optimisers.AbstractRule = ADAM()
 end
 
 Base.print(io::IO, ::Reinforce) = print(io, "Reinforce")
```

lib/DataDrivenLux/src/algorithms/rewards.jl

Lines changed: 1 addition & 3 deletions
```diff
@@ -25,9 +25,7 @@ struct AbsoluteReward{risk} <: AbstractRewardScale{risk} end
 
 AbsoluteReward(risk_seeking = true) = AbsoluteReward{risk_seeking}()
 
-function (::AbsoluteReward)(losses::Vector{T}) where {T <: Number}
-    return exp.(-losses)
-end
+(::AbsoluteReward)(losses::Vector{T}) where {T <: Number} = exp.(-losses)
 
 function (::AbsoluteReward{true})(losses::Vector{T}) where {T <: Number}
     r = exp.(-losses)
```
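The refactor to a one-liner keeps the behavior unchanged: losses map to rewards through `exp(-loss)`, so smaller losses give larger rewards in (0, 1]. A quick numeric check:

```julia
losses = [0.0, 1.0, 2.0]
rewards = exp.(-losses)  # ≈ [1.0, 0.368, 0.135]: lower loss, higher reward
```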

lib/DataDrivenLux/src/caches/cache.jl

Lines changed: 8 additions & 7 deletions
```diff
@@ -32,7 +32,7 @@ end
 
 function init_cache(x::X where {X <: AbstractDAGSRAlgorithm},
         basis::Basis, problem::DataDrivenProblem; kwargs...)
-    (; rng, keep, observed, populationsize, optimizer, optim_options, optimiser, loss) = x
+    (; rng, keep, observed, populationsize, optimizer, optim_options, optimiser, loss) = x.options
     # Derive the model
     dataset = Dataset(problem)
     TData = eltype(dataset)
@@ -75,9 +75,9 @@ function init_cache(x::X where {X <: AbstractDAGSRAlgorithm},
     end
 
     # Distributed always goes first here
-    if x.distributed
+    if x.options.distributed
         ptype = __PROCESSUSE(3)
-    elseif x.threaded
+    elseif x.options.threaded
         ptype = __PROCESSUSE(2)
     else
         ptype = __PROCESSUSE(1)
@@ -94,7 +94,7 @@ function init_cache(x::X where {X <: AbstractDAGSRAlgorithm},
 end
 
 function update_cache!(cache::SearchCache)
-    (; keep, loss, optimizer, optim_options) = cache.alg
+    (; keep, loss) = cache.alg.options
 
     # Update the parameters based on the current results
     update_parameters!(cache)
@@ -109,6 +109,7 @@ function update_cache!(cache::SearchCache)
         cache.keeps[1:keep] .= true
     else
         losses = map(loss, cache.candidates)
+        @. losses = ifelse(isnan(losses), Inf, losses)
         # TODO Maybe weight by age or loss here
         sortperm!(cache.sorting, cache.candidates, by = loss)
         permute!(cache.candidates, cache.sorting)
@@ -123,7 +124,7 @@ end
 
 # Serial
 function optimize_cache!(cache::SearchCache{<:Any, __PROCESSUSE(1)}, p = cache.p)
-    (; optimizer, optim_options) = cache.alg
+    (; optimizer, optim_options) = cache.alg.options
     map(enumerate(cache.candidates)) do (i, candidate)
         if cache.keeps[i]
             cache.ages[i] += 1
@@ -140,7 +141,7 @@ end
 
 # Threaded
 function optimize_cache!(cache::SearchCache{<:Any, __PROCESSUSE(2)}, p = cache.p)
-    (; optimizer, optim_options) = cache.alg
+    (; optimizer, optim_options) = cache.alg.options
     # Update all
     Threads.@threads for i in 1:length(cache.keeps)
         if cache.keeps[i]
@@ -156,7 +157,7 @@ end
 
 # Distributed
 function optimize_cache!(cache::SearchCache{<:Any, __PROCESSUSE(3)}, p = cache.p)
-    (; optimizer, optim_options) = cache.alg
+    (; optimizer, optim_options) = cache.alg.options
 
     successes = pmap(1:length(cache.keeps)) do i
         if cache.keeps[i]
```
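The added line in `update_cache!` is one of the fixes behind the commit message: candidates whose loss evaluates to NaN are mapped to Inf before the candidates are ranked. NaN propagates through reductions such as `extrema` and makes every ordered comparison false, while Inf behaves like an ordinary worst-case loss, so failed candidates compare predictably and always rank behind every finite loss. A minimal sketch:

```julia
losses = [0.3, NaN, 0.1]
@. losses = ifelse(isnan(losses), Inf, losses)  # [0.3, Inf, 0.1]
sort(losses)  # [0.1, 0.3, Inf]: the failed candidate sorts last
```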

lib/DataDrivenLux/src/solve.jl

Lines changed: 5 additions & 5 deletions
```diff
@@ -3,9 +3,9 @@ function DataDrivenDiffEq.get_fit_targets(::A, prob::AbstractDataDrivenProblem,
     return prob.X, DataDrivenDiffEq.get_implicit_data(prob)
 end
 
-struct DataDrivenLuxResult <: DataDrivenDiffEq.AbstractDataDrivenResult
-    candidate::Candidate
-    retcode::DDReturnCode
+@concrete struct DataDrivenLuxResult <: DataDrivenDiffEq.AbstractDataDrivenResult
+    candidate <: Candidate
+    retcode <: DDReturnCode
 end
 
 function CommonSolve.solve!(prob::InternalDataDrivenProblem{A}) where {A <:
@@ -19,7 +19,7 @@ function CommonSolve.solve!(prob::InternalDataDrivenProblem{A}) where {A <:
     _showvalues = let cache = cache
         (iter) -> begin
             shows = min(5, sum(cache.keeps))
-            losses = map(alg.loss, cache.candidates[cache.keeps])
+            losses = map(alg.options.loss, cache.candidates[cache.keeps])
             min_, max_ = extrema(losses)
             [(:Iterations, iter),
              (:RSS, map(StatsBase.rss, cache.candidates[cache.keeps][1:shows])),
@@ -43,7 +43,7 @@ function CommonSolve.solve!(prob::InternalDataDrivenProblem{A}) where {A <:
     end
 
     # Create the optimal basis
-    sort!(cache.candidates, by = alg.loss)
+    sort!(cache.candidates, by = alg.options.loss)
     best_cache = first(cache.candidates)
 
     new_basis = convert_to_basis(best_cache, cache.p, options)
```
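With these changes, `CrossEntropy` runs through the standard DataDrivenDiffEq entry point. A hedged end-to-end sketch, not taken from this commit: the problem, basis, and option names follow DataDrivenDiffEq conventions, and the exact keywords may differ between versions:

```julia
using DataDrivenDiffEq, DataDrivenLux
using ModelingToolkit: @variables
using Random

# Toy task: recover y = sin(x) from samples
rng = Random.Xoshiro(42)
X = randn(rng, 1, 200)
Y = sin.(X)
prob = DirectDataDrivenProblem(X, Y)

@variables x
basis = Basis([x], [x])

alg = CrossEntropy(populationsize = 50, n_layers = 2, rng = rng)
res = solve(prob, basis, alg, options = DataDrivenCommonOptions(maxiters = 50))
```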
