This repository was archived by the owner on Sep 28, 2024. It is now read-only.

Commit a8073de

Merge pull request #67 from ven-k/vk/nomad
Add Nonlinear Manifold Decoders for Operator Learning (NOMAD)
2 parents 16d641f + d093556 commit a8073de

File tree: 7 files changed (+163 −1 lines)

docs/src/introduction.md

Lines changed: 5 additions & 0 deletions
@@ -24,3 +24,8 @@ by linking the operators into a Markov chain.
 Deep operator network (DeepONet) learns a neural operator with the help of two sub-neural-network structures, described as the branch and the trunk network.
 The branch network is fed the initial-conditions data, whereas the trunk is fed the locations where the target (output) is evaluated from the corresponding initial conditions.
 It is important that the output sizes of the branch and trunk subnets are the same, so that a dot product can be performed between them.
+
+## [Nonlinear Manifold Decoders for Operator Learning](https://github.com/SciML/NeuralOperators.jl/blob/master/src/NOMAD.jl)
+
+Nonlinear Manifold Decoders for Operator Learning (NOMAD) learns a neural operator with a nonlinear decoder, parameterized by a deep neural network, that jointly takes the approximator output and the locations as inputs.
+The approximator network is fed the initial-conditions data. The approximator output and the locations are then passed to a decoder neural network to produce the target (output). It is important that the input size of the decoder subnet equals the sum of the approximator output size and the number of locations, as the sketch below illustrates.
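A minimal sketch of this sizing rule, using the `NOMAD` constructor added in this PR (shapes mirror `test/nomad.jl`; the names `u0` and `locations` are illustrative): with 16 approximator outputs and 16 locations, the decoder input must be 32.

```julia
using NeuralOperators, Flux

u0 = rand(Float32, 16)                        # initial-conditions data (16 sensors)
locations = collect(range(0, 1, length=16)')  # 1×16 row of evaluation locations

# Approximator maps 16 -> 22 -> 16; decoder input = 16 + 16 = 32.
model = NOMAD((16, 22, 16), (32, 16), σ, tanh)

size(model(u0, locations))  # (1, 16): the target evaluated at the 16 locations
```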

example/Burgers/src/Burgers.jl

Lines changed: 39 additions & 0 deletions
@@ -3,6 +3,7 @@ module Burgers
 using DataDeps, MAT, MLUtils
 using NeuralOperators, Flux
 using CUDA, FluxTraining, BSON
+import Flux: params
 
 include("Burgers_deeponet.jl")

@@ -69,4 +70,42 @@ function train(; cuda=true, η₀=1f-3, λ=1f-4, epochs=500)
     return learner
 end
 
+function train_nomad(; n=300, cuda=true, learning_rate=0.001, epochs=400)
+    if cuda && has_cuda()
+        @info "Training on GPU"
+        device = gpu
+    else
+        @info "Training on CPU"
+        device = cpu
+    end
+
+    x, y = get_data_don(n=n)
+
+    xtrain = x[1:280, :]'
+    ytrain = y[1:280, :]
+
+    xval = x[end-19:end, :]' |> device
+    yval = y[end-19:end, :] |> device
+
+    # grid = collect(range(0, 1, length=1024)') |> device
+    grid = rand(collect(0:0.001:1), (280, 1024)) |> device
+    gridval = rand(collect(0:0.001:1), (20, 1024)) |> device
+
+    opt = ADAM(learning_rate)
+
+    # Decoder input is 2048: 1024 approximator outputs + 1024 sensor locations.
+    m = NOMAD((1024, 1024), (2048, 1024), gelu, gelu) |> device
+
+    loss(X, y, sensor) = Flux.Losses.mse(m(X, sensor), y)
+    evalcb() = @show(loss(xval, yval, gridval))
+
+    data = [(xtrain, ytrain, grid)] |> device
+    Flux.@epochs epochs Flux.train!(loss, params(m), data, opt, cb=evalcb)
+    ỹ = m(xval |> device, gridval |> device)
+
+    # Mean absolute error over the validation set
+    diffvec = vec(abs.(cpu(yval) .- cpu(ỹ)))
+    mean_diff = sum(diffvec) / length(diffvec)
+    return mean_diff
+end
 end
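As a rough guide to running the new trainer (a sketch only, not part of the diff): assuming the example project's environment is active and the Burgers dataset has been fetched via DataDeps, a quick smoke run might look like the following; `cuda=false, epochs=10` are illustrative values, not the defaults.

```julia
using Burgers

# Quick CPU smoke run; returns the mean absolute validation error.
err = Burgers.train_nomad(; cuda=false, epochs=10)
@show err
```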

example/Burgers/test/runtests.jl

Lines changed: 5 additions & 0 deletions
@@ -13,3 +13,8 @@ using Test
 
 # include("deeponet.jl")
 end
+
+@testset "Burger: NOMAD Training Accuracy" begin
+    ϵ = Burgers.train_nomad(; cuda=true, epochs=100)
+    @test ϵ < 0.4 # epochs=100 returns ≈0.233
+end

src/NOMAD.jl

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+struct NOMAD{T1, T2}
+    approximator_net::T1
+    decoder_net::T2
+end
+
+"""
+`NOMAD(architecture_approximator::Tuple, architecture_decoder::Tuple,
+       act_approximator = identity, act_decoder = identity;
+       init_approximator = Flux.glorot_uniform,
+       init_decoder = Flux.glorot_uniform,
+       bias_approximator = true, bias_decoder = true)`
+`NOMAD(approximator_net::Flux.Chain, decoder_net::Flux.Chain)`
+
+Create a Nonlinear Manifold Decoders for Operator Learning (NOMAD) model, as
+proposed by Seidman et al. in arXiv:2206.03551.
+
+The decoder is defined as follows:
+
+``\\tilde D (β, y) = f(β, y)``
+
+# Usage
+
+```julia
+julia> model = NOMAD((16,32,16), (24,32))
+NOMAD with
+Approximator net: (Chain(Dense(16 => 32), Dense(32 => 16)))
+Decoder net: (Chain(Dense(24 => 32)))
+
+julia> model = NeuralOperators.NOMAD((32,64,32), (64,72), σ, tanh; init_approximator=Flux.glorot_normal, bias_decoder=false)
+NOMAD with
+Approximator net: (Chain(Dense(32 => 64, σ), Dense(64 => 32, σ)))
+Decoder net: (Chain(Dense(64 => 72, tanh; bias=false)))
+
+julia> approximator = Chain(Dense(2, 128), Dense(128, 64))
+Chain(
+  Dense(2 => 128),   # 384 parameters
+  Dense(128 => 64),  # 8_256 parameters
+)                    # Total: 4 arrays, 8_640 parameters, 34.000 KiB.
+
+julia> decoder = Chain(Dense(72, 24), Dense(24, 12))
+Chain(
+  Dense(72 => 24),   # 1_752 parameters
+  Dense(24 => 12),   # 300 parameters
+)                    # Total: 4 arrays, 2_052 parameters, 8.266 KiB.
+
+julia> model = NOMAD(approximator, decoder)
+NOMAD with
+Approximator net: (Chain(Dense(2 => 128), Dense(128 => 64)))
+Decoder net: (Chain(Dense(72 => 24), Dense(24 => 12)))
+```
+"""
+function NOMAD(architecture_approximator::Tuple, architecture_decoder::Tuple,
+               act_approximator = identity, act_decoder = identity;
+               init_approximator = Flux.glorot_uniform,
+               init_decoder = Flux.glorot_uniform,
+               bias_approximator = true, bias_decoder = true)
+    approximator_net = construct_subnet(architecture_approximator, act_approximator;
+                                        init=init_approximator, bias=bias_approximator)
+
+    decoder_net = construct_subnet(architecture_decoder, act_decoder;
+                                   init=init_decoder, bias=bias_decoder)
+
+    return NOMAD{typeof(approximator_net), typeof(decoder_net)}(approximator_net, decoder_net)
+end
+
+Flux.@functor NOMAD
+
+function (a::NOMAD)(x::AbstractArray, y::AbstractVecOrMat)
+    # Assign the subnetworks
+    approximator, decoder = a.approximator_net, a.decoder_net
+
+    # Concatenate the approximator output with the (transposed) locations
+    # along the feature dimension, then decode.
+    return decoder(cat(approximator(x), y', dims=1))'
+end
+
+# Print nicely
+function Base.show(io::IO, l::NOMAD)
+    print(io, "NOMAD with\nApproximator net: (", l.approximator_net)
+    print(io, ")\n")
+    print(io, "Decoder net: (", l.decoder_net)
+    print(io, ")\n")
+end
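The forward pass above fixes a shape contract worth spelling out: `x` is (approximator input size × batch) and `y` is (batch × number of locations); the result comes back as (batch × decoder output size). A small sketch under those assumptions, using the same architecture as the Burgers trainer:

```julia
using NeuralOperators, Flux

m = NOMAD((1024, 1024), (2048, 1024), gelu, gelu)

x = rand(Float32, 1024, 8)  # 8 samples, one column each
y = rand(Float32, 8, 1024)  # 8 rows of 1024 evaluation locations

# approximator(x) is 1024×8; y' is 1024×8; cat along dims=1 gives 2048×8;
# the decoder maps this to 1024×8 and the final transpose yields 8×1024.
size(m(x, y))  # (8, 1024)
```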

src/NeuralOperators.jl

Lines changed: 2 additions & 1 deletion
@@ -10,12 +10,13 @@ module NeuralOperators
 using GeometricFlux
 using Statistics
 
-export DeepONet
+export DeepONet, NOMAD
 
 include("Transform/Transform.jl")
 include("operator_kernel.jl")
 include("loss.jl")
 include("model.jl")
 include("DeepONet.jl")
+include("NOMAD.jl")
 include("subnets.jl")
 end

test/nomad.jl

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+@testset "NOMAD" begin
+    @testset "proper construction" begin
+        nomad = NOMAD((32,64,72), (24,48,72), σ, tanh)
+        # approximator net
+        @test size(nomad.approximator_net.layers[end].weight) == (72, 64)
+        @test size(nomad.approximator_net.layers[end].bias) == (72,)
+        # decoder net
+        @test size(nomad.decoder_net.layers[end].weight) == (72, 48)
+        @test size(nomad.decoder_net.layers[end].bias) == (72,)
+    end
+
+    # Accept only Int as architecture parameters
+    @test_throws MethodError NOMAD((32.5,64,72), (24,48,72), σ, tanh)
+    @test_throws MethodError NOMAD((32,64,72), (24.1,48,72))
+
+    # Just the first 16 datapoints from the Burgers' equation dataset
+    a = [0.83541104, 0.83479851, 0.83404712, 0.83315711, 0.83212979, 0.83096755,
+         0.82967374, 0.82825263, 0.82670928, 0.82504949, 0.82327962, 0.82140651,
+         0.81943734, 0.81737952, 0.8152405, 0.81302771]
+    sensors = collect(range(0, 1, length=16)')
+    model = NOMAD((length(a), 22, length(a)), (length(a) + length(sensors), length(sensors)), σ, tanh; init_approximator=Flux.glorot_normal, bias_decoder=false)
+    y = model(a, sensors)
+    @test size(y) == (1, 16)
+    # Check that the model description is printed as defined
+    @test repr(model) == "NOMAD with\nApproximator net: (Chain(Dense(16 => 22, σ), Dense(22 => 16, σ)))\nDecoder net: (Chain(Dense(32 => 16, tanh; bias=false)))\n"
+
+    mgrad = Flux.Zygote.gradient(() -> sum(model(a, sensors)), Flux.params(model))
+    @info mgrad.grads
+    @test length(mgrad.grads) == 5
+end
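The final assertion counts one gradient entry per trainable array: the two approximator `Dense` layers contribute a weight and a bias each (4 arrays), and the bias-free decoder `Dense` contributes only its weight, giving 5. A quick check along those lines (a sketch, assuming `Flux.params` collects exactly those arrays):

```julia
using NeuralOperators, Flux

model = NOMAD((16, 22, 16), (32, 16), σ, tanh; bias_decoder=false)
length(Flux.params(model))  # 5 = 2 × (weight + bias) + 1 decoder weight
```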

test/runtests.jl

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ tests = [
     "loss.jl",
     "model.jl",
     "deeponet.jl",
+    "nomad.jl",
 ]
 
 if CUDA.functional()
