1+ """
2+ `DeepONet(in, out, grid, modes, σ=identity, init=glorot_uniform)`
3+ `DeepONet(Wf::AbstractArray, Wl::AbstractArray, [bias_f, bias_l, σ])`
4+
5+ Create a DeepONet architecture as proposed by Lu et al.
6+ arXiv:1910.03193
7+
8+ The model works as follows:
9+
10+ x --- branch --
11+ |
12+ -⊠--u-
13+ |
14+ y --- trunk ---
15+
16+ Where `x` represent the parameters of the PDE, discretely evaluated at its respective sensors,
17+ and `y` are the probing locations for the operator to be trained.
18+ `u` is the solution of the queried instance of the PDE, given by the specific choice of parameters.
19+
20+ Both inputs `x` and `y` are multiplied together via dot product Σᵢ bᵢⱼ tᵢₖ.
21+
22+ ```julia
23+ model = DeepONet()
24+ ```
25+ """
26+ struct DeepONet
27+ branch_net:: Flux.Chain
28+ trunk_net:: Flux.Chain
29+ # Constructor for the DeepONet
30+ function DeepONet (
31+ branch_net:: Flux.Chain ,
32+ trunk_net:: Flux.Chain )
33+ new (branch_net, trunk_net)
34+ end
35+ end
36+
"""
    DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple,
             act_branch = identity, act_trunk = identity;
             init = Flux.glorot_uniform, bias_branch = true, bias_trunk = true)

Build a `DeepONet` from two tuples of layer widths, one for the branch net
and one for the trunk net, with per-subnet activation functions and bias
switches.
"""
function DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple,
                  act_branch = identity, act_trunk = identity;
                  init = Flux.glorot_uniform,
                  bias_branch = true, bias_trunk = true)
    # NOTE(review): `init` is accepted here but never forwarded to
    # `construct_subnet` — TODO confirm whether the helper should receive it.
    # Both subnets are assembled by the helper in subnets.jl.
    branch = construct_subnet(architecture_branch, act_branch; bias = bias_branch)
    trunk = construct_subnet(architecture_trunk, act_trunk; bias = bias_trunk)

    return DeepONet(branch, trunk)
end
51+
# Register DeepONet with Flux so its fields (the branch and trunk chains)
# are traversed for trainable parameters (Flux.params, optimisers, gpu/cpu).
Flux.@functor DeepONet
53+
"""
    (a::DeepONet)(x::AbstractMatrix, y::AbstractVecOrMat)

Evaluate the operator network: feed the sensor values `x` through the branch
net and the probing locations `y` through the trunk net, then contract the
two outputs. `x` needs to be at least a 2-dim array, since we need n inputs
evaluated at m locations.
"""
function (a::DeepONet)(x::AbstractMatrix, y::AbstractVecOrMat)
    # The dot product needs a dimension to contract over, but inputs usually
    # batch along the same dimension; transposing the trunk output (the
    # matrix-type input) is the easiest way to line the dimensions up.
    branch_out = a.branch_net(x)
    trunk_out = a.trunk_net(y)
    return branch_out * trunk_out'
end
67+
"""
    (a::DeepONet)(x::AbstractArray, y::AbstractVecOrMat)

Batched evaluation: identical contraction to the matrix method, but using
NNlib's batched multiplication `⊠` instead of regular matrix-matrix
multiplication so batched branch inputs are handled.
"""
function (a::DeepONet)(x::AbstractArray, y::AbstractVecOrMat)
    branch_out = a.branch_net(x)
    trunk_out = a.trunk_net(y)
    # As in the unbatched method, transpose the trunk output so the shared
    # latent dimension is the one being contracted.
    return branch_out ⊠ trunk_out'
end
81+
# Sensor locations are shared across a batch, so a batched `y` is rejected.
function (a::DeepONet)(x::AbstractArray, y::AbstractArray)
    throw(ArgumentError("Sensor locations fed to trunk net can't be batched."))
end
85+
# Human-readable display listing both subnets.
function Base.show(io::IO, l::DeepONet)
    print(io, "DeepONet with\nbranch net: (", l.branch_net, ")\n")
    print(io, "Trunk net: (", l.trunk_net, ")\n")
end