Commit ad8c34a

✍️ expand docstring
1 parent 42a7147 commit ad8c34a

2 files changed: +52 additions, −9 deletions


src/DeepONet.jl

Lines changed: 48 additions & 7 deletions
@@ -1,8 +1,12 @@
 """
-`DeepONet(in, out, grid, modes, σ=identity, init=glorot_uniform)`
-`DeepONet(Wf::AbstractArray, Wl::AbstractArray, [bias_f, bias_l, σ])`
+`DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple,
+          act_branch = identity, act_trunk = identity;
+          init_branch = Flux.glorot_uniform,
+          init_trunk = Flux.glorot_uniform,
+          bias_branch=true, bias_trunk=true)`
+`DeepONet(branch_net::Flux.Chain, trunk_net::Flux.Chain)`
 
-Create a DeepONet architecture as proposed by Lu et al.
+Create an (unstacked) DeepONet architecture as proposed by Lu et al.
 arXiv:1910.03193
 
 The model works as follows:
@@ -19,8 +23,42 @@ and `y` are the probing locations for the operator to be trained.
 
 Both inputs `x` and `y` are multiplied together via dot product Σᵢ bᵢⱼ tᵢₖ.
 
+You can set up this architecture in two ways:
+
+1. By specifying the architecture and all its parameters as given above. This always creates `Dense` layers for the branch and trunk net and corresponds to the DeepONet proposed by Lu et al.
+
+2. By passing two architectures in the form of two `Chain` structs directly. Do this if you want more flexibility, e.g. to use an RNN or CNN instead of simple `Dense` layers.
+
+Strictly speaking, DeepONet does not require either the branch or the trunk net to be a simple DNN. Usually, though, this is the case, which is why it is treated as the default here.
+
 ```julia
-model = DeepONet()
+julia> model = DeepONet((32,64,72), (24,64,72))
+DeepONet with
+branch net: (Chain(Dense(32, 64), Dense(64, 72)))
+Trunk net: (Chain(Dense(24, 64), Dense(64, 72)))
+
+julia> model = DeepONet((32,64,72), (24,64,72), σ, tanh; init_branch=Flux.glorot_normal, bias_trunk=false)
+DeepONet with
+branch net: (Chain(Dense(32, 64, σ), Dense(64, 72, σ)))
+Trunk net: (Chain(Dense(24, 64, tanh; bias=false), Dense(64, 72, tanh; bias=false)))
+
+julia> branch = Chain(Dense(2,128), Dense(128,64), Dense(64,72))
+Chain(
+  Dense(2, 128),   # 384 parameters
+  Dense(128, 64),  # 8_256 parameters
+  Dense(64, 72),   # 4_680 parameters
+)                  # Total: 6 arrays, 13_320 parameters, 52.406 KiB.
+
+julia> trunk = Chain(Dense(1,24), Dense(24,72))
+Chain(
+  Dense(1, 24),    # 48 parameters
+  Dense(24, 72),   # 1_800 parameters
+)                  # Total: 4 arrays, 1_848 parameters, 7.469 KiB.
+
+julia> model = DeepONet(branch, trunk)
+DeepONet with
+branch net: (Chain(Dense(2, 128), Dense(128, 64), Dense(64, 72)))
+Trunk net: (Chain(Dense(1, 24), Dense(24, 72)))
 ```
 """
 struct DeepONet
@@ -37,14 +75,17 @@ end
 
 # Declare the function that assigns weights and biases to the layer
 function DeepONet(architecture_branch::Tuple, architecture_trunk::Tuple,
                   act_branch = identity, act_trunk = identity;
-                  init = Flux.glorot_uniform,
+                  init_branch = Flux.glorot_uniform,
+                  init_trunk = Flux.glorot_uniform,
                   bias_branch=true, bias_trunk=true)
 
     # To construct the subnets we use the helper function in subnets.jl
     # Initialize the branch net
-    branch_net = construct_subnet(architecture_branch, act_branch; bias=bias_branch)
+    branch_net = construct_subnet(architecture_branch, act_branch;
+                                  init=init_branch, bias=bias_branch)
     # Initialize the trunk net
-    trunk_net = construct_subnet(architecture_trunk, act_trunk; bias=bias_trunk)
+    trunk_net = construct_subnet(architecture_trunk, act_trunk;
+                                 init=init_trunk, bias=bias_trunk)
 
     return DeepONet(branch_net, trunk_net)
 end
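For context, here is a minimal sketch of what the split keywords enable. The constructor calls mirror the docstring examples above; the last three lines only illustrate the Σᵢ bᵢⱼ tᵢₖ dot-product combination described in the docstring and assume the struct exposes `branch_net`/`trunk_net` fields — they are not the package's actual forward pass:

```julia
using Flux  # assumes the package defining DeepONet is also loaded

# Branch and trunk can now be initialized independently.
model = DeepONet((32, 64, 72), (24, 64, 72), σ, tanh;
                 init_branch = Flux.glorot_normal,
                 init_trunk  = Flux.glorot_uniform)

# Illustration of Σᵢ bᵢⱼ tᵢₖ: both subnets map into the same
# 72-dimensional latent space, where their outputs are contracted.
b = model.branch_net(rand(Float32, 32, 10))   # 72×10  (10 input functions)
t = model.trunk_net(rand(Float32, 24, 100))   # 72×100 (100 probing locations)
out = b' * t                                  # 10×100; out[j, k] = Σᵢ b[i, j] * t[i, k]
```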

src/subnets.jl

Lines changed: 4 additions & 2 deletions
@@ -23,12 +23,14 @@ julia> model([2,1])
 -0.7630446
 ```
 """
-function construct_subnet(architecture::Tuple, σ = identity; bias=true)
+function construct_subnet(architecture::Tuple, σ = identity;
+                          init=Flux.glorot_uniform, bias=true)
     # First, create an array that contains all Dense layers independently
     # Given an n-element architecture, this constructs n-1 layers
     layers = Array{Flux.Dense}(undef, length(architecture)-1)
     @inbounds for i ∈ 2:length(architecture)
-        layers[i-1] = Flux.Dense(architecture[i-1], architecture[i], σ; bias=bias)
+        layers[i-1] = Flux.Dense(architecture[i-1], architecture[i], σ;
+                                 init=init, bias=bias)
     end
 
     # Concatenate the layers to a string, chain them and parse them into
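Likewise, the new `init` keyword on the helper can be exercised on its own. A minimal sketch, assuming `construct_subnet` is in scope (e.g. via the package's internals); `Flux.zeros32` is chosen here only to make the effect of `init` directly observable:

```julia
using Flux

# With zero-initialized weights and no bias, every output must be tanh(0) = 0,
# confirming that the `init` keyword reaches the underlying Dense layers.
subnet = construct_subnet((2, 16, 1), tanh; init = Flux.zeros32, bias = false)
subnet(Float32[0.5, -0.5])   # returns Float32[0.0]
```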
