Skip to content

Commit 7694a9c

Browse files
params
1 parent 4e45416 commit 7694a9c

File tree

4 files changed

+39
-20
lines changed

4 files changed

+39
-20
lines changed

src/Flux.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ using Zygote, MacroTools, Juno, Reexport
88
using MacroTools: @forward
99
@reexport using NNlib
1010
using Zygote: Params, @adjoint, gradient, pullback, @nograd
11+
using Functors: @functor, functor, fmap, isleaf
1112
export gradient
1213

1314
export Chain, Dense, Maxout, SkipConnection, Parallel, flatten,

src/functor.jl

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import Adapt: adapt, adapt_storage
22
using LinearAlgebra: Cholesky
33
using Zygote: IdSet
4-
import Functors: Functors, @functor, functor, fmap, isleaf
54
using SparseArrays: AbstractSparseArray
65

76
trainable(m) = functor(m)[1]
@@ -38,23 +37,31 @@ Possible values include:
3837
"""
3938
trainmode!(m, mode = true) = mode isa Bool ? testmode!(m, !mode) : testmode!(m, mode)
4039

41-
# push!(::Params, x) automatically discards already seen arrays
42-
params!(p::Params, x::AbstractArray{<:Number}, seen = IdSet()) = push!(p, x)
40+
# # push!(::Params, x) automatically discards already seen arrays
41+
# params!(p::Params, x::AbstractArray{<:Number}, seen = IdSet()) = push!(p, x)
4342

44-
function params!(p::Params, x, seen = IdSet())
45-
x in seen && return
46-
push!(seen, x)
47-
for child in trainable(x)
48-
params!(p, child, seen)
49-
end
50-
end
43+
# function params!(p::Params, x, seen = IdSet())
44+
# x in seen && return
45+
# push!(seen, x)
46+
# for child in trainable(x)
47+
# params!(p, child, seen)
48+
# end
49+
# end
50+
51+
# function params(m...)
52+
# ps = Params()
53+
# params!(ps, m)
54+
# return ps
55+
# end
5156

5257
function params(m...)
5358
ps = Params()
54-
params!(ps, m)
59+
collect_params!(ps, m)
5560
return ps
5661
end
5762

63+
64+
5865
function loadparams!(m, xs)
5966
for (p, x) in zip(params(m), xs)
6067
size(p) == size(x) ||

src/utils.jl

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,23 +675,33 @@ modifications to the weight vector (for example, with a hypernetwork).
675675
"""
676676
function destructure(m)
677677
xs = Zygote.Buffer([])
678+
collect_params!(xs, m)
679+
return vcat(vec.(copy(xs))...), p -> _restructure(m, p)
680+
end
681+
682+
function collect_params!(xs, m)
678683
filter = (x, c) -> any(y -> c === y, trainable(x))
679684
walk = filtered_walk(filter)
680685
fmap(m; walk) do x
681686
x isa AbstractArray{<:Number} && push!(xs, x)
682687
return x
683688
end
684-
return vcat(vec.(copy(xs))...), p -> _restructure(m, p)
685689
end
686690

687691
function filtered_walk(filter)
692+
seen = IdSet()
693+
688694
function walk(f, x)
695+
x in seen && return x
696+
push!(seen, x)
697+
689698
children, reconstruct = functor(x)
690699
mappedchildren = map(children) do c
691700
filter(x, c) ? f(c) : c
692701
end
693702
reconstruct(mappedchildren)
694703
end
704+
695705
return walk
696706
end
697707

test/utils.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -409,22 +409,23 @@ end
409409
∇p = gradient(θ -> sum(re(θ)(x)), p)[1]
410410
# @show size(∇p)
411411
# @show size(destructure(∇m)[1])
412-
# @show norm(∇p - destructure(∇m)[1])
412+
@show norm(∇p - destructure(∇m)[1])
413413
@test ∇p ≈ destructure(∇m)[1] atol=1e-4
414414
end
415415

416416
@testset "destructure with buffers" begin
417-
p, re = destructure(BatchNorm(10))
418-
@test length(p) == 20
417+
p, re = destructure(BatchNorm(3))
418+
@test length(p) == 6
419419

420420
# https://github.com/FluxML/Flux.jl/issues/1727
421-
x = rand(Float32, 2, 3)
422-
gs, back = Flux.pullback(x, p) do x, p
421+
x = rand(Float32, 3, 4)
422+
y, back = Flux.pullback(x, p) do x, p
423423
vec(re(p)(x))
424424
end
425-
@test_nowarn b = back(a)
426-
@test b[1] == size(x)
427-
@test b[2] == size(p)
425+
@test_nowarn back(y)
426+
b = back(y)
427+
@test size(b[1]) == size(x)
428+
@test size(b[2]) == size(p)
428429
end
429430
end
430431
end

0 commit comments

Comments
 (0)