
Commit e804b60

docs and tests
Parent: 7694a9c

3 files changed: 98 additions & 25 deletions

src/functor.jl

Lines changed: 0 additions & 25 deletions
@@ -37,31 +37,6 @@ Possible values include:
 """
 trainmode!(m, mode = true) = mode isa Bool ? testmode!(m, !mode) : testmode!(m, mode)
 
-# # push!(::Params, x) automatically discards already seen arrays
-# params!(p::Params, x::AbstractArray{<:Number}, seen = IdSet()) = push!(p, x)
-
-# function params!(p::Params, x, seen = IdSet())
-#   x in seen && return
-#   push!(seen, x)
-#   for child in trainable(x)
-#     params!(p, child, seen)
-#   end
-# end
-
-# function params(m...)
-#   ps = Params()
-#   params!(ps, m)
-#   return ps
-# end
-
-function params(m...)
-  ps = Params()
-  collect_params!(ps, m)
-  return ps
-end
-
-
-
 function loadparams!(m, xs)
   for (p, x) in zip(params(m), xs)
     size(p) == size(x) ||

src/utils.jl

Lines changed: 42 additions & 0 deletions
@@ -705,8 +705,50 @@ function filtered_walk(filter)
   return walk
 end
 
+"""
+    params(m...)
+
+Collect trainable parameters (a.k.a. numerical arrays)
+from the input model(s) `m` into a [`Zygote.Params`](@ref) object.
+
+Only the parameters that can be reached by recursion
+on the [`trainable`](@ref) children of
+the tree with root `m` are collected.
+
+# Usage
+
+```julia
+julia> m = Dense(ones(2, 3), zeros(2))
+Dense(3, 2)  # 8 parameters
+
+julia> ps = Flux.params(m)
+Params([[1.0 1.0 1.0; 1.0 1.0 1.0], [0.0, 0.0]])
+
+julia> x = ones(3)
+3-element Vector{Float64}:
+ 1.0
+ 1.0
+ 1.0
+
+julia> gs = gradient(() -> sum(2 .* m(x)), ps)
+Grads(...)
+
+julia> gs[m.weight]
+2×3 Matrix{Float64}:
+ 2.0  2.0  2.0
+ 2.0  2.0  2.0
+```
+"""
+function params(m...)
+  ps = Params()
+  collect_params!(ps, m)
+  return ps
+end
+
+
 @functor Base.RefValue
 
+
 # Other
 
 """

test/utils.jl

Lines changed: 56 additions & 0 deletions
@@ -216,6 +216,62 @@ end
   r = Any[nothing,m]
   r[1] = r
   @test size.(params(r)) == [(5, 10), (5, 5), (5,), (5, 1)]
+
+  @testset "use params in gradient context" begin
+    m = Chain(Dense(3,2), Dense(2,2))
+    ps = Flux.params(m)
+    gs = gradient(() -> sum(sum(p) for p in Flux.params(m)), ps)
+    for p in ps
+      @test gs[p] ≈ ones(size(p))
+    end
+
+    w1, w2 = rand(2), rand(2)
+    ps = Flux.params(w1, w2)
+    gs = gradient(() -> sum(sum(p) for p in Flux.params(w1, w2)), ps)
+    for p in ps
+      @test gs[p] ≈ ones(size(p))
+    end
+
+    # BROKEN TESTS
+    m = Chain(Dense(3,2), Dense(2,2))
+    @test_broken gradient(m -> sum(params(m)[1]), m) != (nothing, )
+    @test_broken gradient(m -> sum(params(m)[1]), m) != (nothing, )
+
+    gs = gradient(() -> sum(params(m)[1]), params(m))
+    @test_broken gs[params(m)[1]] !== nothing
+
+    # Tests from https://github.com/FluxML/Flux.jl/pull/1614
+    m = Dense(3, 2)
+    ps = Flux.params(m)
+    data = rand(Float32, 3, 5)
+    loss(m, x) = sum(m(x).^2)
+
+    g1 = gradient(Flux.params(m)) do
+      loss(m, data)
+    end
+    g2 = gradient(Flux.params(m)) do
+      ps = Flux.params(m) # just creating params without using them
+      loss(m, data)
+    end
+    g3 = gradient(Flux.params(m)) do
+      ps = Flux.params(m)
+      loss(m, data) + sum(sum(p) for p in ps)
+    end
+    g4 = gradient(Flux.params(m)) do
+      loss(m, data) + sum(sum(p) for p in ps)
+    end
+    g5 = gradient(Flux.params(m)) do
+      sum(Flux.params(m)[1]) + sum(Flux.params(m)[2])
+    end
+    g6 = gradient(Flux.params(m)) do
+      sum(ps[1]) + sum(ps[2])
+    end
+    @test g2[m.weight] == g1[m.weight]
+    @test g3[m.weight] == g1[m.weight] .+ 1
+    @test g4[m.weight] == g1[m.weight] .+ 1
+    @test_broken g5[m.weight] .== 1 # TODO regression with respect to master
+    @test_broken g6[m.weight] .== 1 # Not a regression, broken on master
+  end
 end
 
 @testset "Basic Stacking" begin
