|
82 | 82 | @test gradient(x -> re7(x).b[2][2], rand(3))[1] == [0,0,0] |
83 | 83 | @test gradient(x -> re7(x).c[2][1], rand(3))[1] == [0,0,0] |
84 | 84 | end |
| 85 | + |
| 86 | +@testset "Flux issue 1826" begin |
| 87 | + v, re = destructure((x=[1,2.0], y=[3,4,5.0])) |
| 88 | + @test gradient(zero(v)) do w |
| 89 | + m = re(w) |
| 90 | + 5 * sum(m.x) + 7 * sum(m[2]) # loss depends on both fields: x by name, y by position (m[2]) |
| 91 | + end == ([5.0, 5.0, 7.0, 7.0, 7.0],) |
| 92 | + # Regression case: a loss that uses only x (so y must get zeros) was broken on Flux: |
| 93 | + @test gradient(w -> sum(re(w).x), zero(v)) == ([1.0, 1.0, 0.0, 0.0, 0.0],) |
| 94 | + |
| 95 | + sh = [7,7.0]; |
| 96 | + v, re = destructure((x=sh, y=[3.0,4.0], z=sh)) # same array sh is used for both x and z: a shared array in the model |
| 97 | + @test v == [7, 7, 3, 4] |
| 98 | + @test re([1,10,100,1000]) == (x = [1, 10], y = [100, 1000], z = [1, 10]) |
| 99 | + |
| 100 | + @test gradient(zero(v)) do w |
| 101 | + m = re(w) |
| 102 | + 3 * sum(m.x) + 13 * sum(m.z) # no dependence on y; x and z share sh but yield two distinct gradient arrays |
| 103 | + end == ([16, 16, 0, 0],) # shared slot expects 3 + 13 = 16, y slot 0; Flux gave ([3.0, 3.0, 13.0, 13.0],) |
| 104 | + |
| 105 | + @test gradient(zero(v)) do w |
| 106 | + m = re(w) |
| 107 | + 4(sum(m.x) + sum(m.z)) # now two gradients are ===, so it eliminates one; expected value below is 4 + 4 = 8 on the shared slot |
| 108 | + end == ([8,8,0,0],) |
| 109 | + |
| 110 | + @test gradient(zero(v)) do w |
| 111 | + m = re(w) |
| 112 | + 4(sum(m.x) + sum(m.y)) + 13*sum(m.z) # again two gradients are ===, so it eliminates one (presumably those of x and y -- TODO confirm) |
| 113 | + end == ([17,17,4,4],) # shared slot expects 4 + 13 = 17, y slot 4; Flux gave ([4.0, 4.0, 13.0, 13.0],) |
| 114 | +end |
0 commit comments