Skip to content

Commit dfe3381

Browse files
committed
fix error of gradient over scalar matrix multiplication
1 parent 8febd8f commit dfe3381

File tree

3 files changed

+143
-165
lines changed

3 files changed

+143
-165
lines changed

src/cuda/conv.jl

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,3 @@
1-
(g::GCNConv)(L̃::AbstractMatrix, X::CuArray) = g(cu(L̃), X)
2-
3-
(g::GCNConv)(L̃::CuArray, X::CuArray) = g.σ.(g.weight * X * L̃ .+ g.bias)
4-
5-
(c::ChebConv)(L̃::AbstractMatrix, X::CuArray) = c(cu(L̃), X)
6-
7-
function (c::ChebConv)(L̃::CuArray, X::CuArray)
8-
@assert size(X, 1) == c.in_channel "Input feature size must match input channel size."
9-
@assert size(X, 2) == size(L̃, 1) "Input vertex number must match Laplacian matrix size."
10-
11-
Z_prev = X
12-
Z = X * L̃
13-
Y = view(c.weight,:,:,1) * Z_prev
14-
Y += view(c.weight,:,:,2) * Z
15-
for k = 3:c.k
16-
Z, Z_prev = 2*Z*L̃ - Z_prev, Z
17-
Y += view(c.weight,:,:,k) * Z
18-
end
19-
return Y .+ c.bias
20-
end
21-
22-
231
# Avoid ambiguity
242
update_batch_edge(g::GATConv, adj, E::Fill{S,2,Axes}, X::CuMatrix, u) where {S,Axes} = update_batch_edge(g, adj, X)
253

src/layers/conv.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ function (c::ChebConv)(fg::FeaturedGraph, X::AbstractMatrix{T}) where T
104104
Y = view(c.weight,:,:,1) * Z_prev
105105
Y += view(c.weight,:,:,2) * Z
106106
for k = 3:c.k
107-
Z, Z_prev = 2*Z*L̃ - Z_prev, Z
107+
Z, Z_prev = 2 .* Z * L̃ - Z_prev, Z
108108
Y += view(c.weight,:,:,k) * Z
109109
end
110110
return Y .+ c.bias

test/layers/conv.jl

Lines changed: 142 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -259,146 +259,146 @@
259259
end
260260
end
261261

262-
# @testset "GatedGraphConv" begin
263-
# num_layers = 3
264-
# X = rand(T, in_channel, N)
265-
# Xt = transpose(rand(T, N, in_channel))
266-
# @testset "layer with graph" begin
267-
# ggc = GatedGraphConv(fg, out_channel, num_layers)
268-
# @test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
269-
# @test size(ggc.weight) == (out_channel, out_channel, num_layers)
270-
271-
# Y = ggc(X)
272-
# @test size(Y) == (out_channel, N)
273-
274-
275-
# # Test with transposed features
276-
# Y = ggc(Xt)
277-
# @test size(Y) == (out_channel, N)
278-
279-
# g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
280-
# @test size(g) == size(X)
281-
282-
# g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
283-
# @test size(g.weight) == size(ggc.weight)
284-
# end
285-
286-
# @testset "layer without graph" begin
287-
# ggc = GatedGraphConv(out_channel, num_layers)
288-
# @test size(ggc.weight) == (out_channel, out_channel, num_layers)
289-
290-
# fg = FeaturedGraph(adj, nf=X)
291-
# fg_ = ggc(fg)
292-
# @test size(node_feature(fg_)) == (out_channel, N)
293-
# @test_throws MethodError ggc(X)
294-
295-
# # Test with transposed features
296-
# fgt = FeaturedGraph(adj, nf=Xt)
297-
# fgt_ = ggc(fgt)
298-
# @test size(node_feature(fgt_)) == (out_channel, N)
299-
300-
# g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
301-
# @test size(g.nf) == size(X)
302-
303-
# g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
304-
# @test size(g.weight) == size(ggc.weight)
305-
# end
306-
# end
307-
308-
# @testset "EdgeConv" begin
309-
# X = rand(T, in_channel, N)
310-
# Xt = transpose(rand(T, N, in_channel))
311-
# @testset "layer with graph" begin
312-
# ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
313-
# @test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]
314-
315-
# Y = ec(X)
316-
# @test size(Y) == (out_channel, N)
317-
318-
# # Test with transposed features
319-
# Y = ec(Xt)
320-
# @test size(Y) == (out_channel, N)
321-
322-
# g = Zygote.gradient(x -> sum(ec(x)), X)[1]
323-
# @test size(g) == size(X)
324-
325-
# g = Zygote.gradient(model -> sum(model(X)), ec)[1]
326-
# @test size(g.nn.weight) == size(ec.nn.weight)
327-
# @test size(g.nn.bias) == size(ec.nn.bias)
328-
# end
329-
330-
# @testset "layer without graph" begin
331-
# ec = EdgeConv(Dense(2*in_channel, out_channel))
332-
333-
# fg = FeaturedGraph(adj, nf=X)
334-
# fg_ = ec(fg)
335-
# @test size(node_feature(fg_)) == (out_channel, N)
336-
# @test_throws MethodError ec(X)
337-
338-
# # Test with transposed features
339-
# fgt = FeaturedGraph(adj, nf=Xt)
340-
# fgt_ = ec(fgt)
341-
# @test size(node_feature(fgt_)) == (out_channel, N)
342-
343-
# g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
344-
# @test size(g.nf) == size(X)
345-
346-
# g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
347-
# @test size(g.nn.weight) == size(ec.nn.weight)
348-
# @test size(g.nn.bias) == size(ec.nn.bias)
349-
# end
350-
# end
351-
352-
# @testset "GINConv" begin
353-
# X = rand(Float32, in_channel, N)
354-
# Xt = transpose(rand(Float32, N, in_channel))
355-
# nn = Flux.Chain(Dense(in_channel, out_channel))
356-
# eps = 0.001
357-
358-
# @testset "layer with graph" begin
359-
# gc = GINConv(FeaturedGraph(adj), nn, eps)
360-
# @test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
361-
# @test size(gc.nn.layers[1].bias) == (out_channel, )
362-
# @test GraphSignals.adjacency_matrix(gc.fg) == adj
363-
364-
# Y = gc(FeaturedGraph(adj, nf=X))
365-
# @test size(node_feature(Y)) == (out_channel, N)
366-
367-
# # Test with transposed features
368-
# Y = gc(FeaturedGraph(adj, nf=Xt))
369-
# @test size(node_feature(Y)) == (out_channel, N)
370-
371-
# g = Zygote.gradient(x -> sum(node_feature(gc(x))),
372-
# FeaturedGraph(adj, nf=X))[1]
373-
# @test size(g.nf) == size(X)
374-
375-
# g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
376-
# gc)[1]
377-
# @test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
378-
# @test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
379-
# @test !in(:eps, Flux.trainable(gc))
380-
# end
381-
# end
382-
383-
# @testset "CGConv" begin
384-
# fg = FeaturedGraph(adj)
385-
# X = rand(Float32, in_channel, N)
386-
# E = rand(Float32, in_channel_edge, ne(fg))
387-
# Xt = transpose(rand(Float32, N, in_channel))
388-
# @testset "layer with graph" begin
389-
# cgc = CGConv(FeaturedGraph(adj),
390-
# (in_channel, in_channel_edge))
391-
# @test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
392-
# @test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
393-
# @test size(cgc.bf) == (in_channel,)
394-
# @test size(cgc.bs) == (in_channel,)
395-
396-
# Y = cgc(X, E)
397-
# @test size(Y) == (in_channel, N)
398-
399-
# Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
400-
# @test size(node_feature(Yg)) == (in_channel, N)
401-
# @test edge_feature(Yg) == E
402-
# end
403-
# end
262+
@testset "GatedGraphConv" begin
263+
num_layers = 3
264+
X = rand(T, in_channel, N)
265+
Xt = transpose(rand(T, N, in_channel))
266+
@testset "layer with graph" begin
267+
ggc = GatedGraphConv(fg, out_channel, num_layers)
268+
@test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
269+
@test size(ggc.weight) == (out_channel, out_channel, num_layers)
270+
271+
Y = ggc(X)
272+
@test size(Y) == (out_channel, N)
273+
274+
275+
# Test with transposed features
276+
Y = ggc(Xt)
277+
@test size(Y) == (out_channel, N)
278+
279+
g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
280+
@test size(g) == size(X)
281+
282+
g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
283+
@test size(g.weight) == size(ggc.weight)
284+
end
285+
286+
@testset "layer without graph" begin
287+
ggc = GatedGraphConv(out_channel, num_layers)
288+
@test size(ggc.weight) == (out_channel, out_channel, num_layers)
289+
290+
fg = FeaturedGraph(adj, nf=X)
291+
fg_ = ggc(fg)
292+
@test size(node_feature(fg_)) == (out_channel, N)
293+
@test_throws MethodError ggc(X)
294+
295+
# Test with transposed features
296+
fgt = FeaturedGraph(adj, nf=Xt)
297+
fgt_ = ggc(fgt)
298+
@test size(node_feature(fgt_)) == (out_channel, N)
299+
300+
g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
301+
@test size(g.nf) == size(X)
302+
303+
g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
304+
@test size(g.weight) == size(ggc.weight)
305+
end
306+
end
307+
308+
@testset "EdgeConv" begin
309+
X = rand(T, in_channel, N)
310+
Xt = transpose(rand(T, N, in_channel))
311+
@testset "layer with graph" begin
312+
ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
313+
@test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]
314+
315+
Y = ec(X)
316+
@test size(Y) == (out_channel, N)
317+
318+
# Test with transposed features
319+
Y = ec(Xt)
320+
@test size(Y) == (out_channel, N)
321+
322+
g = Zygote.gradient(x -> sum(ec(x)), X)[1]
323+
@test size(g) == size(X)
324+
325+
g = Zygote.gradient(model -> sum(model(X)), ec)[1]
326+
@test size(g.nn.weight) == size(ec.nn.weight)
327+
@test size(g.nn.bias) == size(ec.nn.bias)
328+
end
329+
330+
@testset "layer without graph" begin
331+
ec = EdgeConv(Dense(2*in_channel, out_channel))
332+
333+
fg = FeaturedGraph(adj, nf=X)
334+
fg_ = ec(fg)
335+
@test size(node_feature(fg_)) == (out_channel, N)
336+
@test_throws MethodError ec(X)
337+
338+
# Test with transposed features
339+
fgt = FeaturedGraph(adj, nf=Xt)
340+
fgt_ = ec(fgt)
341+
@test size(node_feature(fgt_)) == (out_channel, N)
342+
343+
g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
344+
@test size(g.nf) == size(X)
345+
346+
g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
347+
@test size(g.nn.weight) == size(ec.nn.weight)
348+
@test size(g.nn.bias) == size(ec.nn.bias)
349+
end
350+
end
351+
352+
@testset "GINConv" begin
353+
X = rand(Float32, in_channel, N)
354+
Xt = transpose(rand(Float32, N, in_channel))
355+
nn = Flux.Chain(Dense(in_channel, out_channel))
356+
eps = 0.001
357+
358+
@testset "layer with graph" begin
359+
gc = GINConv(FeaturedGraph(adj), nn, eps)
360+
@test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
361+
@test size(gc.nn.layers[1].bias) == (out_channel, )
362+
@test GraphSignals.adjacency_matrix(gc.fg) == adj
363+
364+
Y = gc(FeaturedGraph(adj, nf=X))
365+
@test size(node_feature(Y)) == (out_channel, N)
366+
367+
# Test with transposed features
368+
Y = gc(FeaturedGraph(adj, nf=Xt))
369+
@test size(node_feature(Y)) == (out_channel, N)
370+
371+
g = Zygote.gradient(x -> sum(node_feature(gc(x))),
372+
FeaturedGraph(adj, nf=X))[1]
373+
@test size(g.nf) == size(X)
374+
375+
g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
376+
gc)[1]
377+
@test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
378+
@test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
379+
@test !in(:eps, Flux.trainable(gc))
380+
end
381+
end
382+
383+
@testset "CGConv" begin
384+
fg = FeaturedGraph(adj)
385+
X = rand(Float32, in_channel, N)
386+
E = rand(Float32, in_channel_edge, ne(fg))
387+
Xt = transpose(rand(Float32, N, in_channel))
388+
@testset "layer with graph" begin
389+
cgc = CGConv(FeaturedGraph(adj),
390+
(in_channel, in_channel_edge))
391+
@test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
392+
@test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
393+
@test size(cgc.bf) == (in_channel,)
394+
@test size(cgc.bs) == (in_channel,)
395+
396+
Y = cgc(X, E)
397+
@test size(Y) == (in_channel, N)
398+
399+
Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
400+
@test size(node_feature(Yg)) == (in_channel, N)
401+
@test edge_feature(Yg) == E
402+
end
403+
end
404404
end

0 commit comments

Comments
 (0)