
Commit bd24959

semi-supervised GCN baseline
fix
1 parent 0e95727 commit bd24959

6 files changed, +142 -78 lines changed

examples/gcn.jl

Lines changed: 119 additions & 40 deletions
@@ -1,48 +1,127 @@
+using CUDA
+using Flux
+using Flux: onehotbatch, onecold
+using Flux.Losses: logitcrossentropy
+using Flux.Data: DataLoader
 using GeometricFlux
+using GeometricFlux.Datasets
 using GraphSignals
-using Flux
-using Flux: onehotbatch, onecold, logitcrossentropy, throttle
-using Flux: @epochs
-using JLD2
+using Logging: with_logger
+using Parameters: @with_kw
+using ProgressMeter: Progress, next!
 using Statistics
-using SparseArrays
-using Graphs.SimpleGraphs
-using CUDA
+using Random
+
+CUDA.allowscalar(false)
+
+function load_data(dataset, batch_size)
+    # (train_X, train_y) dim: (num_features, target_dim) × 140
+    train_X, train_y = map(x->Matrix(x), traindata(Planetoid(), dataset))
+    # (test_X, test_y) dim: (num_features, target_dim) × 1000
+    test_X, test_y = map(x->Matrix(x), testdata(Planetoid(), dataset))
+    g = graphdata(Planetoid(), dataset)
+    train_idx = train_indices(Planetoid(), dataset)
+    test_idx = test_indices(Planetoid(), dataset)
+
+    train_data = [(subgraph(FeaturedGraph(g, nf=train_X), train_idx), train_y) for _ in 1:100];
+    test_data = [(subgraph(FeaturedGraph(g, nf=test_X), test_idx), test_y) for _ in 1:100];
+    train_batch = Flux.batch(train_data)
+    test_batch = Flux.batch(test_data)
+
+    train_loader = DataLoader(train_batch, batchsize=batch_size, shuffle=true)
+    test_loader = DataLoader(test_batch, batchsize=batch_size, shuffle=true)
+    return train_loader, test_loader
+end
+
+@with_kw mutable struct Args
+    η = 0.01            # learning rate
+    λ = 5f-4            # regularization parameter
+    batch_size = 32     # batch size
+    num_nodes = 2708    # number of nodes for graph
+    epochs = 200        # number of epochs
+    seed = 0            # random seed
+    cuda = true         # use GPU
+    input_dim = 1433    # input dimension
+    hidden_dim = 16     # hidden dimension
+    target_dim = 7      # target dimension
+end
 
-@load "data/cora_features.jld2" features
-@load "data/cora_labels.jld2" labels
-@load "data/cora_graph.jld2" g
-
-num_nodes = 2708
-num_features = 1433
-hidden = 16
-target_catg = 7
-epochs = 200
-λ = 5e-4
-
-## Preprocessing data
-train_X = Matrix{Float32}(features) |> gpu  # dim: num_features * num_nodes
-train_y = Matrix{Float32}(labels) |> gpu  # dim: target_catg * num_nodes
-fg = FeaturedGraph(g)  # pass to gpu together in model layers
-
-## Model
-model = Chain(GCNConv(fg, num_features=>hidden, relu),
-              Dropout(0.5),
-              GCNConv(fg, hidden=>target_catg),
-              ) |> gpu;
-# do not show model architecture, showing CuSparseMatrix will trigger errors
-
-## Loss
+## Loss: cross entropy with first layer L2 regularization
 l2norm(x) = sum(abs2, x)
-# cross entropy with first layer L2 regularization
-loss(x, y) = logitcrossentropy(model(x), y) + λ*sum(l2norm, Flux.params(model[1]))
-accuracy(x, y) = mean(onecold(softmax(cpu(model(x)))) .== onecold(cpu(y)))
+function model_loss(model, λ, batch)
+    loss = 0.0f0
+    for (x, y) in batch
+        loss += logitcrossentropy(model(x), y)
+        loss += λ*sum(l2norm, Flux.params(model[1]))
+    end
+    return loss
+end
+
+function accuracy(model, batch::AbstractVector)
+    return mean(mean(onecold(softmax(cpu(model(x)))) .== onecold(cpu(y))) for (x, y) in batch)
+end
+
+accuracy(model, loader::DataLoader, device) = mean(accuracy(model, batch |> device) for batch in loader)
+
+function train(; kws...)
+    # load hyperparameters
+    args = Args(; kws...)
+    args.seed > 0 && Random.seed!(args.seed)
+
+    # GPU config
+    if args.cuda && CUDA.has_cuda()
+        device = gpu
+        @info "Training on GPU"
+    else
+        device = cpu
+        @info "Training on CPU"
+    end
+
+    # load Cora from Planetoid dataset
+    train_loader, test_loader = load_data(:cora, args.batch_size)
+
+    # build model
+    model = Chain(
+        GCNConv(args.input_dim=>args.hidden_dim, relu),
+        GraphParallel(node_layer=Dropout(0.5)),
+        GCNConv(args.hidden_dim=>args.target_dim),
+        node_feature,
+    ) |> device
+
+    # ADAM optimizer
+    opt = ADAM(args.η)
+
+    # parameters
+    ps = Flux.params(model)
+
+    # training
+    train_steps = 0
+    @info "Start Training, total $(args.epochs) epochs"
+    for epoch = 1:args.epochs
+        @info "Epoch $(epoch)"
+        progress = Progress(length(train_loader))
+
+        for batch in train_loader
+            loss, back = Flux.pullback(ps) do
+                model_loss(model, args.λ, batch |> device)
+            end
+            train_acc = accuracy(model, train_loader, device)
+            test_acc = accuracy(model, test_loader, device)
+            grad = back(1f0)
+            Flux.Optimise.update!(opt, ps, grad)
+
+            # progress meter
+            next!(progress; showvalues=[
+                (:loss, loss),
+                (:train_accuracy, train_acc),
+                (:test_accuracy, test_acc)
+            ])
 
+            train_steps += 1
+        end
+    end
 
-## Training
-ps = Flux.params(model)
-train_data = [(train_X, train_y)]
-opt = ADAM(0.01)
-evalcb() = @show(accuracy(train_X, train_y))
+    return model, args
+end
 
-@epochs epochs Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, 10))
+model, args = train()
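
Since `train` forwards its keyword arguments straight into the `@with_kw` struct, any `Args` field can be overridden per run. A minimal usage sketch (the keyword names are just the `Args` fields above; `cuda=false` is an assumption for machines without a GPU):

    # override hyperparameters per run; every Args field is a keyword
    model, args = train(batch_size=16, epochs=50, seed=42, cuda=false)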

src/GeometricFlux.jl

Lines changed: 0 additions & 2 deletions
@@ -20,8 +20,6 @@ using Zygote
 
 import Word2Vec: word2vec, wordvectors, get_vector
 
-const ConcreteFeaturedGraph = Union{FeaturedGraph,FeaturedSubgraph}
-
 export
     # layers/graphlayers
     AbstractGraphLayer,
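
With the `Union` alias removed here, the layers in `src/layers/conv.jl` below dispatch on `AbstractFeaturedGraph` and call `ConcreteFeaturedGraph(fg, nf=...)` as a constructor, presumably now supplied by GraphSignals. A sketch of the widened dispatch, using a hypothetical `MyLayer`:

    using GraphSignals

    struct MyLayer end  # hypothetical layer, only to illustrate dispatch

    # one method now covers FeaturedGraph and FeaturedSubgraph alike
    (::MyLayer)(fg::AbstractFeaturedGraph) = node_feature(fg)

    fg = FeaturedGraph([0 1; 1 0], nf=rand(Float32, 3, 2))
    MyLayer()(fg)                    # FeaturedGraph
    MyLayer()(subgraph(fg, [1, 2]))  # FeaturedSubgraph, same method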

src/layers/conv.jl

Lines changed: 11 additions & 14 deletions
@@ -19,7 +19,7 @@ julia> gc = GCNConv(1024=>256, relu)
 GCNConv(1024 => 256, relu)
 ```
 
-See also [`WithGraph`](@ref) for training layer with fixed graph or subgraph.
+See also [`WithGraph`](@ref) for training layer with fixed graph.
 """
 struct GCNConv{A<:AbstractMatrix,B,F}
     weight::A
@@ -37,24 +37,21 @@ end
 
 @functor GCNConv
 
-(l::GCNConv)(Ã::AbstractArray, x::AbstractArray) = l.σ.(l.weight * x * Ã .+ l.bias)
+(l::GCNConv)(Ã::AbstractMatrix, x::AbstractMatrix) = l.σ.(l.weight * x * Ã .+ l.bias)
 
 function (l::GCNConv)(fg::AbstractFeaturedGraph)
     nf = node_feature(fg)
     Ã = Zygote.ignore() do
         GraphSignals.normalized_adjacency_matrix(fg, eltype(nf); selfloop=true)
     end
-    return FeaturedGraph(fg, nf = l(Ã, nf))
+    return ConcreteFeaturedGraph(fg, nf = l(Ã, nf))
 end
 
 function (wg::WithGraph{<:GCNConv})(X::AbstractArray)
-    N = size(X, 2)
-    wg.subgraph != (:) && N != length(wg.subgraph) &&
-        throw(ArgumentError("Layer with subgraph expecting subset of features, got #V=$N but #V for subgraph $(length(wg.subgraph))."))
     Ã = Zygote.ignore() do
         GraphSignals.normalized_adjacency_matrix(wg.fg, eltype(X); selfloop=true)
     end
-    return wg.layer(Ã[wg.subgraph, wg.subgraph], X)
+    return wg.layer(Ã, X)
 end
 
 function Base.show(io::IO, l::GCNConv)
@@ -101,7 +98,7 @@ ChebConv(ch::Pair{Int,Int}, k::Int; kwargs...) =
 
 Flux.trainable(l::ChebConv) = (l.weight, l.bias)
 
-function (c::ChebConv)(fg::ConcreteFeaturedGraph, X::AbstractMatrix{T}) where T
+function (c::ChebConv)(fg::AbstractFeaturedGraph, X::AbstractMatrix{T}) where T
     GraphSignals.check_num_nodes(fg, X)
     @assert size(X, 1) == size(c.weight, 2) "Input feature size must match input channel size."
 
@@ -175,7 +172,7 @@ message(gc::GraphConv, x_i, x_j::AbstractVector, e_ij) = gc.weight2 * x_j
 
 update(gc::GraphConv, m::AbstractVector, x::AbstractVector) = gc.σ.(gc.weight1*x .+ m .+ gc.bias)
 
-function (gc::GraphConv)(fg::ConcreteFeaturedGraph, x::AbstractMatrix)
+function (gc::GraphConv)(fg::AbstractFeaturedGraph, x::AbstractMatrix)
     # GraphSignals.check_num_nodes(fg, x)
     _, x, _ = propagate(gc, fg, edge_feature(fg), x, global_feature(fg), +)
     x
@@ -290,7 +287,7 @@ function update_batch_vertex(gat::GATConv, ::AbstractFeaturedGraph, M::AbstractM
     return M
 end
 
-function (gat::GATConv)(fg::ConcreteFeaturedGraph, X::AbstractMatrix)
+function (gat::GATConv)(fg::AbstractFeaturedGraph, X::AbstractMatrix)
     GraphSignals.check_num_nodes(fg, X)
     _, X, _ = propagate(gat, fg, edge_feature(fg), X, global_feature(fg), +)
     return X
@@ -349,7 +346,7 @@ message(ggc::GatedGraphConv, x_i, x_j::AbstractVector, e_ij) = x_j
 update(ggc::GatedGraphConv, m::AbstractVector, x) = m
 
 
-function (ggc::GatedGraphConv)(fg::ConcreteFeaturedGraph, H::AbstractMatrix{S}) where {T<:AbstractVector,S<:Real}
+function (ggc::GatedGraphConv)(fg::AbstractFeaturedGraph, H::AbstractMatrix{S}) where {T<:AbstractVector,S<:Real}
     GraphSignals.check_num_nodes(fg, H)
     m, n = size(H)
     @assert (m <= ggc.out_ch) "number of input features must less or equals to output features."
@@ -406,7 +403,7 @@ Flux.trainable(l::EdgeConv) = (l.nn,)
 message(ec::EdgeConv, x_i::AbstractVector, x_j::AbstractVector, e_ij) = ec.nn(vcat(x_i, x_j .- x_i))
 update(ec::EdgeConv, m::AbstractVector, x) = m
 
-function (ec::EdgeConv)(fg::ConcreteFeaturedGraph, X::AbstractMatrix)
+function (ec::EdgeConv)(fg::AbstractFeaturedGraph, X::AbstractMatrix)
    GraphSignals.check_num_nodes(fg, X)
    _, X, _ = propagate(ec, fg, edge_feature(fg), X, global_feature(fg), ec.aggr)
    X
@@ -457,7 +454,7 @@ Flux.trainable(g::GINConv) = (fg=g.fg, nn=g.nn)
 message(g::GINConv, x_i::AbstractVector, x_j::AbstractVector) = x_j
 update(g::GINConv, m::AbstractVector, x) = g.nn((1 + g.eps) * x + m)
 
-function (g::GINConv)(fg::ConcreteFeaturedGraph, X::AbstractMatrix)
+function (g::GINConv)(fg::AbstractFeaturedGraph, X::AbstractMatrix)
     gf = graph(fg)
     GraphSignals.check_num_nodes(gf, X)
     _, X, _ = propagate(g, fg, edge_feature(fg), X, global_feature(fg), +)
@@ -526,7 +523,7 @@ message(c::CGConv,
 end
 update(c::CGConv, m::AbstractVector, x) = x + m
 
-function (c::CGConv)(fg::ConcreteFeaturedGraph, X::AbstractMatrix, E::AbstractMatrix)
+function (c::CGConv)(fg::AbstractFeaturedGraph, X::AbstractMatrix, E::AbstractMatrix)
     GraphSignals.check_num_nodes(fg, X)
     GraphSignals.check_num_edges(fg, E)
     _, Y, _ = propagate(c, fg, E, X, global_feature(fg), +)
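
After this change `WithGraph` only caches the graph; the normalized adjacency Ã is rebuilt inside `Zygote.ignore` on every forward pass, with no subgraph slicing. A minimal sketch of the fixed-graph path, mirroring the `WithGraph` docstring (the 4-node adjacency and feature sizes are illustrative):

    using GeometricFlux, GraphSignals

    adj = [0 1 0 1;
           1 0 1 0;
           0 1 0 1;
           1 0 1 0]
    fg = FeaturedGraph(adj)

    gc = WithGraph(GCNConv(8=>4, relu), fg)  # graph fixed once
    X = rand(Float32, 8, 4)  # 8 input features per node, 4 nodes
    Y = gc(X)                # 4×4 output: l.σ.(l.weight * X * Ã .+ l.bias)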

src/layers/gn.jl

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ end
 
 function propagate(gn::GraphNet, fg::AbstractFeaturedGraph, naggr=nothing, eaggr=nothing, vaggr=nothing)
     E, V, u = propagate(gn, fg, edge_feature(fg), node_feature(fg), global_feature(fg), naggr, eaggr, vaggr)
-    FeaturedGraph(fg, nf=V, ef=E, gf=u)
+    return FeaturedGraph(fg, nf=V, ef=E, gf=u)
 end
 
 """

src/layers/utils.jl

Lines changed: 4 additions & 14 deletions
@@ -1,13 +1,12 @@
 """
-    WithGraph(layer, fg, [subgraph=:])
+    WithGraph(layer, fg)
 
 Train GNN layers with fixed graph.
 
 # Arguments
 
 - `layer`: A GNN layer.
 - `fg`: A fixed `FeaturedGraph` to train with.
-- `subgraph`: Node indeices to get a subgraph from `fg`.
 
 # Example
@@ -21,30 +20,21 @@ julia> fg = FeaturedGraph(adj);
 
 julia> gc = WithGraph(GCNConv(1024=>256), fg)
 WithGraph(GCNConv(1024 => 256), FeaturedGraph(#V=4, #E=4))
-
-julia> subgraph = [1, 2, 4] # specify subgraph nodes
-
-julia> gc = WithGraph(GCNConv(1024=>256), fg, subgraph)
-WithGraph(GCNConv(1024 => 256), FeaturedGraph(#V=4, #E=4), subgraph=[1, 2, 4])
 ```
 """
-struct WithGraph{L,G<:AbstractFeaturedGraph,S}
+struct WithGraph{L,G<:AbstractFeaturedGraph}
     layer::L
     fg::G
-    subgraph::S
 end
 
 @functor WithGraph
 
 Flux.trainable(l::WithGraph) = (l.layer, )
 
-WithGraph(layer, fg::AbstractFeaturedGraph) = WithGraph(layer, fg, :)
-
 function Base.show(io::IO, l::WithGraph)
     print(io, "WithGraph(")
     print(io, l.layer, ", ")
     print(io, "FeaturedGraph(#V=", nv(l.fg), ", #E=", ne(l.fg), ")")
-    l.subgraph == (:) || print(io, ", subgraph=", l.subgraph)
     print(io, ")")
 end
 
@@ -80,11 +70,11 @@ end
 GraphParallel(; node_layer=identity, edge_layer=identity, global_layer=identity) =
     GraphParallel(node_layer, edge_layer, global_layer)
 
-function (l::GraphParallel)(fg::FeaturedGraph)
+function (l::GraphParallel)(fg::AbstractFeaturedGraph)
     nf = l.node_layer(node_feature(fg))
     ef = l.edge_layer(edge_feature(fg))
     gf = l.global_layer(global_feature(fg))
-    return FeaturedGraph(fg, nf=nf, ef=ef, gf=gf)
+    return ConcreteFeaturedGraph(fg, nf=nf, ef=ef, gf=gf)
 end
 
 function Base.show(io::IO, l::GraphParallel)
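
`GraphParallel` routes each feature channel through its own layer, which is how the example script applies `Dropout` to node features only. A short sketch (graph and sizes are illustrative):

    using GeometricFlux, GraphSignals, Flux

    adj = [0 1 1;
           1 0 1;
           1 1 0]
    fg = FeaturedGraph(adj, nf=rand(Float32, 16, 3))

    # edge_layer and global_layer default to identity,
    # so only node features are transformed
    drop = GraphParallel(node_layer=Dropout(0.5))
    fg2 = drop(fg)  # rebuilt featured graph with dropped-out node features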
