|
# Type of one alias-sampling table pair as used throughout this file: it is the value
# type of the `alias_nodes` / `alias_edges` dictionaries, the declared return type of
# `get_alias_edge` (which returns the result of `alias_setup`), and it is splatted into
# `alias_sample` when drawing a step of a walk.
# NOTE(review): presumably the first component holds alias indices ("J") and the second
# the probabilities ("q") — confirm against `alias_setup`.
const Alias = Tuple{SparseVector{Int}, SparseVector{Float64}}
| 2 | + |
"""
    node2vec(g; walks_per_node, len, p, q, dims)

Compute node embeddings on graph `g` according to node2vec [node2vec2016](@cite).
It performs biased random walks on the graph, then computes word embeddings by
treating those random walks as sentences and node IDs as words.

Returns an embedding matrix with one column per node: column `i` is the vector
looked up for node `i`, so the matrix has `nv(g)` columns (expected size
`dims` x `nv(g)`, assuming each looked-up vector has length `dims`).

# Arguments

- `g::FeaturedGraph`: The graph to perform random walk on.
- `walks_per_node::Int`: Number of walks starting on each node,
  total number of walks is `nv(g) * walks_per_node`
- `len::Int`: Length of random walks
- `p::Real`: Return parameter from [node2vec2016](@cite)
- `q::Real`: In-out parameter from [node2vec2016](@cite)
- `dims::Int`: Number of vector dimensions
"""
function node2vec(g::FeaturedGraph; walks_per_node::Int=100, len::Int=5, p::Real=0.5, q::Real=0.5, dims::Int=128)
    walks = simulate_walks(g; walks_per_node=walks_per_node, len=len, p=p, q=q)
    model = walks2vec(walks; dims=dims)
    # Nodes are looked up by their integer ID rendered as a string, which is how
    # the walks are turned into "sentences" before training.
    vecs = [get_vector(model, string(i)) for i in 1:nv(g)]
    # Put the per-node vectors side by side, one column per node.
    return reduce(hcat, vecs)
end
| 31 | + |
"""
    walks2vec(walks; dims=100)

Modified version of Node2Vec.learn_embeddings[1]. Uses
a Julia interface[2] to the original word2vec C code[3].

Treats each random walk like a sentence, and computes word
embeddings using node IDs as words. The walks are written to a text file
with `writedlm`, `word2vec` is trained on that file, and the resulting
vectors are loaded with `wordvectors` and returned.

# Arguments

- `walks::Vector{Vector{Int}}`: Random walks, each a sequence of node IDs.
- `dims::Int`: Forwarded to `word2vec` as its `size` keyword.

[1] https://github.com/ollin18/Node2Vec.jl
[2] https://github.com/JuliaText/Word2Vec.jl
[3] https://code.google.com/archive/p/word2vec/
"""
function walks2vec(walks::Vector{Vector{Int}}; dims::Int=100)
    str_walks = map(walk -> string.(walk), walks)

    # Work in a fresh, private temporary directory rather than fixed file names
    # under "/tmp" (or the working directory on Windows): fixed names collide
    # when two runs overlap, and they were left behind whenever training or
    # loading threw. `mktempdir(f)` deletes the directory and its contents once
    # `f` returns or throws.
    return mktempdir() do workdir
        the_walks = joinpath(workdir, "str_walk.txt")
        the_vecs = joinpath(workdir, "str_walk-vec.txt")

        writedlm(the_walks, str_walks)
        word2vec(the_walks, the_vecs, verbose=true, size=dims)
        # The model must be loaded before the directory is removed.
        wordvectors(the_vecs)
    end
end
| 61 | + |
| 62 | + |
"""
    node2vec_walk(g, alias_nodes, alias_edges; start_node, walk_length)

Conducts a random walk over `g` starting at `start_node`. Each step is drawn with
`alias_sample`: the first step uses the table in `alias_nodes` for the start node,
every later step uses the table in `alias_edges` keyed by the
`(previous, current)` node pair.

Returns the visited node IDs, beginning with `start_node`. The walk holds at most
`walk_length` nodes and ends early if the current node has no out-neighbors.
"""
function node2vec_walk(
        g::FeaturedGraph,
        alias_nodes::Dict{Int, Alias},
        alias_edges::Dict{Tuple{Int, Int}, Alias};
        start_node::Int,
        walk_length::Int)::Vector{Int}
    walk = Int[start_node]
    for _ in 2:walk_length
        curr = walk[end]
        # NOTE(review): the sampled index is applied to the *sorted* neighbor list,
        # while `get_alias_edge` builds its table in the order returned by
        # `weighted_outneighbors` (no sort). This assumes `neighbors` already
        # returns IDs in ascending order — confirm.
        cur_nbrs = sort(neighbors(g, curr; dir=:out))
        # Dead end: there is nothing to sample from, so finish the walk here
        # instead of indexing into an empty neighbor list.
        isempty(cur_nbrs) && break
        if length(walk) == 1
            # No previous node yet: use the per-node table.
            push!(walk, cur_nbrs[alias_sample(alias_nodes[curr]...)])
        else
            prev = walk[end-1]
            next = cur_nbrs[alias_sample(alias_edges[(prev, curr)]...)]
            push!(walk, next)
        end
    end
    return walk
end
| 88 | + |
"""
    get_alias_edge(g, src, dst, p, q)

Returns the alias tables (J and q) used to pick the next node after a walk has
moved from `src` to `dst`.

Every out-neighbor `x` of `dst` receives an unnormalized probability based on the
weight that `weighted_outneighbors(g, dst)` reports for it:

- `weight / p` if `x == src` (stepping back to where the walk came from),
- `weight` if `has_edge(g, x, src)`,
- `weight / q` otherwise.

The values are divided by their sum and handed to `alias_setup`.

# Arguments

- `g::FeaturedGraph`: The graph being walked.
- `src::Int`, `dst::Int`: Previous and current node of the walk.
- `p::Real`: Return parameter.
- `q::Real`: In-out parameter.
"""
function get_alias_edge(g::FeaturedGraph, src::Int, dst::Int, p::Real, q::Real)::Alias
    # `p` and `q` accept any `Real` so that callers passing e.g. integer
    # parameters (the public entry points only require `Real`) dispatch here.
    nbrs, weights = weighted_outneighbors(g, dst)
    unnormalized_probs = spzeros(length(nbrs))
    for (i, (dst_nbr, weight)) in enumerate(zip(nbrs, weights))
        if dst_nbr == src
            unnormalized_probs[i] = weight / p
        elseif has_edge(g, dst_nbr, src)
            unnormalized_probs[i] = weight
        else
            unnormalized_probs[i] = weight / q
        end
    end
    normalized_probs = unnormalized_probs ./ sum(unnormalized_probs)
    return alias_setup(normalized_probs)
end
| 107 | + |
# Returns the tuple `(neighbors, weights)` for node `i`:
# - `neighbors` is whatever `neighbors(fg, i; dir=:out)` yields,
# - `weights` is row `i` of `sparse(graph(fg))` indexed at those neighbors.
# NOTE(review): the weights come back in the container type produced by that sparse
# indexing (likely a sparse vector rather than a plain `Vector{Float64}`) — confirm.
function weighted_outneighbors(fg::FeaturedGraph, i::Int)
    out_ids = neighbors(fg, i; dir=:out)
    adjacency = sparse(graph(fg))
    edge_weights = adjacency[i, out_ids]
    return out_ids, edge_weights
end
| 113 | + |
"""
    preprocess_modified_weights(g, p, q)

Builds the alias tables consumed by the random walks, using return parameter `p`
and in-out parameter `q`.

- For every node `1:nv(g)`, a table over its out-neighbors with equal probability
  for each neighbor.
- For every edge yielded by `edges(g)`, the table from `get_alias_edge`; when
  `g` is not directed, the reversed edge gets its own table as well.

Returns `(alias_nodes, alias_edges)`, keyed by node ID and by `(src, dst)`
respectively.

Implementation as specified in the node2vec paper [node2vec2016](@cite).
"""
function preprocess_modified_weights(g::FeaturedGraph, p::Real, q::Real)
    node_tables = Dict{Int, Alias}()
    edge_tables = Dict{Tuple{Int, Int}, Alias}()

    # First step of a walk: uniform choice among the out-neighbors.
    for v in 1:nv(g)
        degree = length(neighbors(g, v, dir=:out))
        uniform = fill(1 / degree, degree)
        node_tables[v] = alias_setup(uniform)
    end

    # Later steps: one table per traversed edge.
    for (_, (src, dst)) in edges(g)
        edge_tables[(src, dst)] = get_alias_edge(g, src, dst, p, q)
        if !is_directed(g)
            edge_tables[(dst, src)] = get_alias_edge(g, dst, src, p, q)
        end
    end

    return node_tables, edge_tables
end
| 139 | + |
| 140 | + |
"""
    simulate_walks(g; walks_per_node, len, p, q)

Given a graph, compute `walks_per_node` * nv(g) random walks.

The alias tables are built once with `preprocess_modified_weights(g, p, q)`.
Then, `walks_per_node` times, every node in `1:nv(g)` is visited in a freshly
shuffled order and used as the start of one `node2vec_walk` with
`walk_length = len`. The walks are returned in the order they were generated.
"""
function simulate_walks(g::FeaturedGraph; walks_per_node::Int, len::Int, p::Real, q::Real)::Vector{Vector{Int}}
    node_tables, edge_tables = preprocess_modified_weights(g, p, q)
    collected = Vector{Int}[]
    for _ in 1:walks_per_node
        # New random start order for every pass over the nodes.
        start_order = shuffle(1:nv(g))
        for start in start_order
            path::Vector{Int} = node2vec_walk(
                g, node_tables, edge_tables; start_node=start, walk_length=len
            )
            push!(collected, path)
        end
    end
    return collected
end
0 commit comments