@@ -80,6 +80,10 @@ print(f"Metadata for our loaded Cora graph `G`: {G}")
8080print(f"Node labels present in `G`: {G.node_labels()}")
8181----
8282
83+
84+ Metadata for our loaded Cora graph `G`: Graph(name=cora, node_count=2708, relationship_count=5429)
85+ Node labels present in `G`: ['Paper']
86+
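As an optional aside, the `Graph` object exposes a few more accessors that are handy for quick sanity checks. The snippet below is a small sketch; the exact set of accessor methods can vary between versions of the GDS Python client.

[source, python]
----
# Optional extra sanity checks on the projected graph. Accessor names
# follow recent versions of the GDS Python client; adjust if your
# client version differs.
print(f"Relationship types in `G`: {G.relationship_types()}")
print(f"Node properties in `G`: {G.node_properties()}")
----
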
The metadata looks correct! Now let’s go ahead and sample the graph.

We use the random walk with restarts sampling algorithm to get a smaller
@@ -104,6 +108,10 @@ print(f"Number of nodes in our sample: {G_sample.node_count()}")
print(f"Number of relationships in our sample: {G_sample.relationship_count()}")
----

 Number of nodes in our sample: 406
 Number of relationships in our sample: 532

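For reference, a sampling call of the kind that produces such a sample could look like the sketch below. The sample graph name, the `samplingRatio`, the `restartProbability`, and the `randomSeed` shown here are illustrative assumptions rather than the exact values used above, and the endpoint has moved between GDS client versions (older releases expose it as `gds.alpha.graph.sample.rwr`).

[source, python]
----
# Sketch: sample a subgraph of `G` using random walk with restarts (RWR).
# All configuration values below are illustrative; the GDS defaults are
# samplingRatio=0.15 and restartProbability=0.1.
G_sample, sample_result = gds.graph.sample.rwr(
    "cora_sample",           # name of the new in-memory sample graph (assumed)
    G,                       # graph to sample from
    samplingRatio=0.15,      # fraction of nodes to keep
    restartProbability=0.1,  # probability of restarting each walk
    randomSeed=42,           # fix the seed for reproducible sampling
)
----
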
Let’s also compute
https://neo4j.com/docs/graph-data-science/current/algorithms/page-rank/[PageRank]
on our sample graph, in order to get an importance score that we call
@@ -115,6 +123,19 @@ visualize the graph.
gds.pageRank.mutate(G_sample, mutateProperty="rank")
----

----
mutateMillis              0
nodePropertiesWritten     406
ranIterations             20
didConverge               False
centralityDistribution    {'min': 0.14999961853027344, 'max': 2.27294921...
postProcessingMillis      1
preProcessingMillis       0
computeMillis             7
configuration             {'mutateProperty': 'rank', 'jobId': '5ca450ff-...
Name: 0, dtype: object
----

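Note that `mutate` only adds the `rank` score to the in-memory graph `G_sample`; nothing is written back to the Neo4j database at this point. If we also wanted the scores persisted to the database, a write-back along these lines would do it (an optional extra step, not part of the flow above; in older GDS versions the corresponding procedure is `gds.graph.writeNodeProperties`):

[source, python]
----
# Optional: write the mutated `rank` node property from the in-memory
# sample graph back to the underlying Neo4j database.
write_result = gds.graph.nodeProperties.write(G_sample, ["rank"])
print(write_result)
----
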
== Exporting the sampled Cora graph

We can now export the topology and node properties of our sampled graph
@@ -128,6 +149,24 @@ sample_topology_df = gds.graph.relationships.stream(G_sample)
display(sample_topology_df)
----

[cols=",,,",options="header",]
|===
| |sourceNodeId |targetNodeId |relationshipType
|0 |31336 |31349 |CITES
|1 |31336 |686532 |CITES
|2 |31336 |1129442 |CITES
|3 |31349 |686532 |CITES
|4 |31353 |31336 |CITES
|... |... |... |...
|527 |34961 |31043 |CITES
|528 |34961 |22883 |CITES
|529 |102879 |9513 |CITES
|530 |102884 |9513 |CITES
|531 |767763 |1136631 |CITES
|===

532 rows × 3 columns

We get the right number of rows, one for each expected relationship, so
that looks good.

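Since the streamed topology is just a regular pandas `DataFrame`, it is easy to hand it over to other tooling as well. As a small optional example (the file name here is arbitrary), we could keep a copy of the edge list on disk:

[source, python]
----
# Save the sampled edge list for reuse outside this notebook.
sample_topology_df.to_csv("cora-sample-edges.csv", index=False)

# It can later be reloaded with pandas.
import pandas as pd
reloaded_topology_df = pd.read_csv("cora-sample-edges.csv")
----
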
@@ -147,6 +186,24 @@ sample_node_properties_df = gds.graph.nodeProperties.stream(
display(sample_node_properties_df)
----

[cols=",,,",options="header",]
|===
| |nodeId |rank |subject
|0 |164 |0.245964 |4.0
|1 |434 |0.158500 |2.0
|2 |1694 |0.961240 |5.0
|3 |1949 |0.224912 |6.0
|4 |1952 |0.150000 |6.0
|... |... |... |...
|401 |1154103 |0.319498 |3.0
|402 |1154124 |0.627706 |0.0
|403 |1154169 |0.154784 |0.0
|404 |1154251 |0.187675 |0.0
|405 |1154276 |0.277500 |0.0
|===

406 rows × 3 columns

Now that we have all the data we want to visualize, we can create a
network with PyVis. We color each node according to its "`subject`", and
size it according to its "`rank`".
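
The node-construction part of this can be sketched as below. The color palette and the size scaling factor are arbitrary illustrative choices, and the actual notebook code may differ in its details:

[source, python]
----
from pyvis.network import Network

# One color per Cora subject class (palette chosen arbitrarily here).
subject_to_color = {
    0.0: "#1f77b4", 1.0: "#ff7f0e", 2.0: "#2ca02c", 3.0: "#d62728",
    4.0: "#9467bd", 5.0: "#8c564b", 6.0: "#e377c2",
}

net = Network(notebook=True)

# Add one PyVis node per row of the node properties DataFrame, colored by
# subject and sized by its PageRank score.
for _, row in sample_node_properties_df.iterrows():
    net.add_node(
        int(row["nodeId"]),
        color=subject_to_color[row["subject"]],
        size=10 * float(row["rank"]),  # the factor 10 is an arbitrary choice
    )
----
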
@@ -174,6 +231,10 @@ net.add_edges(zip(sample_topology_df["sourceNodeId"], sample_topology_df["target
net.show("cora-sample.html")
----


 cora-sample.html

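If you are working outside a notebook environment, the generated `cora-sample.html` file can also be opened directly in a browser, for example:

[source, python]
----
# Open the generated visualization in the default web browser.
import os
import webbrowser

webbrowser.open("file://" + os.path.abspath("cora-sample.html"))
----
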
Unsurprisingly, we can see that papers largely cluster by academic
subject. We also note that some nodes appear larger, indicating that
they have a higher PageRank centrality score.