Skip to content

Commit 5cc78ae

Browse files
committed
Map node similarity
1 parent 43b8bdd commit 5cc78ae

12 files changed

+2556
-4
lines changed

graphdatascience/procedure_surface/api/similarity/node_similarity_endpoints.py

Lines changed: 396 additions & 0 deletions
Large diffs are not rendered by default.

graphdatascience/procedure_surface/api/similarity/node_similarity_filtered_endpoints.py

Lines changed: 425 additions & 0 deletions
Large diffs are not rendered by default.
graphdatascience/procedure_surface/api/similarity/node_similarity_results.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
from typing import Any
2+
3+
from graphdatascience.procedure_surface.api.base_result import BaseResult
4+
5+
6+
class NodeSimilarityMutateResult(BaseResult):
    """Summary fields of a node similarity run in mutate mode.

    Only names and types are declared here; the remarks on the groups below
    are read off the field names and should be confirmed against the server
    documentation.
    """

    # Phase timings (presumably milliseconds, going by the ``_millis`` suffix).
    pre_processing_millis: int
    compute_millis: int
    mutate_millis: int
    post_processing_millis: int

    # Counters reported for the run.
    nodes_compared: int
    relationships_written: int

    # Free-form mappings; their key sets are not specified in this module.
    similarity_distribution: dict[str, Any]
    configuration: dict[str, Any]
15+
16+
17+
class NodeSimilarityStatsResult(BaseResult):
    """Summary fields of a node similarity run in stats mode.

    Only names and types are declared here; the remarks on the groups below
    are read off the field names and should be confirmed against the server
    documentation.
    """

    # Phase timings (presumably milliseconds, going by the ``_millis`` suffix).
    pre_processing_millis: int
    compute_millis: int
    post_processing_millis: int

    # Counters reported for the run.
    nodes_compared: int
    similarity_pairs: int

    # Free-form mappings; their key sets are not specified in this module.
    similarity_distribution: dict[str, Any]
    configuration: dict[str, Any]
25+
26+
27+
class NodeSimilarityWriteResult(BaseResult):
    """Summary fields of a node similarity run in write mode.

    Only names and types are declared here; the remarks on the groups below
    are read off the field names and should be confirmed against the server
    documentation.
    """

    # Phase timings (presumably milliseconds, going by the ``_millis`` suffix).
    pre_processing_millis: int
    compute_millis: int
    write_millis: int
    post_processing_millis: int

    # Counters reported for the run.
    nodes_compared: int
    relationships_written: int

    # Free-form mappings; their key sets are not specified in this module.
    similarity_distribution: dict[str, Any]
    configuration: dict[str, Any]
Lines changed: 289 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,289 @@
1+
from __future__ import annotations
2+
3+
from typing import Any
4+
5+
from pandas import DataFrame
6+
7+
from graphdatascience.arrow_client.authenticated_flight_client import AuthenticatedArrowClient
8+
from graphdatascience.arrow_client.v2.remote_write_back_client import RemoteWriteBackClient
9+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
11+
from graphdatascience.procedure_surface.api.similarity.node_similarity_endpoints import NodeSimilarityEndpoints
12+
from graphdatascience.procedure_surface.api.similarity.node_similarity_filtered_endpoints import (
13+
NodeSimilarityFilteredEndpoints,
14+
)
15+
from graphdatascience.procedure_surface.api.similarity.node_similarity_results import (
16+
NodeSimilarityMutateResult,
17+
NodeSimilarityStatsResult,
18+
NodeSimilarityWriteResult,
19+
)
20+
from graphdatascience.procedure_surface.arrow.relationship_endpoints_helper import RelationshipEndpointsHelper
21+
from graphdatascience.procedure_surface.arrow.similarity.node_similarity_filtered_arrow_endpoints import (
22+
NodeSimilarityFilteredArrowEndpoints,
23+
)
24+
from graphdatascience.procedure_surface.arrow.stream_result_mapper import rename_similarity_stream_result
25+
26+
27+
class NodeSimilarityArrowEndpoints(NodeSimilarityEndpoints):
    """Node similarity endpoints that delegate to a ``RelationshipEndpointsHelper``.

    Each execution mode assembles a job configuration from its keyword
    arguments (snake_case arguments are forwarded under camelCase keys) and
    hands it to the helper together with the ``v2/similarity.nodeSimilarity``
    endpoint name. ``estimate`` uses ``v2/similarity.nodeSimilarity.estimate``.
    """

    def __init__(
        self,
        arrow_client: AuthenticatedArrowClient,
        write_back_client: RemoteWriteBackClient | None = None,
        show_progress: bool = False,
    ):
        """Build the helper every endpoint method of this class delegates to.

        Parameters
        ----------
        arrow_client
            Passed positionally to ``RelationshipEndpointsHelper``.
        write_back_client
            Passed to the helper as ``write_back_client``; may be ``None``.
        show_progress
            Passed to the helper as ``show_progress``.
        """
        self._endpoints_helper = RelationshipEndpointsHelper(
            arrow_client,
            write_back_client=write_back_client,
            show_progress=show_progress,
        )

    @property
    def filtered(self) -> NodeSimilarityFilteredEndpoints:
        """Return filtered node similarity endpoints built from the helper's clients.

        A new ``NodeSimilarityFilteredArrowEndpoints`` is constructed on every
        access, using the arrow client, write-back client and progress flag
        read from the helper.
        """
        helper = self._endpoints_helper
        return NodeSimilarityFilteredArrowEndpoints(
            helper._arrow_client,
            helper._write_back_client,
            helper._show_progress,
        )

    def mutate(
        self,
        G: GraphV2,
        mutate_relationship_type: str,
        mutate_property: str,
        top_k: int = 10,
        bottom_k: int = 10,
        top_n: int = 0,
        bottom_n: int = 0,
        similarity_cutoff: float = 1.0e-42,
        degree_cutoff: int = 1,
        upper_degree_cutoff: int = 2147483647,
        similarity_metric: str = "JACCARD",
        use_components: bool | str = False,
        relationship_weight_property: str | None = None,
        relationship_types: list[str] | None = None,
        node_labels: list[str] | None = None,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> NodeSimilarityMutateResult:
        """Run node similarity through the helper's ``run_job_and_mutate``.

        Parameters
        ----------
        G
            Graph handed to ``create_base_config``.
        mutate_relationship_type, mutate_property
            Forwarded to ``run_job_and_mutate`` (property first, then
            relationship type).
        top_k, bottom_k, top_n, bottom_n, similarity_cutoff, degree_cutoff,
        upper_degree_cutoff, similarity_metric, use_components,
        relationship_weight_property, relationship_types, node_labels, sudo,
        log_progress, username, concurrency, job_id
            Forwarded unchanged into the job configuration under their
            camelCase names.

        Returns
        -------
        NodeSimilarityMutateResult
            Built by unpacking the mapping returned by the helper.
        """
        job_params: dict[str, Any] = {
            "topK": top_k,
            "bottomK": bottom_k,
            "topN": top_n,
            "bottomN": bottom_n,
            "similarityCutoff": similarity_cutoff,
            "degreeCutoff": degree_cutoff,
            "upperDegreeCutoff": upper_degree_cutoff,
            "similarityMetric": similarity_metric,
            "useComponents": use_components,
            "relationshipWeightProperty": relationship_weight_property,
            "relationshipTypes": relationship_types,
            "nodeLabels": node_labels,
            "sudo": sudo,
            "logProgress": log_progress,
            "username": username,
            "concurrency": concurrency,
            "jobId": job_id,
        }
        config = self._endpoints_helper.create_base_config(G, **job_params)

        summary = self._endpoints_helper.run_job_and_mutate(
            "v2/similarity.nodeSimilarity",
            config,
            mutate_property,
            mutate_relationship_type,
        )
        return NodeSimilarityMutateResult(**summary)

    def stats(
        self,
        G: GraphV2,
        top_k: int = 10,
        bottom_k: int = 10,
        top_n: int = 0,
        bottom_n: int = 0,
        similarity_cutoff: float = 1.0e-42,
        degree_cutoff: int = 1,
        upper_degree_cutoff: int = 2147483647,
        similarity_metric: str = "JACCARD",
        use_components: bool | str = False,
        relationship_weight_property: str | None = None,
        relationship_types: list[str] | None = None,
        node_labels: list[str] | None = None,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> NodeSimilarityStatsResult:
        """Run node similarity through the helper's ``run_job_and_get_summary``.

        Parameters
        ----------
        G
            Graph handed to ``create_base_config``.
        top_k, bottom_k, top_n, bottom_n, similarity_cutoff, degree_cutoff,
        upper_degree_cutoff, similarity_metric, use_components,
        relationship_weight_property, relationship_types, node_labels, sudo,
        log_progress, username, concurrency, job_id
            Forwarded unchanged into the job configuration under their
            camelCase names.

        Returns
        -------
        NodeSimilarityStatsResult
            Built by unpacking the summary mapping; ``similarityPairs`` is
            filled in from ``relationshipsWritten`` (or ``0``) when absent.
        """
        job_params: dict[str, Any] = {
            "topK": top_k,
            "bottomK": bottom_k,
            "topN": top_n,
            "bottomN": bottom_n,
            "similarityCutoff": similarity_cutoff,
            "degreeCutoff": degree_cutoff,
            "upperDegreeCutoff": upper_degree_cutoff,
            "similarityMetric": similarity_metric,
            "useComponents": use_components,
            "relationshipWeightProperty": relationship_weight_property,
            "relationshipTypes": relationship_types,
            "nodeLabels": node_labels,
            "sudo": sudo,
            "logProgress": log_progress,
            "username": username,
            "concurrency": concurrency,
            "jobId": job_id,
        }
        config = self._endpoints_helper.create_base_config(G, **job_params)

        summary = self._endpoints_helper.run_job_and_get_summary(
            "v2/similarity.nodeSimilarity",
            config,
        )
        # The summary may lack "similarityPairs"; fall back to the
        # "relationshipsWritten" entry, or 0 when that is missing as well.
        # NOTE(review): presumably the job reports the pair count under the
        # latter key - confirm against the server response.
        summary.setdefault("similarityPairs", summary.get("relationshipsWritten", 0))

        return NodeSimilarityStatsResult(**summary)

    def stream(
        self,
        G: GraphV2,
        top_k: int = 10,
        bottom_k: int = 10,
        top_n: int = 0,
        bottom_n: int = 0,
        similarity_cutoff: float = 1.0e-42,
        degree_cutoff: int = 1,
        upper_degree_cutoff: int = 2147483647,
        similarity_metric: str = "JACCARD",
        use_components: bool | str = False,
        relationship_weight_property: str | None = None,
        relationship_types: list[str] | None = None,
        node_labels: list[str] | None = None,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> DataFrame:
        """Run node similarity through the helper's ``run_job_and_stream``.

        Parameters
        ----------
        G
            Graph handed to ``create_base_config`` and ``run_job_and_stream``.
        top_k, bottom_k, top_n, bottom_n, similarity_cutoff, degree_cutoff,
        upper_degree_cutoff, similarity_metric, use_components,
        relationship_weight_property, relationship_types, node_labels, sudo,
        log_progress, username, concurrency, job_id
            Forwarded unchanged into the job configuration under their
            camelCase names.

        Returns
        -------
        DataFrame
            The object returned by the helper, after it has been passed to
            ``rename_similarity_stream_result``.
        """
        job_params: dict[str, Any] = {
            "topK": top_k,
            "bottomK": bottom_k,
            "topN": top_n,
            "bottomN": bottom_n,
            "similarityCutoff": similarity_cutoff,
            "degreeCutoff": degree_cutoff,
            "upperDegreeCutoff": upper_degree_cutoff,
            "similarityMetric": similarity_metric,
            "useComponents": use_components,
            "relationshipWeightProperty": relationship_weight_property,
            "relationshipTypes": relationship_types,
            "nodeLabels": node_labels,
            "sudo": sudo,
            "logProgress": log_progress,
            "username": username,
            "concurrency": concurrency,
            "jobId": job_id,
        }
        config = self._endpoints_helper.create_base_config(G, **job_params)

        frame = self._endpoints_helper.run_job_and_stream("v2/similarity.nodeSimilarity", G, config)

        # The return value of the rename call is ignored and the same object
        # is returned, so the rename presumably happens in place.
        rename_similarity_stream_result(frame)
        return frame

    def write(
        self,
        G: GraphV2,
        write_relationship_type: str,
        write_property: str,
        top_k: int = 10,
        bottom_k: int = 10,
        top_n: int = 0,
        bottom_n: int = 0,
        similarity_cutoff: float = 1.0e-42,
        degree_cutoff: int = 1,
        upper_degree_cutoff: int = 2147483647,
        similarity_metric: str = "JACCARD",
        use_components: bool | str = False,
        relationship_weight_property: str | None = None,
        relationship_types: list[str] | None = None,
        node_labels: list[str] | None = None,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
        write_concurrency: int | None = None,
    ) -> NodeSimilarityWriteResult:
        """Run node similarity through the helper's ``run_job_and_write``.

        Parameters
        ----------
        G
            Graph handed to ``create_base_config`` and ``run_job_and_write``.
        write_relationship_type
            Passed to the helper as ``relationship_type_overwrite``.
        write_property
            Passed to the helper as ``property_overwrites``.
        write_concurrency
            Placed in the job configuration as ``writeConcurrency`` and also
            passed to ``run_job_and_write``.
        top_k, bottom_k, top_n, bottom_n, similarity_cutoff, degree_cutoff,
        upper_degree_cutoff, similarity_metric, use_components,
        relationship_weight_property, relationship_types, node_labels, sudo,
        log_progress, username, concurrency, job_id
            Forwarded unchanged into the job configuration under their
            camelCase names; ``concurrency`` is additionally passed to
            ``run_job_and_write``.

        Returns
        -------
        NodeSimilarityWriteResult
            Built by unpacking the mapping returned by the helper.
        """
        job_params: dict[str, Any] = {
            "topK": top_k,
            "bottomK": bottom_k,
            "topN": top_n,
            "bottomN": bottom_n,
            "similarityCutoff": similarity_cutoff,
            "degreeCutoff": degree_cutoff,
            "upperDegreeCutoff": upper_degree_cutoff,
            "similarityMetric": similarity_metric,
            "useComponents": use_components,
            "relationshipWeightProperty": relationship_weight_property,
            "relationshipTypes": relationship_types,
            "nodeLabels": node_labels,
            "sudo": sudo,
            "logProgress": log_progress,
            "username": username,
            "concurrency": concurrency,
            "jobId": job_id,
            "writeConcurrency": write_concurrency,
        }
        config = self._endpoints_helper.create_base_config(G, **job_params)

        summary = self._endpoints_helper.run_job_and_write(
            "v2/similarity.nodeSimilarity",
            G,
            config,
            property_overwrites=write_property,
            relationship_type_overwrite=write_relationship_type,
            write_concurrency=write_concurrency,
            concurrency=concurrency,
        )
        return NodeSimilarityWriteResult(**summary)

    def estimate(
        self,
        G: GraphV2 | dict[str, Any],
        top_k: int = 10,
        bottom_k: int = 10,
        top_n: int = 0,
        bottom_n: int = 0,
        similarity_cutoff: float = 1.0e-42,
        degree_cutoff: int = 1,
        upper_degree_cutoff: int = 2147483647,
        similarity_metric: str = "JACCARD",
        use_components: bool | str = False,
        relationship_weight_property: str | None = None,
        relationship_types: list[str] | None = None,
        node_labels: list[str] | None = None,
        sudo: bool = False,
        username: str | None = None,
        concurrency: int | None = None,
    ) -> EstimationResult:
        """Request an estimate via ``v2/similarity.nodeSimilarity.estimate``.

        Parameters
        ----------
        G
            Graph object or dictionary passed straight to the helper's
            ``estimate``; it is not part of the estimate configuration.
        top_k, bottom_k, top_n, bottom_n, similarity_cutoff, degree_cutoff,
        upper_degree_cutoff, similarity_metric, use_components,
        relationship_weight_property, relationship_types, node_labels, sudo,
        username, concurrency
            Forwarded unchanged to ``create_estimate_config`` under their
            camelCase names.

        Returns
        -------
        EstimationResult
            Whatever the helper's ``estimate`` returns.
        """
        estimate_params: dict[str, Any] = {
            "topK": top_k,
            "bottomK": bottom_k,
            "topN": top_n,
            "bottomN": bottom_n,
            "similarityCutoff": similarity_cutoff,
            "degreeCutoff": degree_cutoff,
            "upperDegreeCutoff": upper_degree_cutoff,
            "similarityMetric": similarity_metric,
            "useComponents": use_components,
            "relationshipWeightProperty": relationship_weight_property,
            "relationshipTypes": relationship_types,
            "nodeLabels": node_labels,
            "sudo": sudo,
            "username": username,
            "concurrency": concurrency,
        }
        config = self._endpoints_helper.create_estimate_config(**estimate_params)

        return self._endpoints_helper.estimate("v2/similarity.nodeSimilarity.estimate", G, config)

0 commit comments

Comments
 (0)