Skip to content

Commit 7a1253c

Browse files
committed
Add modularity optimization to v2 endpoints
1 parent 9c55142 commit 7a1253c

File tree

7 files changed

+1094
-2
lines changed

7 files changed

+1094
-2
lines changed
Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, Dict, List, Optional, Union
5+
6+
from pandas import DataFrame
7+
8+
from graphdatascience.procedure_surface.api.base_result import BaseResult
9+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
11+
12+
13+
class ModularityOptimizationEndpoints(ABC):
    """
    Abstract interface for running the Modularity Optimization algorithm.

    Declares one abstract method per execution mode (``mutate``, ``stats``,
    ``stream``, ``write``) plus ``estimate``. Subclasses must implement all
    five before they can be instantiated. Apart from the graph and the
    target property name, every option is keyword-only.
    """

    @abstractmethod
    def mutate(
        self,
        G: GraphV2,
        mutate_property: str,
        *,
        batch_size: Optional[int] = None,
        concurrency: Optional[int] = None,
        consecutive_ids: Optional[bool] = None,
        job_id: Optional[str] = None,
        log_progress: bool = True,
        max_iterations: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        seed_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        tolerance: Optional[float] = None,
        username: Optional[str] = None,
    ) -> ModularityOptimizationMutateResult:
        """
        Executes the Modularity Optimization algorithm and writes the results
        to the in-memory graph as node properties.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the community ID for each node
        batch_size : Optional[int], default=None
            Number of nodes to process in each batch
        concurrency : Optional[int], default=None
            The number of concurrent threads
        consecutive_ids : Optional[bool], default=None
            Whether to assign consecutive community IDs
        job_id : Optional[str], default=None
            An identifier for the job
        log_progress : bool, default=True
            Whether to log progress
        max_iterations : Optional[int], default=None
            Maximum number of iterations for the algorithm
        node_labels : Optional[List[str]], default=None
            Filter nodes with specific labels
        relationship_types : Optional[List[str]], default=None
            Filter relationships with specific types
        relationship_weight_property : Optional[str], default=None
            Property name for relationship weights
        seed_property : Optional[str], default=None
            Property name for initial community assignments
        sudo : Optional[bool], default=False
            Run with elevated privileges
        tolerance : Optional[float], default=None
            Convergence tolerance for the algorithm
        username : Optional[str], default=None
            Username for authentication

        Returns
        -------
        ModularityOptimizationMutateResult
            Result containing community statistics and timing information
        """

    @abstractmethod
    def stats(
        self,
        G: GraphV2,
        *,
        batch_size: Optional[int] = None,
        concurrency: Optional[int] = None,
        consecutive_ids: Optional[bool] = None,
        job_id: Optional[str] = None,
        log_progress: bool = True,
        max_iterations: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        seed_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        tolerance: Optional[float] = None,
        username: Optional[str] = None,
    ) -> ModularityOptimizationStatsResult:
        """
        Executes the Modularity Optimization algorithm and returns statistics
        about the communities.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        batch_size : Optional[int], default=None
            Number of nodes to process in each batch
        concurrency : Optional[int], default=None
            The number of concurrent threads
        consecutive_ids : Optional[bool], default=None
            Whether to assign consecutive community IDs
        job_id : Optional[str], default=None
            An identifier for the job
        log_progress : bool, default=True
            Whether to log progress
        max_iterations : Optional[int], default=None
            Maximum number of iterations for the algorithm
        node_labels : Optional[List[str]], default=None
            Filter nodes with specific labels
        relationship_types : Optional[List[str]], default=None
            Filter relationships with specific types
        relationship_weight_property : Optional[str], default=None
            Property name for relationship weights
        seed_property : Optional[str], default=None
            Property name for initial community assignments
        sudo : Optional[bool], default=False
            Run with elevated privileges
        tolerance : Optional[float], default=None
            Convergence tolerance for the algorithm
        username : Optional[str], default=None
            Username for authentication

        Returns
        -------
        ModularityOptimizationStatsResult
            Result containing community statistics and timing information
        """

    @abstractmethod
    def stream(
        self,
        G: GraphV2,
        *,
        batch_size: Optional[int] = None,
        concurrency: Optional[int] = None,
        consecutive_ids: Optional[bool] = None,
        job_id: Optional[str] = None,
        log_progress: bool = True,
        max_iterations: Optional[int] = None,
        min_community_size: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        seed_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        tolerance: Optional[float] = None,
        username: Optional[str] = None,
    ) -> DataFrame:
        """
        Executes the Modularity Optimization algorithm and returns the results
        as a DataFrame.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        batch_size : Optional[int], default=None
            Number of nodes to process in each batch
        concurrency : Optional[int], default=None
            The number of concurrent threads
        consecutive_ids : Optional[bool], default=None
            Whether to assign consecutive community IDs
        job_id : Optional[str], default=None
            An identifier for the job
        log_progress : bool, default=True
            Whether to log progress
        max_iterations : Optional[int], default=None
            Maximum number of iterations for the algorithm
        min_community_size : Optional[int], default=None
            Minimum size for communities to be included in results
        node_labels : Optional[List[str]], default=None
            Filter nodes with specific labels
        relationship_types : Optional[List[str]], default=None
            Filter relationships with specific types
        relationship_weight_property : Optional[str], default=None
            Property name for relationship weights
        seed_property : Optional[str], default=None
            Property name for initial community assignments
        sudo : Optional[bool], default=False
            Run with elevated privileges
        tolerance : Optional[float], default=None
            Convergence tolerance for the algorithm
        username : Optional[str], default=None
            Username for authentication

        Returns
        -------
        DataFrame
            A DataFrame with columns 'nodeId' and 'communityId'
        """

    @abstractmethod
    def write(
        self,
        G: GraphV2,
        write_property: str,
        *,
        batch_size: Optional[int] = None,
        concurrency: Optional[int] = None,
        consecutive_ids: Optional[bool] = None,
        job_id: Optional[str] = None,
        log_progress: bool = True,
        max_iterations: Optional[int] = None,
        min_community_size: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        seed_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        tolerance: Optional[float] = None,
        username: Optional[str] = None,
        write_concurrency: Optional[int] = None,
        write_to_result_store: Optional[bool] = None,
    ) -> ModularityOptimizationWriteResult:
        """
        Executes the Modularity Optimization algorithm and writes the results
        back to the database.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        write_property : str
            The property name to store the community ID for each node
        batch_size : Optional[int], default=None
            Number of nodes to process in each batch
        concurrency : Optional[int], default=None
            The number of concurrent threads
        consecutive_ids : Optional[bool], default=None
            Whether to assign consecutive community IDs
        job_id : Optional[str], default=None
            An identifier for the job
        log_progress : bool, default=True
            Whether to log progress
        max_iterations : Optional[int], default=None
            Maximum number of iterations for the algorithm
        min_community_size : Optional[int], default=None
            Minimum size for communities to be included in results
        node_labels : Optional[List[str]], default=None
            Filter nodes with specific labels
        relationship_types : Optional[List[str]], default=None
            Filter relationships with specific types
        relationship_weight_property : Optional[str], default=None
            Property name for relationship weights
        seed_property : Optional[str], default=None
            Property name for initial community assignments
        sudo : Optional[bool], default=False
            Run with elevated privileges
        tolerance : Optional[float], default=None
            Convergence tolerance for the algorithm
        username : Optional[str], default=None
            Username for authentication
        write_concurrency : Optional[int], default=None
            The number of concurrent threads for writing
        write_to_result_store : Optional[bool], default=None
            Whether to write results to the result store

        Returns
        -------
        ModularityOptimizationWriteResult
            Result containing community statistics and timing information
        """

    @abstractmethod
    def estimate(
        self,
        # ``Dict`` (not builtin ``dict``) to match the typing aliases used
        # everywhere else in this module.
        G: Union[GraphV2, Dict[str, Any]],
        *,
        batch_size: Optional[int] = None,
        concurrency: Optional[int] = None,
        consecutive_ids: Optional[bool] = None,
        max_iterations: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        seed_property: Optional[str] = None,
        tolerance: Optional[float] = None,
    ) -> EstimationResult:
        """
        Estimates the memory consumption for running the Modularity
        Optimization algorithm.

        Parameters
        ----------
        G : Union[GraphV2, Dict[str, Any]]
            The graph configuration or graph object
        batch_size : Optional[int], default=None
            Number of nodes to process in each batch
        concurrency : Optional[int], default=None
            The number of concurrent threads
        consecutive_ids : Optional[bool], default=None
            Whether to assign consecutive community IDs
        max_iterations : Optional[int], default=None
            Maximum number of iterations for the algorithm
        node_labels : Optional[List[str]], default=None
            Filter nodes with specific labels
        relationship_types : Optional[List[str]], default=None
            Filter relationships with specific types
        relationship_weight_property : Optional[str], default=None
            Property name for relationship weights
        seed_property : Optional[str], default=None
            Property name for initial community assignments
        tolerance : Optional[float], default=None
            Convergence tolerance for the algorithm

        Returns
        -------
        EstimationResult
            Estimated memory consumption and other metrics
        """
320+
321+
class ModularityOptimizationMutateResult(BaseResult):
    """Result type returned by ``ModularityOptimizationEndpoints.mutate``."""

    # Phase timings; the ``_millis`` suffix suggests milliseconds -- confirm
    # against the server's result schema.
    pre_processing_millis: int
    compute_millis: int
    post_processing_millis: int
    mutate_millis: int
    # Presumably the number of nodes the algorithm processed -- TODO confirm.
    nodes: int
    # Convergence flag and iteration count reported for the run.
    did_converge: bool
    ran_iterations: int
    # Modularity score and community summary reported for the run.
    modularity: float
    community_count: int
    # Keyed by statistic name; the exact key set is not visible in this file.
    community_distribution: Dict[str, float]
    # Presumably the effective configuration of the run -- TODO confirm.
    configuration: Dict[str, Any]
333+
334+
335+
class ModularityOptimizationStatsResult(BaseResult):
    """Result type returned by ``ModularityOptimizationEndpoints.stats``."""

    # Phase timings; the ``_millis`` suffix suggests milliseconds -- confirm
    # against the server's result schema.
    pre_processing_millis: int
    compute_millis: int
    post_processing_millis: int
    # Presumably the number of nodes the algorithm processed -- TODO confirm.
    nodes: int
    # Convergence flag and iteration count reported for the run.
    did_converge: bool
    ran_iterations: int
    # Modularity score and community summary reported for the run.
    modularity: float
    community_count: int
    # Keyed by statistic name; the exact key set is not visible in this file.
    community_distribution: Dict[str, float]
    # Presumably the effective configuration of the run -- TODO confirm.
    configuration: Dict[str, Any]
346+
347+
348+
class ModularityOptimizationWriteResult(BaseResult):
    """Result type returned by ``ModularityOptimizationEndpoints.write``."""

    # Phase timings; the ``_millis`` suffix suggests milliseconds -- confirm
    # against the server's result schema.
    pre_processing_millis: int
    compute_millis: int
    post_processing_millis: int
    write_millis: int
    # Presumably the number of nodes the algorithm processed -- TODO confirm.
    nodes: int
    # Convergence flag and iteration count reported for the run.
    did_converge: bool
    ran_iterations: int
    # Modularity score and community summary reported for the run.
    modularity: float
    community_count: int
    # Keyed by statistic name; the exact key set is not visible in this file.
    community_distribution: Dict[str, float]
    # Presumably the effective configuration of the run -- TODO confirm.
    configuration: Dict[str, Any]

0 commit comments

Comments
 (0)