Skip to content

Commit cf01c46

Browse files
authored
Merge pull request #915 from DarthMax/wcc_endpoints
WCC Endpoints
2 parents 38f7982 + 32514d3 commit cf01c46

File tree

9 files changed

+1036
-0
lines changed

9 files changed

+1036
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from __future__ import annotations
2+
3+
from typing import Any
4+
5+
from pydantic import BaseModel, ConfigDict
6+
from pydantic.alias_generators import to_camel
7+
8+
9+
class EstimationResult(BaseModel):
10+
model_config = ConfigDict(alias_generator=to_camel)
11+
12+
node_count: int
13+
relationship_count: int
14+
required_memory: str
15+
tree_view: str
16+
map_view: dict[str, Any]
17+
bytes_min: int
18+
bytes_max: int
19+
heap_percentage_min: float
20+
heap_percentage_max: float
21+
22+
def __getitem__(self, item: str) -> Any:
23+
return getattr(self, item)
24+
25+
@staticmethod
26+
def from_cypher(cypher_result: dict[str, Any]) -> EstimationResult:
27+
return EstimationResult(**cypher_result)
Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, List, Optional
5+
6+
from pandas import DataFrame
7+
from pydantic import BaseModel, ConfigDict
8+
from pydantic.alias_generators import to_camel
9+
10+
from ...graph.graph_object import Graph
11+
from .estimation_result import EstimationResult
12+
13+
14+
class WccEndpoints(ABC):
15+
"""
16+
Abstract base class defining the API for the Weakly Connected Components (WCC) algorithm.
17+
"""
18+
19+
@abstractmethod
20+
def mutate(
21+
self,
22+
G: Graph,
23+
mutate_property: str,
24+
threshold: Optional[float] = None,
25+
relationship_types: Optional[List[str]] = None,
26+
node_labels: Optional[List[str]] = None,
27+
sudo: Optional[bool] = None,
28+
log_progress: Optional[bool] = None,
29+
username: Optional[str] = None,
30+
concurrency: Optional[Any] = None,
31+
job_id: Optional[Any] = None,
32+
seed_property: Optional[str] = None,
33+
consecutive_ids: Optional[bool] = None,
34+
relationship_weight_property: Optional[str] = None,
35+
) -> WccMutateResult:
36+
"""
37+
Executes the WCC algorithm and writes the results to the in-memory graph as node properties.
38+
39+
Parameters
40+
----------
41+
G : Graph
42+
The graph to run the algorithm on
43+
mutate_property : str
44+
The property name to store the component ID for each node
45+
threshold : Optional[float], default=None
46+
The minimum required weight to consider a relationship during traversal
47+
relationship_types : Optional[List[str]], default=None
48+
The relationships types used to select relationships for this algorithm run
49+
node_labels : Optional[List[str]], default=None
50+
The node labels used to select nodes for this algorithm run
51+
sudo : Optional[bool], default=None
52+
Override memory estimation limits
53+
log_progress : Optional[bool], default=None
54+
Whether to log progress
55+
username : Optional[str], default=None
56+
The username to attribute the procedure run to
57+
concurrency : Optional[Any], default=None
58+
The number of concurrent threads
59+
job_id : Optional[Any], default=None
60+
An identifier for the job
61+
seed_property : Optional[str], default=None
62+
Defines node properties that are used as initial component identifiers
63+
consecutive_ids : Optional[bool], default=None
64+
Flag to decide whether component identifiers are mapped into a consecutive id space
65+
relationship_weight_property : Optional[str], default=None
66+
The property name that contains weight
67+
68+
Returns
69+
-------
70+
WccMutateResult
71+
Algorithm metrics and statistics
72+
"""
73+
pass
74+
75+
@abstractmethod
76+
def stats(
77+
self,
78+
G: Graph,
79+
threshold: Optional[float] = None,
80+
relationship_types: Optional[List[str]] = None,
81+
node_labels: Optional[List[str]] = None,
82+
sudo: Optional[bool] = None,
83+
log_progress: Optional[bool] = None,
84+
username: Optional[str] = None,
85+
concurrency: Optional[Any] = None,
86+
job_id: Optional[Any] = None,
87+
seed_property: Optional[str] = None,
88+
consecutive_ids: Optional[bool] = None,
89+
relationship_weight_property: Optional[str] = None,
90+
) -> WccStatsResult:
91+
"""
92+
Executes the WCC algorithm and returns statistics.
93+
94+
Parameters
95+
----------
96+
G : Graph
97+
The graph to run the algorithm on
98+
threshold : Optional[float], default=None
99+
The minimum required weight to consider a relationship during traversal
100+
relationship_types : Optional[List[str]], default=None
101+
The relationships types used to select relationships for this algorithm run
102+
node_labels : Optional[List[str]], default=None
103+
The node labels used to select nodes for this algorithm run
104+
sudo : Optional[bool], default=None
105+
Override memory estimation limits
106+
log_progress : Optional[bool], default=None
107+
Whether to log progress
108+
username : Optional[str], default=None
109+
The username to attribute the procedure run to
110+
concurrency : Optional[Any], default=None
111+
The number of concurrent threads
112+
job_id : Optional[Any], default=None
113+
An identifier for the job
114+
seed_property : Optional[str], default=None
115+
Defines node properties that are used as initial component identifiers
116+
consecutive_ids : Optional[bool], default=None
117+
Flag to decide whether component identifiers are mapped into a consecutive id space
118+
relationship_weight_property : Optional[str], default=None
119+
The property name that contains weight
120+
121+
Returns
122+
-------
123+
WccStatsResult
124+
Algorithm metrics and statistics
125+
"""
126+
pass
127+
128+
@abstractmethod
129+
def stream(
130+
self,
131+
G: Graph,
132+
min_component_size: Optional[int] = None,
133+
threshold: Optional[float] = None,
134+
relationship_types: Optional[List[str]] = None,
135+
node_labels: Optional[List[str]] = None,
136+
sudo: Optional[bool] = None,
137+
log_progress: Optional[bool] = None,
138+
username: Optional[str] = None,
139+
concurrency: Optional[Any] = None,
140+
job_id: Optional[Any] = None,
141+
seed_property: Optional[str] = None,
142+
consecutive_ids: Optional[bool] = None,
143+
relationship_weight_property: Optional[str] = None,
144+
) -> DataFrame:
145+
"""
146+
Executes the WCC algorithm and returns a stream of results.
147+
148+
Parameters
149+
----------
150+
G : Graph
151+
The graph to run the algorithm on
152+
min_component_size : Optional[int], default=None
153+
Don't stream components with fewer nodes than this
154+
threshold : Optional[float], default=None
155+
The minimum required weight to consider a relationship during traversal
156+
relationship_types : Optional[List[str]], default=None
157+
The relationships types considered in this algorithm run
158+
node_labels : Optional[List[str]], default=None
159+
The node labels used to select nodes for this algorithm run
160+
sudo : Optional[bool], default=None
161+
Override memory estimation limits
162+
log_progress : Optional[bool], default=None
163+
Whether to log progress
164+
username : Optional[str], default=None
165+
The username to attribute the procedure run to
166+
concurrency : Optional[Any], default=None
167+
The number of concurrent threads
168+
job_id : Optional[Any], default=None
169+
An identifier for the job
170+
seed_property : Optional[str], default=None
171+
Defines node properties that are used as initial component identifiers
172+
consecutive_ids : Optional[bool], default=None
173+
Flag to decide whether component identifiers are mapped into a consecutive id space
174+
relationship_weight_property : Optional[str], default=None
175+
The property name that contains weight
176+
177+
Returns
178+
-------
179+
DataFrame
180+
DataFrame with the algorithm results
181+
"""
182+
pass
183+
184+
@abstractmethod
185+
def write(
186+
self,
187+
G: Graph,
188+
write_property: str,
189+
min_component_size: Optional[int] = None,
190+
threshold: Optional[float] = None,
191+
relationship_types: Optional[List[str]] = None,
192+
node_labels: Optional[List[str]] = None,
193+
sudo: Optional[bool] = None,
194+
log_progress: Optional[bool] = None,
195+
username: Optional[str] = None,
196+
concurrency: Optional[Any] = None,
197+
job_id: Optional[Any] = None,
198+
seed_property: Optional[str] = None,
199+
consecutive_ids: Optional[bool] = None,
200+
relationship_weight_property: Optional[str] = None,
201+
write_concurrency: Optional[Any] = None,
202+
) -> WccWriteResult:
203+
"""
204+
Executes the WCC algorithm and writes the results to the Neo4j database.
205+
206+
Parameters
207+
----------
208+
G : Graph
209+
The graph to run the algorithm on
210+
write_property : str
211+
The property name to write component IDs to
212+
min_component_size : Optional[int], default=None
213+
Don't write components with fewer nodes than this
214+
threshold : Optional[float], default=None
215+
The minimum required weight to consider a relationship during traversal
216+
relationship_types : Optional[List[str]], default=None
217+
The relationships types considered in this algorithm run
218+
node_labels : Optional[List[str]], default=None
219+
The node labels used to select nodes for this algorithm run
220+
sudo : Optional[bool], default=None
221+
Override memory estimation limits
222+
log_progress : Optional[bool], default=None
223+
Whether to log progress
224+
username : Optional[str], default=None
225+
The username to attribute the procedure run to
226+
concurrency : Optional[Any], default=None
227+
The number of concurrent threads
228+
job_id : Optional[Any], default=None
229+
An identifier for the job
230+
seed_property : Optional[str], default=None
231+
Defines node properties that are used as initial component identifiers
232+
consecutive_ids : Optional[bool], default=None
233+
Flag to decide whether component identifiers are mapped into a consecutive id space
234+
relationship_weight_property : Optional[str], default=None
235+
The property name that contains weight
236+
write_concurrency : Optional[Any], default=None
237+
The number of concurrent threads during the write phase
238+
239+
Returns
240+
-------
241+
WccWriteResult
242+
Algorithm metrics and statistics
243+
"""
244+
pass
245+
246+
@abstractmethod
247+
def estimate(
248+
self,
249+
graph_name: Optional[str] = None,
250+
projection_config: Optional[dict[str, Any]] = None,
251+
) -> EstimationResult:
252+
"""
253+
Estimate the results based on the provided graph and configuration.
254+
255+
This abstract method is intended to be implemented in a subclass to provide
256+
specific functionality for estimating outcomes based on a graph name and a
257+
projection configuration object. The implementation should use the given
258+
parameters to compute and return an appropriate estimation result.
259+
260+
Parameters
261+
----------
262+
graph_name : Optional[str], optional
263+
Name of the graph to be used in the estimation
264+
projection_config : Optional[dict[str, Any]], optional
265+
Configuration dictionary for the projection.
266+
267+
Returns
268+
-------
269+
EstimationResult
270+
An object containing the result of the estimation, with relevant details
271+
as defined by the specific implementation.
272+
"""
273+
pass
274+
275+
276+
class WccMutateResult(BaseModel):
277+
model_config = ConfigDict(alias_generator=to_camel)
278+
279+
component_count: int
280+
component_distribution: dict[str, Any]
281+
pre_processing_millis: int
282+
compute_millis: int
283+
post_processing_millis: int
284+
mutate_millis: int
285+
node_properties_written: int
286+
configuration: dict[str, Any]
287+
288+
def __getitem__(self, item: str) -> Any:
289+
return getattr(self, item)
290+
291+
292+
class WccStatsResult(BaseModel):
293+
model_config = ConfigDict(alias_generator=to_camel)
294+
295+
component_count: int
296+
component_distribution: dict[str, Any]
297+
pre_processing_millis: int
298+
compute_millis: int
299+
post_processing_millis: int
300+
configuration: dict[str, Any]
301+
302+
def __getitem__(self, item: str) -> Any:
303+
return getattr(self, item)
304+
305+
306+
class WccWriteResult(BaseModel):
307+
model_config = ConfigDict(alias_generator=to_camel)
308+
309+
component_count: int
310+
component_distribution: dict[str, Any]
311+
pre_processing_millis: int
312+
compute_millis: int
313+
write_millis: int
314+
post_processing_millis: int
315+
node_properties_written: int
316+
configuration: dict[str, Any]
317+
318+
def __getitem__(self, item: str) -> Any:
319+
return getattr(self, item)

0 commit comments

Comments
 (0)