Skip to content

Commit 9bd0148

Browse files
committed
Add triangle count support
1 parent 41f15bb commit 9bd0148

File tree

7 files changed

+1001
-3
lines changed

7 files changed

+1001
-3
lines changed
Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,375 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, List, Optional, Union
5+
6+
from pandas import DataFrame
7+
8+
from graphdatascience.procedure_surface.api.base_result import BaseResult
9+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
11+
12+
13+
class TriangleCountEndpoints(ABC):
    """
    Abstract interface for the Triangle Count algorithm endpoints.

    Declares the ``mutate``, ``stats``, ``stream``, ``write`` and ``estimate`` operations.
    Every method is abstract; concrete subclasses supply the implementations.
    """

    @abstractmethod
    def mutate(
        self,
        G: GraphV2,
        mutate_property: str,
        *,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
        label_filter: Optional[List[str]] = None,
        log_progress: bool = True,
        max_degree: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
    ) -> TriangleCountMutateResult:
        """
        Executes the Triangle Count algorithm and writes the results to the in-memory graph as node properties.

        The Triangle Count algorithm computes the number of triangles each node participates in.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the triangle count for each node
        concurrency : Optional[int], default=None
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        label_filter : Optional[List[str]], default=None
            Filter triangles by node labels. Only triangles where all nodes have one of the specified
            labels will be counted.
        log_progress : bool, default=True
            Whether to log progress information during execution
        max_degree : Optional[int], default=None
            Maximum degree of nodes to consider. Nodes with higher degrees will be excluded from
            triangle counting to improve performance.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to

        Returns
        -------
        TriangleCountMutateResult
            Algorithm metrics and statistics including the global triangle count and processing times
        """

    @abstractmethod
    def stats(
        self,
        G: GraphV2,
        *,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
        label_filter: Optional[List[str]] = None,
        log_progress: bool = True,
        max_degree: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
    ) -> TriangleCountStatsResult:
        """
        Executes the Triangle Count algorithm and returns statistics about the computation.

        This method computes triangle counts without storing results in the graph, providing
        aggregate statistics about the triangle structure of the graph.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        concurrency : Optional[int], default=None
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        label_filter : Optional[List[str]], default=None
            Filter triangles by node labels. Only triangles where all nodes have one of the specified
            labels will be counted.
        log_progress : bool, default=True
            Whether to log progress information during execution
        max_degree : Optional[int], default=None
            Maximum degree of nodes to consider. Nodes with higher degrees will be excluded from
            triangle counting to improve performance.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to

        Returns
        -------
        TriangleCountStatsResult
            Algorithm statistics including the global triangle count and processing times
        """

    @abstractmethod
    def stream(
        self,
        G: GraphV2,
        *,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
        label_filter: Optional[List[str]] = None,
        log_progress: bool = True,
        max_degree: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
    ) -> DataFrame:
        """
        Executes the Triangle Count algorithm and returns a stream of results.

        The Triangle Count algorithm computes the number of triangles each node participates in.
        This method returns the triangle count for each node as a stream.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        concurrency : Optional[int], default=None
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        label_filter : Optional[List[str]], default=None
            Filter triangles by node labels. Only triangles where all nodes have one of the specified
            labels will be counted.
        log_progress : bool, default=True
            Whether to log progress information during execution
        max_degree : Optional[int], default=None
            Maximum degree of nodes to consider. Nodes with higher degrees will be excluded from
            triangle counting to improve performance.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to

        Returns
        -------
        DataFrame
            A DataFrame with columns:
            - nodeId: The node identifier
            - triangleCount: The number of triangles the node participates in
        """

    @abstractmethod
    def write(
        self,
        G: GraphV2,
        write_property: str,
        *,
        concurrency: Optional[int] = None,
        job_id: Optional[str] = None,
        label_filter: Optional[List[str]] = None,
        log_progress: bool = True,
        max_degree: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
        write_concurrency: Optional[int] = None,
    ) -> TriangleCountWriteResult:
        """
        Executes the Triangle Count algorithm and writes the results back to the database.

        This method computes triangle counts and writes the results directly to the Neo4j database
        as node properties, making them available for subsequent Cypher queries.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        write_property : str
            The property name to store the triangle count for each node in the database
        concurrency : Optional[int], default=None
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        label_filter : Optional[List[str]], default=None
            Filter triangles by node labels. Only triangles where all nodes have one of the specified
            labels will be counted.
        log_progress : bool, default=True
            Whether to log progress information during execution
        max_degree : Optional[int], default=None
            Maximum degree of nodes to consider. Nodes with higher degrees will be excluded from
            triangle counting to improve performance.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to
        write_concurrency : Optional[int], default=None
            The number of concurrent threads for writing results to the database

        Returns
        -------
        TriangleCountWriteResult
            Algorithm metrics and statistics including the global triangle count and processing times
        """

    @abstractmethod
    def estimate(
        self,
        G: Union[GraphV2, dict[str, Any]],
        *,
        concurrency: Optional[int] = None,
        label_filter: Optional[List[str]] = None,
        max_degree: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        relationship_types: Optional[List[str]] = None,
    ) -> EstimationResult:
        """
        Estimate the memory requirements for running the Triangle Count algorithm.

        This method provides memory estimates without actually running the algorithm, helping you
        determine if you have sufficient memory available.

        Parameters
        ----------
        G : Union[GraphV2, dict[str, Any]]
            The graph to estimate for, or a graph configuration dictionary
        concurrency : Optional[int], default=None
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        label_filter : Optional[List[str]], default=None
            Filter triangles by node labels. Only triangles where all nodes have one of the specified
            labels will be counted.
        max_degree : Optional[int], default=None
            Maximum degree of nodes to consider. Nodes with higher degrees will be excluded from
            triangle counting to improve performance.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.

        Returns
        -------
        EstimationResult
            The memory estimation result including required memory in bytes and as heap percentage
        """
280+
281+
282+
class TriangleCountMutateResult(BaseResult):
    """
    Summary of a Triangle Count run in mutate mode.

    Attributes
    ----------
    compute_millis : int
        Milliseconds spent on the computation
    configuration : dict[str, Any]
        Configuration the algorithm was executed with
    global_triangle_count : int
        Total number of triangles found in the graph
    mutate_millis : int
        Milliseconds spent on mutating the graph
    node_count : int
        Total number of nodes processed
    node_properties_written : int
        Number of node properties written to the graph
    post_processing_millis : int
        Milliseconds spent on post-processing
    pre_processing_millis : int
        Milliseconds spent on pre-processing
    """

    compute_millis: int
    configuration: dict[str, Any]
    global_triangle_count: int
    mutate_millis: int
    node_count: int
    node_properties_written: int
    post_processing_millis: int
    pre_processing_millis: int
314+
315+
316+
class TriangleCountStatsResult(BaseResult):
    """
    Summary of a Triangle Count run in stats mode.

    Attributes
    ----------
    compute_millis : int
        Milliseconds spent on the computation
    configuration : dict[str, Any]
        Configuration the algorithm was executed with
    global_triangle_count : int
        Total number of triangles found in the graph
    node_count : int
        Total number of nodes processed
    post_processing_millis : int
        Milliseconds spent on post-processing
    pre_processing_millis : int
        Milliseconds spent on pre-processing
    """

    compute_millis: int
    configuration: dict[str, Any]
    global_triangle_count: int
    node_count: int
    post_processing_millis: int
    pre_processing_millis: int
342+
343+
344+
class TriangleCountWriteResult(BaseResult):
    """
    Summary of a Triangle Count run in write mode.

    Attributes
    ----------
    compute_millis : int
        Milliseconds spent on the computation
    configuration : dict[str, Any]
        Configuration the algorithm was executed with
    global_triangle_count : int
        Total number of triangles found in the graph
    node_count : int
        Total number of nodes processed
    node_properties_written : int
        Number of node properties written to the database
    post_processing_millis : int
        Milliseconds spent on post-processing
    pre_processing_millis : int
        Milliseconds spent on pre-processing
    write_millis : int
        Milliseconds spent on writing results to the database
    """

    compute_millis: int
    configuration: dict[str, Any]
    global_triangle_count: int
    node_count: int
    node_properties_written: int
    post_processing_millis: int
    pre_processing_millis: int
    write_millis: int

0 commit comments

Comments
 (0)