Skip to content

Commit 41f15bb

Browse files
committed
Add maxkcut to v2 endpoints
1 parent cb7b12f commit 41f15bb

File tree

7 files changed

+678
-2
lines changed

7 files changed

+678
-2
lines changed
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Any, List, Optional, Union
5+
6+
from pandas import DataFrame
7+
8+
from graphdatascience.procedure_surface.api.base_result import BaseResult
9+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
11+
12+
13+
class MaxKCutEndpoints(ABC):
    """Abstract interface for the Approximate Maximum k-cut procedures (mutate, stream and estimate)."""

    @abstractmethod
    def mutate(
        self,
        G: GraphV2,
        mutate_property: str,
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        job_id: Optional[str] = None,
        k: Optional[int] = None,
        log_progress: bool = True,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> MaxKCutMutateResult:
        """
        Executes the Approximate Maximum k-cut algorithm and writes the results to the in-memory graph as node properties.

        The Approximate Maximum k-cut algorithm is a community detection algorithm that partitions a graph into k communities
        such that the sum of weights of edges between different communities is maximized. It uses a
        variable neighborhood search (VNS) approach to find high-quality cuts.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the community ID for each node
        concurrency : Optional[int], default=4
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        iterations : Optional[int], default=8
            The number of iterations the algorithm runs. More iterations may lead to better results but
            will increase computation time.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        k : Optional[int], default=2
            The number of communities to detect. Must be at least 2.
        log_progress : bool, default=True
            Whether to log progress information during execution
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        random_seed : Optional[int], default=None
            Random seed for reproducible results. If None, a random seed is used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        relationship_weight_property : Optional[str], default=None
            The relationship weight property. If None, each relationship has weight 1.0.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to
        vns_max_neighborhood_order : Optional[int], default=0
            The maximum neighborhood order for the Variable Neighborhood Search. Higher values may
            lead to better results but increase computation time.

        Returns
        -------
        MaxKCutMutateResult
            Algorithm metrics and statistics including the cut cost and processing times
        """

    @abstractmethod
    def stream(
        self,
        G: GraphV2,
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        job_id: Optional[str] = None,
        k: Optional[int] = None,
        log_progress: bool = True,
        min_community_size: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> DataFrame:
        """
        Executes the Approximate Maximum k-cut algorithm and returns a stream of results.

        The Approximate Maximum k-cut algorithm partitions a graph into k communities to maximize the cut cost.
        This method returns the community assignment for each node as a stream.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        concurrency : Optional[int], default=4
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        iterations : Optional[int], default=8
            The number of iterations the algorithm runs. More iterations may lead to better results but
            will increase computation time.
        job_id : Optional[str], default=None
            An identifier for the job that can be used to cancel or monitor progress
        k : Optional[int], default=2
            The number of communities to detect. Must be at least 2.
        log_progress : bool, default=True
            Whether to log progress information during execution
        min_community_size : Optional[int], default=None
            The minimum community size. Communities smaller than this will be filtered from results.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        random_seed : Optional[int], default=None
            Random seed for reproducible results. If None, a random seed is used.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        relationship_weight_property : Optional[str], default=None
            The relationship weight property. If None, each relationship has weight 1.0.
        sudo : Optional[bool], default=False
            Override memory estimation limits. Setting this to True allows running the algorithm
            even if the estimated memory requirements exceed available memory.
        username : Optional[str], default=None
            The username to attribute the procedure run to
        vns_max_neighborhood_order : Optional[int], default=0
            The maximum neighborhood order for the Variable Neighborhood Search. Higher values may
            lead to better results but increase computation time.

        Returns
        -------
        DataFrame
            A DataFrame with columns:
            - nodeId: The node identifier
            - communityId: The community assignment for the node
        """

    @abstractmethod
    def estimate(
        self,
        G: Union[GraphV2, dict[str, Any]],
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        k: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> EstimationResult:
        """
        Estimate the memory requirements for running the Approximate Maximum k-cut algorithm.

        This method provides memory estimates without actually running the algorithm, helping you
        determine if you have sufficient memory available.

        Parameters
        ----------
        G : Union[GraphV2, dict[str, Any]]
            The graph to estimate for, or a graph configuration dictionary
        concurrency : Optional[int], default=4
            The number of concurrent threads. Setting this to 1 will run the algorithm single-threaded.
        iterations : Optional[int], default=8
            The number of iterations the algorithm runs
        k : Optional[int], default=2
            The number of communities to detect. Must be at least 2.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run. If None, all nodes are used.
        random_seed : Optional[int], default=None
            Random seed for reproducible results
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run. If None, all
            relationship types are used.
        relationship_weight_property : Optional[str], default=None
            The relationship weight property. If None, each relationship has weight 1.0.
        vns_max_neighborhood_order : Optional[int], default=0
            The maximum neighborhood order for the Variable Neighborhood Search

        Returns
        -------
        EstimationResult
            The memory estimation result including required memory in bytes and as heap percentage
        """
194+
195+
196+
class MaxKCutMutateResult(BaseResult):
    """
    Result object returned by the Approximate Maximum k-cut mutate operation.

    All ``*_millis`` attributes are durations expressed in milliseconds.

    Attributes
    ----------
    cut_cost : float
        The cost of the cut, representing the sum of weights of edges between different communities
    compute_millis : int
        Time spent on computation in milliseconds
    configuration : dict[str, Any]
        The configuration used for the algorithm execution
    mutate_millis : int
        Time spent on mutating the graph in milliseconds
    node_properties_written : int
        The number of node properties written to the graph
    post_processing_millis : int
        Time spent on post-processing in milliseconds
    pre_processing_millis : int
        Time spent on pre-processing in milliseconds
    """

    # NOTE(review): how these snake_case names are matched against the keys of the raw
    # procedure result is presumably handled by BaseResult -- confirm there before renaming.
    cut_cost: float
    compute_millis: int
    configuration: dict[str, Any]
    mutate_millis: int
    node_properties_written: int
    post_processing_millis: int
    pre_processing_millis: int
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
from typing import Any, List, Optional, Union
2+
3+
from pandas import DataFrame
4+
5+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
6+
from graphdatascience.procedure_surface.api.community.maxkcut_endpoints import (
7+
MaxKCutEndpoints,
8+
MaxKCutMutateResult,
9+
)
10+
11+
from ...arrow_client.authenticated_flight_client import AuthenticatedArrowClient
12+
from ...arrow_client.v2.remote_write_back_client import RemoteWriteBackClient
13+
from ..api.estimation_result import EstimationResult
14+
from .node_property_endpoints import NodePropertyEndpoints
15+
16+
17+
class MaxKCutArrowEndpoints(MaxKCutEndpoints):
    """
    Arrow-client implementation of ``MaxKCutEndpoints``.

    Each operation assembles its keyword options, hands them to a ``NodePropertyEndpoints``
    helper to build the job configuration, and then lets that helper run the
    ``v2/community.maxkcut`` job (or its ``.estimate`` variant).
    """

    def __init__(
        self,
        arrow_client: AuthenticatedArrowClient,
        write_back_client: Optional[RemoteWriteBackClient] = None,
        show_progress: bool = True,
    ):
        """
        Store the collaborators and create the shared ``NodePropertyEndpoints`` helper.

        Parameters
        ----------
        arrow_client : AuthenticatedArrowClient
            Client forwarded to the ``NodePropertyEndpoints`` helper
        write_back_client : Optional[RemoteWriteBackClient], default=None
            Optional write-back client forwarded to the helper
        show_progress : bool, default=True
            Progress flag forwarded to the helper
        """
        self._arrow_client = arrow_client
        self._write_back_client = write_back_client
        self._show_progress = show_progress
        self._node_property_endpoints = NodePropertyEndpoints(arrow_client, write_back_client, show_progress)

    def mutate(
        self,
        G: GraphV2,
        mutate_property: str,
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        job_id: Optional[str] = None,
        k: Optional[int] = None,
        log_progress: bool = True,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> MaxKCutMutateResult:
        """Run the ``v2/community.maxkcut`` job in mutate mode and wrap its result in a ``MaxKCutMutateResult``."""
        # Keys are listed in the same order in which they are forwarded as keyword arguments.
        job_options: dict[str, Any] = {
            "concurrency": concurrency,
            "iterations": iterations,
            "job_id": job_id,
            "k": k,
            "log_progress": log_progress,
            "node_labels": node_labels,
            "random_seed": random_seed,
            "relationship_types": relationship_types,
            "relationship_weight_property": relationship_weight_property,
            "sudo": sudo,
            "username": username,
            "vns_max_neighborhood_order": vns_max_neighborhood_order,
        }
        delegate = self._node_property_endpoints
        config = delegate.create_base_config(G, **job_options)

        raw_result = delegate.run_job_and_mutate("v2/community.maxkcut", G, config, mutate_property)
        return MaxKCutMutateResult(**raw_result)

    def stream(
        self,
        G: GraphV2,
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        job_id: Optional[str] = None,
        k: Optional[int] = None,
        log_progress: bool = True,
        min_community_size: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        sudo: Optional[bool] = False,
        username: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> DataFrame:
        """Run the ``v2/community.maxkcut`` job and return whatever the helper's ``run_job_and_stream`` yields."""
        # Keys are listed in the same order in which they are forwarded as keyword arguments.
        job_options: dict[str, Any] = {
            "concurrency": concurrency,
            "iterations": iterations,
            "job_id": job_id,
            "k": k,
            "log_progress": log_progress,
            "min_community_size": min_community_size,
            "node_labels": node_labels,
            "random_seed": random_seed,
            "relationship_types": relationship_types,
            "relationship_weight_property": relationship_weight_property,
            "sudo": sudo,
            "username": username,
            "vns_max_neighborhood_order": vns_max_neighborhood_order,
        }
        delegate = self._node_property_endpoints
        config = delegate.create_base_config(G, **job_options)

        return delegate.run_job_and_stream("v2/community.maxkcut", G, config)

    def estimate(
        self,
        G: Union[GraphV2, dict[str, Any]],
        *,
        concurrency: Optional[int] = None,
        iterations: Optional[int] = None,
        k: Optional[int] = None,
        node_labels: Optional[List[str]] = None,
        random_seed: Optional[int] = None,
        relationship_types: Optional[List[str]] = None,
        relationship_weight_property: Optional[str] = None,
        vns_max_neighborhood_order: Optional[int] = None,
    ) -> EstimationResult:
        """Build an estimate configuration and delegate to the helper's ``estimate`` for ``v2/community.maxkcut.estimate``."""
        # Keys are listed in the same order in which they are forwarded as keyword arguments.
        estimate_options: dict[str, Any] = {
            "concurrency": concurrency,
            "iterations": iterations,
            "k": k,
            "node_labels": node_labels,
            "random_seed": random_seed,
            "relationship_types": relationship_types,
            "relationship_weight_property": relationship_weight_property,
            "vns_max_neighborhood_order": vns_max_neighborhood_order,
        }
        delegate = self._node_property_endpoints
        config = delegate.create_estimate_config(**estimate_options)

        return delegate.estimate("v2/community.maxkcut.estimate", G, config)

0 commit comments

Comments
 (0)