
Commit 89da26d

Map scale properties
1 parent 0f29bd1 commit 89da26d


9 files changed: +939 -4 lines changed

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig

__all__ = ["ScalerConfig"]
Lines changed: 287 additions & 0 deletions

@@ -0,0 +1,287 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any

from pandas import DataFrame

from graphdatascience.procedure_surface.api.base_result import BaseResult
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig
from graphdatascience.procedure_surface.api.default_values import ALL_LABELS
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult


class ScalePropertiesEndpoints(ABC):
    @abstractmethod
    def mutate(
        self,
        G: GraphV2,
        mutate_property: str,
        node_properties: list[str],
        scaler: str | dict[str, str | int | float] | ScalerConfig,
        node_labels: list[str] = ALL_LABELS,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> ScalePropertiesMutateResult:
        """
        Runs the Scale Properties algorithm and stores the results in the graph catalog as a new node property.

        Scale Properties scales node properties using a specified scaler (e.g., MinMax, Mean, Max, Log, StdScore, Center).

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        mutate_property : str
            Name of the node property to store the results in.
        node_properties : list[str]
            The node properties to scale, given as a list of property names.
        scaler : str | dict[str, str | int | float] | ScalerConfig
            The scaler to use. Can be:
            - A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm')
            - A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
            - A ScalerConfig instance
        node_labels : list[str]
            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
        sudo : bool, default=False
            Disable the memory guard.
        log_progress : bool, default=True
            Display progress logging.
        username : str | None, default=None
            The username to attribute the procedure run to.
        concurrency : int | None, default=None
            Number of threads to use for running the algorithm.
        job_id : str | None, default=None
            Identifier for the job.

        Returns
        -------
        ScalePropertiesMutateResult
            Algorithm metrics and statistics including the scaler statistics
        """
        pass

    @abstractmethod
    def stats(
        self,
        G: GraphV2,
        node_properties: list[str],
        scaler: str | dict[str, str | int | float] | ScalerConfig,
        node_labels: list[str] = ALL_LABELS,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> ScalePropertiesStatsResult:
        """
        Runs the Scale Properties algorithm and returns result statistics without storing the results.

        Scale Properties scales node properties using a specified scaler (e.g., MinMax, Mean, Max, Log, StdScore, Center).

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        node_properties : list[str]
            The node properties to scale, given as a list of property names.
        scaler : str | dict[str, str | int | float] | ScalerConfig
            The scaler to use. Can be:
            - A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center')
            - A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
            - A ScalerConfig instance
        node_labels : list[str]
            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
        sudo : bool, default=False
            Disable the memory guard.
        log_progress : bool, default=True
            Display progress logging.
        username : str | None, default=None
            The username to attribute the procedure run to.
        concurrency : int | None, default=None
            Number of threads to use for running the algorithm.
        job_id : str | None, default=None
            Identifier for the job.

        Returns
        -------
        ScalePropertiesStatsResult
            Algorithm statistics including the scaler statistics
        """
        pass

    @abstractmethod
    def stream(
        self,
        G: GraphV2,
        node_properties: list[str],
        scaler: str | dict[str, str | int | float] | ScalerConfig,
        node_labels: list[str] = ALL_LABELS,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
    ) -> DataFrame:
        """
        Executes the Scale Properties algorithm and returns a stream of results.

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        node_properties : list[str]
            The node properties to scale, given as a list of property names.
        scaler : str | dict[str, str | int | float] | ScalerConfig
            The scaler to use. Can be:
            - A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center')
            - A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
            - A ScalerConfig instance
        node_labels : list[str]
            The node labels used to select nodes for this algorithm run.
        sudo : bool, default=False
            Override memory estimation limits. Use with caution as this can lead to
            memory issues if the estimation is significantly wrong.
        log_progress : bool, default=True
            Whether to log progress of the algorithm execution.
        username : str | None, default=None
            The username to attribute the procedure run to.
        concurrency : int | None, default=None
            The number of concurrent threads used for the algorithm execution.
        job_id : str | None, default=None
            An identifier for the job that can be used for monitoring and cancellation.

        Returns
        -------
        DataFrame
            DataFrame with nodeId and scaledProperty columns containing the scaled property values.
            Each row represents a node with its corresponding scaled property values.
        """
        pass

    @abstractmethod
    def write(
        self,
        G: GraphV2,
        write_property: str,
        node_properties: list[str],
        scaler: str | dict[str, str | int | float] | ScalerConfig,
        node_labels: list[str] = ALL_LABELS,
        sudo: bool = False,
        log_progress: bool = True,
        username: str | None = None,
        concurrency: int | None = None,
        job_id: str | None = None,
        write_concurrency: int | None = None,
    ) -> ScalePropertiesWriteResult:
        """
        Runs the Scale Properties algorithm and stores the result in the Neo4j database as a new node property.

        Scale Properties scales node properties using a specified scaler (e.g., MinMax, Mean, Max, Log, StdScore, Center).

        Parameters
        ----------
        G : GraphV2
            The graph to run the algorithm on
        write_property : str
            The property name to store the scaled property values for each node in the database.
        node_properties : list[str]
            The node properties to scale, given as a list of property names.
        scaler : str | dict[str, str | int | float] | ScalerConfig
            The scaler to use. Can be:
            - A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center')
            - A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
            - A ScalerConfig instance
        node_labels : list[str]
            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
        sudo : bool, default=False
            Disable the memory guard.
        log_progress : bool, default=True
            Display progress logging.
        username : str | None, default=None
            The username to attribute the procedure run to.
        concurrency : int | None, default=None
            The number of threads to use for running the algorithm.
        job_id : str | None, default=None
            Identifier for the job.
        write_concurrency : int | None, default=None
            The number of concurrent threads used during the write phase.

        Returns
        -------
        ScalePropertiesWriteResult
            Algorithm metrics and statistics including the scaler statistics and write timing
        """
        pass

    @abstractmethod
    def estimate(
        self,
        G: GraphV2 | dict[str, Any],
        node_properties: list[str],
        scaler: str | dict[str, str | int | float] | ScalerConfig,
        node_labels: list[str] = ALL_LABELS,
        concurrency: int | None = None,
    ) -> EstimationResult:
        """
        Estimate the memory consumption of an algorithm run.

        Parameters
        ----------
        G : GraphV2 | dict[str, Any]
            The graph to run the algorithm on or a dictionary representing the graph.
        node_properties : list[str]
            The node properties to scale, given as a list of property names.
        scaler : str | dict[str, str | int | float] | ScalerConfig
            The scaler to use. Can be:
            - A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center')
            - A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
            - A ScalerConfig instance
        node_labels : list[str]
            The node labels used to select nodes for this algorithm run.
        concurrency : int | None
            The number of concurrent threads.

        Returns
        -------
        EstimationResult
            Memory estimation details
        """
        pass


class ScalePropertiesMutateResult(BaseResult):
    """Result of running Scale Properties algorithm with mutate mode."""

    compute_millis: int
    configuration: dict[str, Any]
    mutate_millis: int
    node_properties_written: int
    post_processing_millis: int
    pre_processing_millis: int
    scaler_statistics: dict[str, Any]


class ScalePropertiesStatsResult(BaseResult):
    """Result of running Scale Properties algorithm with stats mode."""

    compute_millis: int
    configuration: dict[str, Any]
    post_processing_millis: int
    pre_processing_millis: int
    scaler_statistics: dict[str, Any]


class ScalePropertiesWriteResult(BaseResult):
    """Result of running Scale Properties algorithm with write mode."""

    compute_millis: int
    configuration: dict[str, Any]
    node_properties_written: int
    post_processing_millis: int
    pre_processing_millis: int
    scaler_statistics: dict[str, Any]
    write_millis: int
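
A minimal usage sketch of the endpoints defined above, assuming scale_properties is a concrete implementation of ScalePropertiesEndpoints obtained from the client and G is a projected GraphV2 (these names and the property names are illustrative, not part of this commit):

from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig


def scale_example(scale_properties, G):
    # Stream scaled values without modifying the graph; the scaler is given as a plain string.
    df = scale_properties.stream(G, node_properties=["age", "score"], scaler="MinMax")

    # Store scaled values in the graph catalog, passing the scaler as a ScalerConfig;
    # the dict form {"type": "Log", "offset": 1.0} would be accepted as well.
    result = scale_properties.mutate(
        G,
        mutate_property="scaledProperties",
        node_properties=["age", "score"],
        scaler=ScalerConfig(type="Log", offset=1.0),
    )

    # The mutate result exposes timings and per-property scaler statistics.
    print(result.node_properties_written, result.scaler_statistics)
    return df
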
Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
from pydantic import BaseModel


class ScalerConfig(BaseModel):
    """
    Configuration for a scaler used in the Scale Properties algorithm.

    Attributes
    ----------
    type : str
        The type of scaler to use. Can be 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center'.
    offset : int | float | None
        The offset to add to the property values before applying the log transformation. Only used when type is 'Log'.
    """

    type: str
    offset: int | float | None = None
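
A short sketch of how the three scaler forms accepted by the endpoints relate to this model (variable names and values are illustrative):

from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig

# Scalers without extra options only need a type.
minmax = ScalerConfig(type="MinMax")

# The Log scaler takes an optional offset that is added before the log transform.
log_scaler = ScalerConfig(type="Log", offset=1.0)

# Equivalent forms accepted by the endpoints' scaler parameter:
#   "MinMax"                        -> ScalerConfig(type="MinMax")
#   {"type": "Log", "offset": 1.0}  -> ScalerConfig(type="Log", offset=1.0)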
