Skip to content

Commit e65b28f

Browse files
committed
Use typed scaler parameter for centrality algos
1 parent 89da26d commit e65b28f

File tree

11 files changed

+232
-104
lines changed

11 files changed

+232
-104
lines changed

graphdatascience/procedure_surface/api/catalog/scale_properties_endpoints.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def stream(
158158
Returns
159159
-------
160160
DataFrame
161-
DataFrame with nodeId and scaledProperty columns containing scaled property values.
161+
DataFrame with nodeId and scaledProperties columns containing scaled property values.
162162
Each row represents a node with its corresponding scaled property values.
163163
"""
164164
pass

graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig
1011
from graphdatascience.procedure_surface.api.default_values import ALL_LABELS, ALL_TYPES
1112
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1213

@@ -21,7 +22,7 @@ def mutate(
2122
damping_factor: float = 0.85,
2223
tolerance: float = 1.0e-7,
2324
max_iterations: int = 20,
24-
scaler: Any = "NONE",
25+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
2526
relationship_types: list[str] = ALL_TYPES,
2627
node_labels: list[str] = ALL_LABELS,
2728
sudo: bool = False,
@@ -51,8 +52,12 @@ def mutate(
5152
Minimum change in scores between iterations.
5253
max_iterations : int
5354
Maximum number of iterations to run.
54-
scaler : Any
55-
Name of the scaler applied on the resulting scores.
55+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
56+
The scaler to use. Can be:
57+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
58+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
59+
- A ScalerConfig instance
60+
- "NONE" (default, no scaling)
5661
relationship_types : list[str]
5762
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
5863
node_labels : list[str]
@@ -86,7 +91,7 @@ def stats(
8691
damping_factor: float = 0.85,
8792
tolerance: float = 1.0e-7,
8893
max_iterations: int = 20,
89-
scaler: Any = "NONE",
94+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
9095
relationship_types: list[str] = ALL_TYPES,
9196
node_labels: list[str] = ALL_LABELS,
9297
sudo: bool = False,
@@ -114,8 +119,12 @@ def stats(
114119
Minimum change in scores between iterations.
115120
max_iterations : int
116121
Maximum number of iterations to run.
117-
scaler : Any
118-
Name of the scaler applied on the resulting scores.
122+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
123+
The scaler to use. Can be:
124+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
125+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
126+
- A ScalerConfig instance
127+
- "NONE" (default, no scaling)
119128
relationship_types : list[str]
120129
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
121130
node_labels : list[str]
@@ -149,7 +158,7 @@ def stream(
149158
damping_factor: float = 0.85,
150159
tolerance: float = 1.0e-7,
151160
max_iterations: int = 20,
152-
scaler: Any = "NONE",
161+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
153162
relationship_types: list[str] = ALL_TYPES,
154163
node_labels: list[str] = ALL_LABELS,
155164
sudo: bool = False,
@@ -240,8 +249,12 @@ def write(
240249
Minimum change in scores between iterations.
241250
max_iterations : int
242251
Maximum number of iterations to run.
243-
scaler : Any
244-
Name of the scaler applied on the resulting scores.
252+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
253+
The scaler to use. Can be:
254+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
255+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
256+
- A ScalerConfig instance
257+
- "NONE" (default, no scaling)
245258
relationship_types : list[str]
246259
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
247260
node_labels : list[str]

graphdatascience/procedure_surface/api/centrality/eigenvector_endpoints.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig
1011
from graphdatascience.procedure_surface.api.default_values import ALL_LABELS, ALL_TYPES
1112
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1213

@@ -20,7 +21,7 @@ def mutate(
2021
max_iterations: int = 20,
2122
tolerance: float = 1.0e-7,
2223
source_nodes: Any | None = None,
23-
scaler: Any = "NONE",
24+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
2425
relationship_weight_property: str | None = None,
2526
relationship_types: list[str] = ALL_TYPES,
2627
node_labels: list[str] = ALL_LABELS,
@@ -50,8 +51,12 @@ def mutate(
5051
The tolerance for convergence detection
5152
source_nodes : Any | None, default=None
5253
The source nodes to start the computation from
53-
scaler : Any
54-
Scaling configuration for the algorithm
54+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
55+
The scaler to use. Can be:
56+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
57+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
58+
- A ScalerConfig instance
59+
- "NONE" (default, no scaling)
5560
relationship_weight_property : str | None, default=None
5661
The property name that contains weight values for relationships
5762
relationship_types : list[str]
@@ -84,7 +89,7 @@ def stats(
8489
max_iterations: int = 20,
8590
tolerance: float = 1.0e-7,
8691
source_nodes: Any | None = None,
87-
scaler: Any = "NONE",
92+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
8893
relationship_weight_property: str | None = None,
8994
relationship_types: list[str] = ALL_TYPES,
9095
node_labels: list[str] = ALL_LABELS,
@@ -112,8 +117,12 @@ def stats(
112117
The tolerance for convergence detection
113118
source_nodes : Any | None, default=None
114119
The source nodes to start the computation from
115-
scaler : Any
116-
Scaling configuration for the algorithm
120+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
121+
The scaler to use. Can be:
122+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
123+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
124+
- A ScalerConfig instance
125+
- "NONE" (default, no scaling)
117126
relationship_weight_property : str | None, default=None
118127
The property name that contains weight values for relationships
119128
relationship_types : list[str]
@@ -146,7 +155,7 @@ def stream(
146155
max_iterations: int = 20,
147156
tolerance: float = 1.0e-7,
148157
source_nodes: Any | None = None,
149-
scaler: Any = "NONE",
158+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
150159
relationship_weight_property: str | None = None,
151160
relationship_types: list[str] = ALL_TYPES,
152161
node_labels: list[str] = ALL_LABELS,
@@ -169,8 +178,12 @@ def stream(
169178
The tolerance for convergence detection
170179
source_nodes : Any | None, default=None
171180
The source nodes to start the computation from
172-
scaler : Any
173-
Scaling configuration for the algorithm
181+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
182+
The scaler to use. Can be:
183+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
184+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
185+
- A ScalerConfig instance
186+
- "NONE" (default, no scaling)
174187
relationship_weight_property : str | None, default=None
175188
The property name that contains weight values for relationships
176189
relationship_types : list[str]
@@ -204,7 +217,7 @@ def write(
204217
max_iterations: int = 20,
205218
tolerance: float = 1.0e-7,
206219
source_nodes: Any | None = None,
207-
scaler: Any = "NONE",
220+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
208221
relationship_weight_property: str | None = None,
209222
relationship_types: list[str] = ALL_TYPES,
210223
node_labels: list[str] = ALL_LABELS,
@@ -235,8 +248,12 @@ def write(
235248
The tolerance for convergence detection
236249
source_nodes : Any | None, default=None
237250
The source nodes to start the computation from
238-
scaler : Any
239-
Scaling configuration for the algorithm
251+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
252+
The scaler to use. Can be:
253+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
254+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
255+
- A ScalerConfig instance
256+
- "NONE" (default, no scaling)
240257
relationship_weight_property : str | None, default=None
241258
The property name that contains weight values for relationships
242259
relationship_types : list[str]
@@ -271,7 +288,7 @@ def estimate(
271288
max_iterations: int = 20,
272289
tolerance: float = 1.0e-7,
273290
source_nodes: Any | None = None,
274-
scaler: Any = "NONE",
291+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
275292
relationship_weight_property: str | None = None,
276293
relationship_types: list[str] = ALL_TYPES,
277294
node_labels: list[str] = ALL_LABELS,
@@ -290,8 +307,12 @@ def estimate(
290307
The tolerance for convergence detection
291308
source_nodes : Any | None, default=None
292309
The source nodes to start the computation from
293-
scaler : Any
294-
Scaling configuration for the algorithm
310+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
311+
The scaler to use. Can be:
312+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
313+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
314+
- A ScalerConfig instance
315+
- "NONE" (default, no scaling)
295316
relationship_weight_property : str | None, default=None
296317
The property name that contains weight values for relationships
297318
relationship_types : list[str]

graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
from graphdatascience.procedure_surface.api.catalog.scaler_config import ScalerConfig
1011
from graphdatascience.procedure_surface.api.default_values import ALL_LABELS, ALL_TYPES
1112
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1213

@@ -20,7 +21,7 @@ def mutate(
2021
damping_factor: float = 0.85,
2122
tolerance: float = 1.0e-7,
2223
max_iterations: int = 20,
23-
scaler: Any = "NONE",
24+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
2425
relationship_types: list[str] = ALL_TYPES,
2526
node_labels: list[str] = ALL_LABELS,
2627
sudo: bool = False,
@@ -49,8 +50,12 @@ def mutate(
4950
Minimum change in scores between iterations.
5051
max_iterations : int
5152
Maximum number of iterations to run.
52-
scaler : Any
53-
Name of the scaler applied on the resulting scores.
53+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
54+
The scaler to use. Can be:
55+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
56+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
57+
- A ScalerConfig instance
58+
- "NONE" (default, no scaling)
5459
relationship_types : list[str]
5560
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
5661
node_labels : list[str]
@@ -84,7 +89,7 @@ def stats(
8489
damping_factor: float = 0.85,
8590
tolerance: float = 1.0e-7,
8691
max_iterations: int = 20,
87-
scaler: Any = "NONE",
92+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
8893
relationship_types: list[str] = ALL_TYPES,
8994
node_labels: list[str] = ALL_LABELS,
9095
sudo: bool = False,
@@ -111,8 +116,12 @@ def stats(
111116
Minimum change in scores between iterations.
112117
max_iterations : int
113118
Maximum number of iterations to run.
114-
scaler : Any
115-
Name of the scaler applied on the resulting scores.
119+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
120+
The scaler to use. Can be:
121+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
122+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
123+
- A ScalerConfig instance
124+
- "NONE" (default, no scaling)
116125
relationship_types : list[str]
117126
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
118127
node_labels : list[str]
@@ -146,7 +155,7 @@ def stream(
146155
damping_factor: float = 0.85,
147156
tolerance: float = 1.0e-7,
148157
max_iterations: int = 20,
149-
scaler: Any = "NONE",
158+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
150159
relationship_types: list[str] = ALL_TYPES,
151160
node_labels: list[str] = ALL_LABELS,
152161
sudo: bool = False,
@@ -170,8 +179,12 @@ def stream(
170179
Minimum change in scores between iterations
171180
max_iterations : int
172181
The maximum number of iterations to run
173-
scaler : Any
174-
Configuration for scaling the scores
182+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
183+
The scaler to use. Can be:
184+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
185+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
186+
- A ScalerConfig instance
187+
- "NONE" (default, no scaling)
175188
relationship_types : list[str]
176189
The relationship types used to select relationships for this algorithm run
177190
node_labels : list[str]
@@ -206,7 +219,7 @@ def write(
206219
damping_factor: float = 0.85,
207220
tolerance: float = 1.0e-7,
208221
max_iterations: int = 20,
209-
scaler: Any = "NONE",
222+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
210223
relationship_types: list[str] = ALL_TYPES,
211224
node_labels: list[str] = ALL_LABELS,
212225
sudo: bool = False,
@@ -236,8 +249,12 @@ def write(
236249
Minimum change in scores between iterations.
237250
max_iterations : int
238251
Maximum number of iterations to run.
239-
scaler : Any
240-
Name of the scaler applied on the resulting scores.
252+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
253+
The scaler to use. Can be:
254+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
255+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
256+
- A ScalerConfig instance
257+
- "NONE" (default, no scaling)
241258
relationship_types : list[str]
242259
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
243260
node_labels : list[str]
@@ -273,7 +290,7 @@ def estimate(
273290
damping_factor: float = 0.85,
274291
tolerance: float = 1.0e-7,
275292
max_iterations: int = 20,
276-
scaler: Any = "NONE",
293+
scaler: str | dict[str, str | int | float] | ScalerConfig = "NONE",
277294
relationship_types: list[str] = ALL_TYPES,
278295
node_labels: list[str] = ALL_LABELS,
279296
concurrency: Any | None = None,
@@ -293,8 +310,12 @@ def estimate(
293310
Minimum change in scores between iterations
294311
max_iterations : int
295312
The maximum number of iterations to run
296-
scaler : Any
297-
Configuration for scaling the scores
313+
scaler : str | dict[str, str | int | float] | ScalerConfig, default="NONE"
314+
The scaler to use. Can be:
315+
- A string (e.g., 'MinMax', 'Mean', 'Max', 'Log', 'StdScore', 'Center', 'L1Norm', 'L2Norm', 'NONE')
316+
- A dictionary with scaler configuration (e.g., {'type': 'Log', 'offset': 1.0})
317+
- A ScalerConfig instance
318+
- "NONE" (default, no scaling)
298319
relationship_types : list[str]
299320
The relationship types used to select relationships for this algorithm run
300321
node_labels : list[str]

0 commit comments

Comments
 (0)