diff --git a/graphdatascience/procedure_surface/api/catalog/catalog_endpoints.py b/graphdatascience/procedure_surface/api/catalog/catalog_endpoints.py index 0bd25922b..269f09122 100644 --- a/graphdatascience/procedure_surface/api/catalog/catalog_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/catalog_endpoints.py @@ -118,7 +118,7 @@ def generate( job_id : str | None, default=None Unique identifier for the job associated with the graph generation. sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress during graph generation. username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py b/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py index 72fa35b89..af2c1fd01 100644 --- a/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/node_label_endpoints.py @@ -33,7 +33,7 @@ def mutate( node_filter : str A Cypher predicate for filtering nodes in the input graph. sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -77,7 +77,7 @@ def write( node_filter : str A Cypher predicate for filtering nodes in the input graph. sudo : bool - Override memory estimation limits + Disable the memory guard. 
log_progress : bool | None, default=None Whether to log progress username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py b/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py index 117a28ef9..11d5036b4 100644 --- a/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/node_properties_endpoints.py @@ -43,7 +43,7 @@ def stream( concurrency : int | None The number of concurrent threads sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -90,7 +90,7 @@ def write( write_concurrency : int | None The number of concurrent threads used for writing sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py b/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py index 386b05f9a..cde5de2e9 100644 --- a/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py +++ b/graphdatascience/procedure_surface/api/catalog/relationships_endpoints.py @@ -40,7 +40,7 @@ def stream( concurrency : int | None, default=None The number of concurrent threads sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool = True Whether to log progress username : str | None, default=None @@ -82,7 +82,7 @@ def write( write_concurrency : int | None, default=None The number of concurrent threads used for writing sudo : bool - Override memory estimation limits + Disable the memory guard. 
log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -147,7 +147,7 @@ def index_inverse( concurrency : int | None, default=None The number of concurrent threads sudo : bool = False, - Override memory estimation limits + Disable the memory guard. log_progress : bool = True Whether to log progress username : str | None, default=None @@ -193,7 +193,7 @@ def to_undirected( concurrency : int | None, default=None The number of concurrent threads sudo : bool = False, - Override memory estimation limits + Disable the memory guard. log_progress : bool = True Whether to log progress username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py b/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py index 10039b496..90ab407ae 100644 --- a/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py @@ -195,7 +195,7 @@ def stream( node_labels : list[str] The node labels used to select nodes for this algorithm run sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool Whether to log progress username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py b/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py index f3890647c..ff74b5a51 100644 --- a/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/articulationpoints_endpoints.py @@ -41,7 +41,7 @@ def mutate( node_labels : list[str] The node labels used to select nodes for this algorithm run sudo : bool - Override memory estimation limits + Disable the memory guard. 
log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -83,7 +83,7 @@ def stats( node_labels : list[str] The node labels used to select nodes for this algorithm run sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -123,7 +123,7 @@ def stream( node_labels : list[str] The node labels used to select nodes for this algorithm run sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None @@ -171,7 +171,7 @@ def write( node_labels : list[str] The node labels used to select nodes for this algorithm run sudo : bool - Override memory estimation limits + Disable the memory guard. log_progress : bool | None, default=None Whether to log progress username : str | None, default=None diff --git a/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py b/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py index 43a7d4748..cff87c047 100644 --- a/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py @@ -12,6 +12,7 @@ class BetweennessEndpoints(ABC): + @abstractmethod def mutate( self, diff --git a/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py b/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py index c4d6c2ca7..564019b66 100644 --- a/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py +++ b/graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py @@ -13,6 +13,7 @@ class PageRankEndpoints(ABC): + @abstractmethod def mutate( self, diff --git a/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py 
b/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py index 58c34dffb..2f1d3e0e4 100644 --- a/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py @@ -12,9 +12,6 @@ class K1ColoringEndpoints(ABC): - """ - Abstract base class defining the API for the K-1 Coloring algorithm. - """ @abstractmethod def mutate( @@ -33,32 +30,35 @@ def mutate( username: str | None = None, ) -> K1ColoringMutateResult: """ - Executes the K-1 Coloring algorithm and writes the results to the in-memory graph as node properties. + Runs the K-1 Coloring algorithm and stores the results in the graph catalog as a new node property. + + The K-1 Coloring algorithm assigns a color to every node in the graph, trying to optimize for two objectives: + to make sure that every neighbor of a given node has a different color than the node itself, and to use as few colors as possible. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the color for each node + Name of the node property to store the results in. batch_size : int, default=10000 The batch size for processing concurrency : int | None, default=None - The number of concurrent threads + Number of CPU threads to use. job_id : str | None, default=None - An identifier for the job + Identifier for the computation. log_progress : bool, default=True - Whether to log progress + Display progress logging. max_iterations : int, default=10 - The maximum number of iterations of K-1 Coloring to run + Maximum number of iterations to run. node_labels : list[str] - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. 
relationship_types : list[str] - The relationship types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. sudo : bool, default=False - Override memory estimation limits + Disable the memory guard. username : str | None, default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/kcore_endpoints.py b/graphdatascience/procedure_surface/api/community/kcore_endpoints.py index 99b1125a9..451854519 100644 --- a/graphdatascience/procedure_surface/api/community/kcore_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/kcore_endpoints.py @@ -31,28 +31,32 @@ def mutate( username: str | None = None, ) -> KCoreMutateResult: """ - Executes the K-Core algorithm and writes the results to the in-memory graph as node properties. + Runs the K-Core Decomposition algorithm and stores the results in the graph catalog as a new node property. + + The K-core decomposition constitutes a process that separates the nodes in a graph into groups based on the degree sequence and topology of the graph. + The term `i-core` refers to a maximal subgraph of the original graph such that each node in this subgraph has degree at least `i`. + Each node is associated with a core value which denotes the largest value `i` such that the node belongs to the `i-core`. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the core value for each node + Name of the node property to store the results in. concurrency : int | None, default=None - The number of concurrent threads + Number of CPU threads to use. job_id : str | None, default=None - An identifier for the job + Identifier for the computation. 
log_progress : bool, default=True - Whether to log progress + Display progress logging. node_labels : list[str] - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. relationship_types : list[str] - The relationship types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. sudo : bool, default=False - Override memory estimation limits + Disable the memory guard. username : str | None, default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/louvain_endpoints.py b/graphdatascience/procedure_surface/api/community/louvain_endpoints.py index d2d73f0a3..077592c98 100644 --- a/graphdatascience/procedure_surface/api/community/louvain_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/louvain_endpoints.py @@ -37,42 +37,46 @@ def mutate( relationship_weight_property: str | None = None, ) -> LouvainMutateResult: """ - Executes the Louvain algorithm and writes the results to the in-memory graph as node properties. + Runs the Louvain algorithm and stores the results in the graph catalog as a new node property. + + The Louvain method is an algorithm to detect communities in large networks. + It maximizes a modularity score for each community, where the modularity quantifies the quality of an assignment of nodes to communities by evaluating how much more densely connected the nodes within a community are, compared to how connected they would be in a random network. + The Louvain algorithm is a hierarchical clustering algorithm that recursively merges communities into a single node and runs the modularity clustering on the condensed graphs. 
Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the community ID for each node + Name of the node property to store the results in. tolerance : float, default=0.0001 - The tolerance value for the algorithm convergence + Minimum change in scores between iterations. max_levels : int, default=10 The maximum number of levels in the hierarchy include_intermediate_communities : bool, default=False - Whether to include intermediate community assignments + Whether to include intermediate communities max_iterations : int, default=10 - The maximum number of iterations per level + Maximum number of iterations to run. relationship_types : list[str] - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : list[str] - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : bool, default=False - Override memory estimation limits + Disable the memory guard. log_progress : bool, default=True - Whether to log progress + Display progress logging. username : str | None, default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : int | None, default=None - The number of concurrent threads + Number of CPU threads to use. job_id : str | None, default=None - An identifier for the job + Identifier for the computation. 
seed_property : str | None, default=None - Defines node properties that are used as initial community identifiers + The property name that contains seed values consecutive_ids : bool, default=False - Flag to decide whether community identifiers are mapped into a consecutive id space + Whether to use consecutive IDs relationship_weight_property : str | None, default=None - The property name that contains weight + Name of the property to be used as weights. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/scc_endpoints.py b/graphdatascience/procedure_surface/api/community/scc_endpoints.py index 6e095de3f..f3c437dc6 100644 --- a/graphdatascience/procedure_surface/api/community/scc_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/scc_endpoints.py @@ -32,30 +32,33 @@ def mutate( username: str | None = None, ) -> SccMutateResult: """ - Executes the SCC algorithm and writes the results to the in-memory graph as node properties. + Runs the Strongly Connected Components algorithm and stores the results in the graph catalog as a new node property. + + The Strongly Connected Components (SCC) algorithm finds maximal sets of connected nodes in a directed graph. + A set is considered a strongly connected component if there is a directed path between each pair of nodes within the set. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the component ID for each node + Name of the node property to store the results in. concurrency : int | None, default=None - The number of concurrent threads + Number of CPU threads to use. consecutive_ids : bool, default=False - Flag to decide whether component identifiers are mapped into a consecutive id space + Whether to use consecutive IDs for components job_id : str | None, default=None - An identifier for the job + Identifier for the computation. 
log_progress : bool, default=True - Whether to log progress + Display progress logging. node_labels : list[str] - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. relationship_types : list[str] - The relationship types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. sudo : bool, default=False - Override memory estimation limits + Disable the memory guard. username : str | None, default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. Returns ------- diff --git a/graphdatascience/procedure_surface/api/community/wcc_endpoints.py b/graphdatascience/procedure_surface/api/community/wcc_endpoints.py index c7882553d..a9881306e 100644 --- a/graphdatascience/procedure_surface/api/community/wcc_endpoints.py +++ b/graphdatascience/procedure_surface/api/community/wcc_endpoints.py @@ -34,36 +34,39 @@ def mutate( relationship_weight_property: str | None = None, ) -> WccMutateResult: """ - Executes the WCC algorithm and writes the results to the in-memory graph as node properties. + Runs the Weakly Connected Components algorithm and stores the results in the graph catalog as a new node property. + + The Weakly Connected Components (WCC) algorithm finds sets of connected nodes in directed and undirected graphs where two nodes are connected if there exists a path between them. + In contrast to Strongly Connected Components (SCC), the direction of relationships on the path between two nodes is not considered. Parameters ---------- G : GraphV2 The graph to run the algorithm on mutate_property : str - The property name to store the component ID for each node + Name of the node property to store the results in. 
threshold : float, default=0.0 The minimum required weight to consider a relationship during traversal relationship_types : list[str] - The relationships types used to select relationships for this algorithm run + Filter the graph using the given relationship types. Relationships with any of the given types will be included. node_labels : list[str] - The node labels used to select nodes for this algorithm run + Filter the graph using the given node labels. Nodes with any of the given labels will be included. sudo : bool, default=False - Override memory estimation limits + Disable the memory guard. log_progress : bool, default=True - Whether to log progress + Display progress logging. username : str | None, default=None - The username to attribute the procedure run to + As an administrator, run the algorithm as a different user, to access also their graphs. concurrency : int | None, default=None - The number of concurrent threads + Number of CPU threads to use. job_id : str | None, default=None - An identifier for the job + Identifier for the computation. seed_property : str | None, default=None - Defines node properties that are used as initial component identifiers + The property name that contains seed values consecutive_ids : bool, default=False - Flag to decide whether component identifiers are mapped into a consecutive id space + Whether to use consecutive IDs for components relationship_weight_property : str | None, default=None - The property name that contains weight + Name of the property to be used as weights. 
Returns ------- diff --git a/graphdatascience/procedure_surface/api/docstrings/parameters.json b/graphdatascience/procedure_surface/api/docstrings/parameters.json new file mode 100644 index 000000000..7ca23faea --- /dev/null +++ b/graphdatascience/procedure_surface/api/docstrings/parameters.json @@ -0,0 +1,29 @@ +{ + "G": "The graph to run the algorithm on.", + "sudo": "Disable the memory guard.", + "log_progress": "Display progress logging.", + "username": "As an administrator, run the algorithm as a different user, to access also their graphs.", + "job_id": "Identifier for the computation.", + + "tolerance": "Minimum change in scores between iterations.", + "concurrency": "Number of CPU threads to use.", + "max_iterations": "Maximum number of iterations to run.", + "scaler": "Name of the scaler applied on the resulting scores.", + "source_nodes": "List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.", + "orientation": "The orientation of relationships to consider. Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.", + + "sampling_size": "Number of source nodes to consider for computing centrality scores.", + "sampling_seed": "Seed value for the random number generator that selects source nodes.", + "damping_factor": "Probability of a jump to a random node.", + "use_wasserman_faust": "Use the improved Wasserman-Faust formula for closeness computation.", + "seed_set_size": "Number of nodes that maximize the expected spread in the network.", + "propagation_probability": "Probability of a node being activated by an active neighbour node.", + "monte_carlo_simulations": "Number of Monte-Carlo simulations.", + + "mutate_property": "Name of the node property to store the results in.", + "write_property": "Name of the node property to store the results in.", + + "node_labels": "Filter the graph using the given node labels. 
Nodes with any of the given labels will be included.", "relationship_types": "Filter the graph using the given relationship types. Relationships with any of the given types will be included.", "relationship_weight_property": "Name of the property to be used as weights." +} \ No newline at end of file