Skip to content

Commit 1c8c99b

Browse files
committed
Reduce run_cypher calls for GDS endpoints
1 parent de7fc4b commit 1c8c99b

File tree

4 files changed

+72
-55
lines changed

graphdatascience/model/model.py

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from __future__ import annotations
2+
13
from abc import ABC, abstractmethod
2-
from typing import Any, Dict
4+
from typing import Any, Dict, Optional
35

46
from pandas import DataFrame, Series
57

@@ -41,14 +43,15 @@ def _list_info(self) -> DataFrame:
4143
params = {"name": self.name()}
4244

4345
# FIXME use call procedure + do post processing on the client side
46+
# TODO really fixme
4447
info = self._query_runner.run_cypher(query, params, custom_error=False)
4548

4649
if len(info) == 0:
4750
raise ValueError(f"There is no '{self.name()}' in the model catalog")
4851

4952
return info
5053

51-
def _estimate_predict(self, predict_mode: str, graph_name: str, config: Dict[str, Any]) -> "Series[Any]":
54+
def _estimate_predict(self, predict_mode: str, graph_name: str, config: Dict[str, Any]) -> Series[Any]:
5255
endpoint = f"{self._endpoint_prefix()}{predict_mode}.estimate"
5356
config["modelName"] = self.name()
5457
params = CallParameters(graph_name=graph_name, config=config)
@@ -78,26 +81,26 @@ def type(self) -> str:
7881
"""
7982
return self._list_info()["modelInfo"][0]["modelType"] # type: ignore
8083

81-
def train_config(self) -> "Series[Any]":
84+
def train_config(self) -> Series[Any]:
8285
"""
8386
Get the train config of the model.
8487
8588
Returns:
8689
The train config of the model.
8790
8891
"""
89-
train_config: "Series[Any]" = Series(self._list_info()["trainConfig"][0])
92+
train_config: Series[Any] = Series(self._list_info()["trainConfig"][0])
9093
return train_config
9194

92-
def graph_schema(self) -> "Series[Any]":
95+
def graph_schema(self) -> Series[Any]:
9396
"""
9497
Get the graph schema of the model.
9598
9699
Returns:
97100
The graph schema of the model.
98101
99102
"""
100-
graph_schema: "Series[Any]" = Series(self._list_info()["graphSchema"][0])
103+
graph_schema: Series[Any] = Series(self._list_info()["graphSchema"][0])
101104
return graph_schema
102105

103106
def loaded(self) -> bool:
@@ -151,7 +154,7 @@ def published(self) -> bool:
151154
"""
152155
return self._list_info()["published"].squeeze() # type: ignore
153156

154-
def model_info(self) -> "Series[Any]":
157+
def model_info(self) -> Series[Any]:
155158
"""
156159
Get the model info of the model.
157160
@@ -179,7 +182,7 @@ def exists(self) -> bool:
179182
endpoint=endpoint, params=params, yields=yields, custom_error=False
180183
).squeeze()
181184

182-
def drop(self, failIfMissing: bool = False) -> "Series[Any]":
185+
def drop(self, failIfMissing: bool = False) -> Series[Any]:
183186
"""
184187
Drop the model.
185188
@@ -190,27 +193,29 @@ def drop(self, failIfMissing: bool = False) -> "Series[Any]":
190193
The result of the drop operation.
191194
192195
"""
196+
params = CallParameters(model_name=self._name, fail_if_missing=failIfMissing)
193197
if self._server_version < ServerVersion(2, 5, 0):
194-
query = "CALL gds.beta.model.drop($model_name, $fail_if_missing)"
198+
return self._query_runner.call_procedure( # type: ignore
199+
"gds.beta.model.drop", params=params, custom_error=False
200+
).squeeze()
195201
else:
196-
query = """
197-
CALL gds.model.drop($model_name, $fail_if_missing)
198-
YIELD
199-
modelName, modelType, modelInfo,
200-
creationTime, trainConfig, graphSchema,
201-
loaded, stored, published
202-
RETURN
203-
modelName, modelType,
204-
modelInfo {.*, modelName: modelName, modelType: modelType} AS modelInfo,
205-
creationTime, trainConfig, graphSchema,
206-
loaded, stored, published, published AS shared
207-
"""
202+
result: Optional[Series[Any]] = self._query_runner.call_procedure(
203+
"gds.model.drop", params=params, custom_error=False
204+
).squeeze()
208205

209-
params = {"model_name": self._name, "fail_if_missing": failIfMissing}
210-
# FIXME use call procedure + do post processing on the client side
211-
return self._query_runner.run_cypher(query, params, custom_error=False).squeeze() # type: ignore
206+
if result is None:
207+
return Series()
208+
209+
# modelInfo {.*, modelName: modelName, modelType: modelType} AS modelInfo
210+
result["modelInfo"] = {
211+
**result["modelInfo"],
212+
"modelName": result["modelName"],
213+
"modelType": result["modelType"],
214+
}
215+
result["shared"] = result["published"]
216+
return result
212217

213-
def metrics(self) -> "Series[Any]":
218+
def metrics(self) -> Series[Any]:
214219
"""
215220
Get the metrics of the model.
216221
@@ -219,7 +224,7 @@ def metrics(self) -> "Series[Any]":
219224
220225
"""
221226
model_info = self._list_info()["modelInfo"][0]
222-
metrics: "Series[Any]" = Series(model_info["metrics"])
227+
metrics: Series[Any] = Series(model_info["metrics"])
223228
return metrics
224229

225230
@graph_type_check
@@ -242,7 +247,7 @@ def predict_stream(self, G: Graph, **config: Any) -> DataFrame:
242247
return self._query_runner.call_procedure(endpoint=endpoint, params=params, logging=True)
243248

244249
@graph_type_check
245-
def predict_stream_estimate(self, G: Graph, **config: Any) -> "Series[Any]":
250+
def predict_stream_estimate(self, G: Graph, **config: Any) -> Series[Any]:
246251
"""
247252
Estimate the prediction on the given graph using the model and stream the results as DataFrame
248253
@@ -257,7 +262,7 @@ def predict_stream_estimate(self, G: Graph, **config: Any) -> "Series[Any]":
257262
return self._estimate_predict("stream", G.name(), config)
258263

259264
@graph_type_check
260-
def predict_mutate(self, G: Graph, **config: Any) -> "Series[Any]":
265+
def predict_mutate(self, G: Graph, **config: Any) -> Series[Any]:
261266
"""
262267
Predict on the given graph using the model and mutate the graph with the results.
263268
@@ -278,7 +283,7 @@ def predict_mutate(self, G: Graph, **config: Any) -> "Series[Any]":
278283
).squeeze()
279284

280285
@graph_type_check
281-
def predict_mutate_estimate(self, G: Graph, **config: Any) -> "Series[Any]":
286+
def predict_mutate_estimate(self, G: Graph, **config: Any) -> Series[Any]:
282287
"""
283288
Estimate the memory needed to predict on the given graph using the model.
284289

graphdatascience/model/model_proc_runner.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from typing import Any, Dict, List, Optional, Tuple
1+
from __future__ import annotations
2+
3+
from typing import Any, Dict, List, Optional, Tuple, Union
24

35
from pandas import DataFrame, Series
46

@@ -49,23 +51,29 @@ def create(
4951
class ModelProcRunner(ModelResolver):
5052
@client_only_endpoint("gds.model")
5153
def get(self, model_name: str) -> Model:
54+
params = CallParameters(model_name=model_name)
5255
if self._server_version < ServerVersion(2, 5, 0):
53-
query = "CALL gds.beta.model.list($model_name) YIELD modelInfo RETURN modelInfo.modelType AS modelType"
56+
endpoint = "gds.beta.model.list"
57+
yields = ["modelInfo"]
58+
result: Series[Any] = self._query_runner.call_procedure(
59+
endpoint=endpoint, params=params, yields=yields, custom_error=False
60+
).squeeze()
61+
model_type = str(result["modelInfo"]["modelType"]) if not result.empty else None
5462
else:
55-
query = "CALL gds.model.list($model_name) YIELD modelType"
56-
57-
params = {"model_name": model_name}
58-
# FIXME use call procedure + do post processing on the client side
59-
result = self._query_runner.run_cypher(query, params, custom_error=False)
60-
61-
if len(result) == 0:
63+
endpoint = "gds.model.list"
64+
yields = ["modelType"]
65+
result: Union[str, Series[Any]] = self._query_runner.call_procedure(
66+
endpoint=endpoint, params=params, yields=yields, custom_error=False
67+
).squeeze()
68+
model_type = result if isinstance(result, str) else None
69+
70+
if model_type is None:
6271
raise ValueError(f"No loaded model named '{model_name}' exists")
6372

64-
model_type = str(result["modelType"].squeeze())
6573
return self._resolve_model(model_type, model_name)
6674

6775
@compatible_with("store", min_inclusive=ServerVersion(2, 5, 0))
68-
def store(self, model: Model, failIfUnsupportedType: bool = True) -> "Series[Any]":
76+
def store(self, model: Model, failIfUnsupportedType: bool = True) -> Series[Any]:
6977
self._namespace += ".store"
7078
params = CallParameters(model_name=model.name(), fail_flag=failIfUnsupportedType)
7179

@@ -88,7 +96,7 @@ def publish(self, model: Model) -> Model:
8896
return self._resolve_model(model_type, model_name)
8997

9098
@compatible_with("load", min_inclusive=ServerVersion(2, 5, 0))
91-
def load(self, model_name: str) -> Tuple[Model, "Series[Any]"]:
99+
def load(self, model_name: str) -> Tuple[Model, Series[Any]]:
92100
self._namespace += ".load"
93101

94102
params = CallParameters(model_name=model_name)
@@ -101,7 +109,7 @@ def load(self, model_name: str) -> Tuple[Model, "Series[Any]"]:
101109
return proc_runner.get(result["modelName"]), result
102110

103111
@compatible_with("delete", min_inclusive=ServerVersion(2, 5, 0))
104-
def delete(self, model: Model) -> "Series[Any]":
112+
def delete(self, model: Model) -> Series[Any]:
105113
self._namespace += ".delete"
106114
params = CallParameters(model_name=model.name())
107115
return self._query_runner.call_procedure(endpoint=self._namespace, params=params).squeeze() # type: ignore
@@ -117,15 +125,15 @@ def list(self, model: Optional[Model] = None) -> DataFrame:
117125
return self._query_runner.call_procedure(endpoint=self._namespace, params=params)
118126

119127
@compatible_with("exists", min_inclusive=ServerVersion(2, 5, 0))
120-
def exists(self, model_name: str) -> "Series[Any]":
128+
def exists(self, model_name: str) -> Series[Any]:
121129
self._namespace += ".exists"
122130

123131
return self._query_runner.call_procedure( # type: ignore
124132
endpoint=self._namespace, params=CallParameters(model_name=model_name)
125133
).squeeze()
126134

127135
@compatible_with("drop", min_inclusive=ServerVersion(2, 5, 0))
128-
def drop(self, model: Model) -> "Series[Any]":
136+
def drop(self, model: Model) -> Series[Any]:
129137
self._namespace += ".drop"
130138

131139
return self._query_runner.call_procedure( # type: ignore

graphdatascience/tests/integration/test_model_object.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def gs_model(gds: GraphDataScience, G: Graph, runner: Neo4jQueryRunner) -> Gener
4444
yield model
4545

4646
namespace = "beta." if gds.server_version() < ServerVersion(2, 5, 0) else ""
47-
query = f"CALL gds.{namespace}model.drop($name)"
47+
query = f"CALL gds.{namespace}model.drop($name, false)"
4848
params = {"name": model.name()}
4949
runner.run_cypher(query, params)
5050

@@ -53,25 +53,24 @@ def test_model_exists(gs_model: GraphSageModel) -> None:
5353
assert gs_model.exists()
5454

5555

56-
def test_model_drop(gds: GraphDataScience, G: Graph) -> None:
57-
model, _ = gds.beta.graphSage.train(G, modelName="gs-model", featureProperties=["age"])
56+
def test_model_drop(gds: GraphDataScience, G: Graph, gs_model: GraphSageModel) -> None:
57+
model_type = gs_model.type()
58+
model_published = gs_model.shared()
5859

59-
model_type = model.type()
60-
model_published = model.shared()
61-
drop_result = model.drop()
60+
drop_result = gs_model.drop()
6261
if gds.server_version() >= ServerVersion(2, 5, 0):
63-
assert drop_result["modelName"] == model.name()
62+
assert drop_result["modelName"] == gs_model.name()
6463
assert drop_result["modelType"] == model_type
6564
assert drop_result["published"] == model_published
66-
assert drop_result["modelInfo"]["modelName"] == model.name()
65+
assert drop_result["modelInfo"]["modelName"] == gs_model.name()
6766

68-
assert not model.exists()
67+
assert not gs_model.exists()
6968

7069
# Should not raise error.
71-
model.drop(failIfMissing=False)
70+
gs_model.drop(failIfMissing=False)
7271

7372
with pytest.raises(Exception):
74-
model.drop(failIfMissing=True)
73+
gs_model.drop(failIfMissing=True)
7574

7675

7776
def test_model_name(gs_model: GraphSageModel) -> None:

graphdatascience/tests/integration/test_model_ops.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,11 @@ def test_model_get_graphsage(gds: GraphDataScience, gs_model: GraphSageModel) ->
300300
model.drop()
301301

302302

303+
def test_model_get_no_model(gds: GraphDataScience) -> None:
304+
with pytest.raises(ValueError, match="No loaded model named 'no_model' exists"):
305+
gds.model.get("no_model")
306+
307+
303308
@pytest.mark.model_store_location
304309
@pytest.mark.filterwarnings("ignore: The query used a deprecated procedure.")
305310
def test_deprecated_model_Functions_still_work(gds: GraphDataScience, gs_model: GraphSageModel) -> None:

Comments (0)