
Commit 429d4ea

Author: Ziqun Ye
Commit message: add implementation for predict
Parent: 072a048

File tree: 2 files changed (+46 −9 lines)

ads/model/deployment/model_deployment.py

Lines changed: 18 additions & 5 deletions
@@ -828,6 +828,8 @@ def predict(
         data: Any = None,
         serializer: "ads.model.ModelInputSerializer" = model_input_serializer,
         auto_serialize_data: bool = False,
+        model_name: str = None,
+        model_version: str = None,
         **kwargs,
     ) -> dict:
         """Returns prediction of input data run against the model deployment endpoint.
@@ -860,6 +862,10 @@ def predict(
             If `auto_serialize_data=False`, `data` required to be bytes or json serializable
             and `json_input` required to be json serializable. If `auto_serialize_data` set
             to True, data will be serialized before sending to model deployment endpoint.
+        model_name: str
+            Defaults to None. When `inference_server="triton"`, the name of the model to invoke.
+        model_version: str
+            Defaults to None. When `inference_server="triton"`, the version of the model to invoke.
         kwargs:
             content_type: str
                 Used to indicate the media type of the resource.
@@ -917,9 +923,16 @@ def predict(
             raise TypeError(
                 "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data."
             )
-
+        if model_name and model_version:
+            header['model-name'] = model_name
+            header['model-version'] = model_version
+        elif not model_version and not model_name:
+            pass
+        else:
+            raise ValueError("`model_name` and `model_version` have to be provided together.")
         prediction = send_request(
-            data=data, endpoint=endpoint, is_json_payload=is_json_payload, header=header
+            data=data, endpoint=endpoint, is_json_payload=is_json_payload, header=header,
         )
         return prediction

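Read together, the hunks above extend `ModelDeployment.predict` for Triton-backed deployments: when both new arguments are supplied they are forwarded as the `model-name` and `model-version` request headers, and supplying only one of the pair raises a `ValueError`. A minimal sketch of the new call, assuming an already-active deployment (the OCID, payload, and model names are placeholders):

    from ads.model.deployment import ModelDeployment

    deployment = ModelDeployment.from_id("<deployment_ocid>")

    # Both values travel as the `model-name` / `model-version` request headers.
    prediction = deployment.predict(
        data=b'{"inputs": [1.0, 2.0, 3.0]}',
        model_name="my_model",
        model_version="1",
    )

    # Passing only one of the pair now fails fast:
    # ValueError: `model_name` and `model_version` have to be provided together.
    deployment.predict(data=b'{}', model_name="my_model")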
@@ -1391,9 +1404,9 @@ def _update_from_oci_model(self, oci_model_instance) -> "ModelDeployment":
             infrastructure.CONST_WEB_CONCURRENCY,
             runtime.env.get("WEB_CONCURRENCY", None),
         )
-        if runtime.env.get("CONTAINER_TYPE", None) == "TRITON":
+        if runtime.env.pop("CONTAINER_TYPE", None) == "TRITON":
             runtime.set_spec(
-                runtime.CONST_TRITON, True
+                runtime.CONST_INFERENCE_SERVER, "triton"
             )

         self.set_spec(self.CONST_INFRASTRUCTURE, infrastructure)
@@ -1571,7 +1584,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
                 infrastructure.web_concurrency
             )
             runtime.set_spec(runtime.CONST_ENV, environment_variables)
-        if runtime.triton:
+        if runtime.inference_server and runtime.inference_server.lower() == "triton":
             environment_variables["CONTAINER_TYPE"] = "TRITON"
             runtime.set_spec(runtime.CONST_ENV, environment_variables)
         environment_configuration_details = {
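The last two hunks are the two halves of a round trip for the renamed spec. When building the deployment configuration, `inference_server == "triton"` is translated into the `CONTAINER_TYPE=TRITON` environment variable the service consumes; when reading a deployment back in `_update_from_oci_model`, `env.pop` removes that variable from the user-visible environment and restores it as the `inferenceServer` spec. A toy sketch of the mapping (hypothetical helpers, not the actual ADS classes):

    # Hypothetical helpers illustrating the env-var round trip; not ADS API.
    def to_service_env(spec: dict) -> dict:
        env = dict(spec.get("env", {}))
        if spec.get("inferenceServer") == "triton":
            env["CONTAINER_TYPE"] = "TRITON"  # what the service consumes
        return env

    def from_service_env(env: dict) -> dict:
        env = dict(env)  # copy so pop() does not mutate the caller's dict
        spec = {}
        # pop() keeps the implementation detail out of the user-visible env
        if env.pop("CONTAINER_TYPE", None) == "TRITON":
            spec["inferenceServer"] = "triton"
        spec["env"] = env
        return spec

    assert from_service_env(to_service_env({"inferenceServer": "triton"})) == {
        "inferenceServer": "triton",
        "env": {},
    }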

ads/model/deployment/model_deployment_runtime.py

Lines changed: 28 additions & 4 deletions
@@ -330,7 +330,7 @@ class ModelDeploymentContainerRuntime(ModelDeploymentRuntime):
     CONST_ENTRYPOINT = "entrypoint"
     CONST_SERVER_PORT = "serverPort"
     CONST_HEALTH_CHECK_PORT = "healthCheckPort"
-    CONST_TRITON = "triton"
+    CONST_INFERENCE_SERVER = "inferenceServer"

     attribute_map = {
         **ModelDeploymentRuntime.attribute_map,
@@ -340,7 +340,7 @@ class ModelDeploymentContainerRuntime(ModelDeploymentRuntime):
         CONST_ENTRYPOINT: "entrypoint",
         CONST_SERVER_PORT: "server_port",
         CONST_HEALTH_CHECK_PORT: "health_check_port",
-        CONST_TRITON: "triton"
+        CONST_INFERENCE_SERVER: "inference_server"
     }

     payload_attribute_map = {
@@ -544,7 +544,7 @@ def inference_server(self) -> str:
         str
             The inference server.
         """
-        return self.get_spec(self.CONST_TRITON, None)
+        return self.get_spec(self.CONST_INFERENCE_SERVER, None)

     def with_inference_server(self, inference_server: str = "triton") -> "ModelDeploymentRuntime":
         """Sets the inference server. Current supported inference server is "triton".
@@ -559,5 +559,29 @@ def with_inference_server(self, inference_server: str = "triton") -> "ModelDeploymentRuntime":
         -------
         ModelDeploymentRuntime
             The ModelDeploymentRuntime instance (self).
+
+        Example
+        -------
+        >>> infrastructure = ModelDeploymentInfrastructure()\
+        ...     .with_project_id(<project_id>)\
+        ...     .with_compartment_id(<compartment_id>)\
+        ...     .with_shape_name("VM.Standard.E4.Flex")\
+        ...     .with_replica(2)\
+        ...     .with_bandwidth_mbps(10)\
+        ...     .with_access_log(log_group_id=<deployment_log_group_id>, log_id=<deployment_access_log_id>)\
+        ...     .with_predict_log(log_group_id=<deployment_log_group_id>, log_id=<deployment_predict_log_id>)
+
+        >>> runtime = ModelDeploymentContainerRuntime()\
+        ...     .with_image(<container_image>)\
+        ...     .with_server_port(<server_port>)\
+        ...     .with_health_check_port(<health_check_port>)\
+        ...     .with_model_uri(<model_id>)\
+        ...     .with_env({"key":"value", "key2":"value2"})\
+        ...     .with_inference_server("triton")
+        >>> deployment = ModelDeployment()\
+        ...     .with_display_name("Triton Example")\
+        ...     .with_infrastructure(infrastructure)\
+        ...     .with_runtime(runtime)
+        >>> deployment.deploy()
         """
-        return self.set_spec(self.CONST_TRITON, inference_server.lower())
+        return self.set_spec(self.CONST_INFERENCE_SERVER, inference_server.lower())
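Once a deployment built this way is active, the new `predict` arguments in model_deployment.py select an individual model inside the Triton container. A hypothetical follow-on to the docstring example; the payload shape (a KServe-style inference request) and model names depend on what the image actually serves:

    # Hypothetical continuation; "INPUT__0" and "my_model" are placeholders.
    prediction = deployment.predict(
        data='{"inputs": [{"name": "INPUT__0", "shape": [1, 3], '
             '"datatype": "FP32", "data": [1.0, 2.0, 3.0]}]}',
        model_name="my_model",    # sent as the `model-name` header
        model_version="1",        # sent as the `model-version` header
    )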
