
Commit 31abe0d

Add init method for the infrastructure classes.
1 parent d9fc1a6 commit 31abe0d

10 files changed: +104 -28 lines changed

ads/common/serializer.py

Lines changed: 1 addition & 1 deletion
@@ -243,7 +243,7 @@ def to_yaml(
         """
         note = kwargs.pop("note", "")

-        yaml_string = note + yaml.dump(self.to_dict(**kwargs), Dumper=dumper)
+        yaml_string = f"{note}\n" + yaml.dump(self.to_dict(**kwargs), Dumper=dumper)
         if uri:
             self._write_to_file(s=yaml_string, uri=uri, **kwargs)
             return None
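For context, a minimal sketch of what the changed line produces (the note and spec values below are illustrative, not taken from this commit): the note passed via kwargs is now followed by a newline, so the dumped YAML no longer starts on the same line as the note.

import yaml

note = "# YAML comment emitted by ADS"              # hypothetical note value
spec = {"kind": "job", "spec": {"name": "my-job"}}  # hypothetical to_dict() output

# Old behavior: note + yaml.dump(spec) glued the first YAML key onto the note line.
# New behavior: a newline separates the note from the YAML document.
yaml_string = f"{note}\n" + yaml.dump(spec)
print(yaml_string)
# # YAML comment emitted by ADS
# kind: job
# spec:
#   name: my-job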

ads/jobs/builders/infrastructure/dataflow.py

Lines changed: 19 additions & 2 deletions
@@ -41,6 +41,7 @@
 DEFAULT_LANGUAGE = "PYTHON"
 DEFAULT_SPARK_VERSION = "3.2.1"
 DEFAULT_NUM_EXECUTORS = 1
+DEFAULT_SHAPE = "VM.Standard.E3.Flex"


 def conda_pack_name_to_dataflow_config(conda_uri):

@@ -366,7 +367,6 @@ def executor(self):


 class DataFlow(Infrastructure):
-
     CONST_COMPARTMENT_ID = "compartment_id"
     CONST_CONFIG = "configuration"
     CONST_EXECUTE = "execute"

@@ -423,7 +423,6 @@ def __init__(self, spec: dict = None, **kwargs):
         self.runtime = None
         self._name = None

-
     def _load_default_properties(self) -> Dict:
         """
         Load default properties from environment variables, notebook session, etc.

@@ -1133,3 +1132,21 @@ def to_yaml(self, **kwargs) -> str:
             YAML stored in a string.
         """
         return yaml.safe_dump(self.to_dict(**kwargs))
+
+    def init(self) -> "DataFlow":
+        """Initializes a starter specification for the DataFlow.
+
+        Returns
+        -------
+        DataFlow
+            The DataFlow instance (self)
+        """
+        return (
+            self.build()
+            .with_compartment_id(self.compartment_id or "{Provide a compartment OCID}")
+            .with_language(self.language or DEFAULT_LANGUAGE)
+            .with_spark_version(self.spark_version or DEFAULT_SPARK_VERSION)
+            .with_num_executors(self.num_executors or DEFAULT_NUM_EXECUTORS)
+            .with_driver_shape(self.driver_shape or DEFAULT_SHAPE)
+            .with_executor_shape(self.executor_shape or DEFAULT_SHAPE)
+        )
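A minimal usage sketch of the new DataFlow.init(), assuming DataFlow is importable from ads.jobs as in the rest of the ADS public API; to_yaml() is the method shown in the context lines above.

from ads.jobs import DataFlow  # import path assumed from the ADS public API

# init() builds the spec and back-fills anything missing with defaults or
# "{...}" placeholders: compartment OCID, language, Spark version, number
# of executors, and the driver/executor shapes (DEFAULT_SHAPE above).
dataflow = DataFlow().init()
print(dataflow.to_yaml())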

ads/jobs/builders/infrastructure/dsc_job.py

Lines changed: 15 additions & 0 deletions
@@ -1438,6 +1438,21 @@ def build(self) -> DataScienceJob:
         self._update_from_dsc_model(self.dsc_job, overwrite=False)
         return self

+    def init(self) -> DataScienceJob:
+        """Initializes a starter specification for the DataScienceJob.
+
+        Returns
+        -------
+        DataScienceJob
+            The DataScienceJob instance (self)
+        """
+        return (
+            self.build()
+            .with_compartment_id(self.compartment_id or "{Provide a compartment OCID}")
+            .with_project_id(self.project_id or "{Provide a project OCID}")
+            .with_subnet_id(self.subnet_id or "{Provide a subnet OCID}")
+        )
+
     def create(self, runtime, **kwargs) -> DataScienceJob:
         """Creates a job with runtime.
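A sketch of how the starter DataScienceJob infrastructure might be combined with a runtime, mirroring the opctl backend change later in this commit (import paths assumed from the ADS public API; the job name is a placeholder):

from ads.jobs import Job, DataScienceJob, PythonRuntime  # assumed import paths

# init() builds the spec and substitutes "{Provide a ... OCID}" placeholders
# for any compartment/project/subnet value it cannot resolve.
job = (
    Job(name="{Job name}")
    .with_infrastructure(DataScienceJob().init())
    .with_runtime(PythonRuntime().init())
)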

ads/jobs/builders/runtimes/python_runtime.py

Lines changed: 18 additions & 13 deletions
@@ -125,7 +125,10 @@ def init(self) -> "CondaRuntime":
             The runtime instance.
         """
         super().init()
-        return self.with_custom_conda("oci://your_bucket@namespace/object_name")
+        return self.with_custom_conda(
+            "{Path to the custom conda environment. "
+            "Example: oci://your_bucket@namespace/object_name}"
+        )


 class ScriptRuntime(CondaRuntime):

@@ -254,10 +257,10 @@ def init(self) -> "ScriptRuntime":
         super().init()
         return (
             self.with_entrypoint(
-                "{Entry point script. For the MLflow will be replaced with the CMD}"
+                "{Entrypoint script. For MLFlow, it will be replaced with the CMD}"
             )
             .with_script(
-                "{Path to the script. For the MLFlow will be replaced with path to the project}"
+                "{Path to the script. For MLFlow, it will be replaced with the path to the project}"
             )
             .with_argument(key1="val1")
         )

@@ -442,12 +445,12 @@ def init(self) -> "PythonRuntime":
         """
         super().init()
         return (
-            self.with_working_dir("{For the MLflow the project folder will be used.}")
+            self.with_working_dir("{For MLflow the project folder will be used.}")
             .with_entrypoint(
-                "{Entry point script. For the MLFlow will be replaced with the CMD}"
+                "{Entrypoint script. For MLFlow, it will be replaced with the CMD}"
             )
             .with_script(
-                "{Path to the script. For the MLFlow will be replaced with path to the project}"
+                "{Path to the script. For MLFlow, it will be replaced with the path to the project}"
             )
         )

@@ -630,8 +633,8 @@ def init(self) -> "NotebookRuntime":
         """
         super().init()
         return self.with_source(
-            uri="{Path to the source code directory. For the MLFlow will be replaced with path to the project}",
-            notebook="{Entry point notebook. For the MLFlow will be replaced with the CMD}",
+            uri="{Path to the source code directory. For MLflow, it will be replaced with the path to the project}",
+            notebook="{Entrypoint notebook. For MLflow, it will be replaced with the CMD}",
         ).with_exclude_tag("tag1")


@@ -751,9 +754,9 @@ def init(self) -> "GitPythonRuntime":
         """
         super().init()
         return self.with_source(
-            "{Git URI. For the MLFlow will be replaced with the Project URI}"
+            "{Git URI. For MLFlow, it will be replaced with the Project URI}"
         ).with_entrypoint(
-            "{Entry point script. For the MLflow will be replaced with the CMD}"
+            "{Entry point script. For MLFlow, it will be replaced with the CMD}"
         )


@@ -976,10 +979,12 @@ def init(self) -> "DataFlowRuntime":
         self._spec.pop(self.CONST_ENV_VAR, None)
         return (
             self.with_script_uri(
-                "{Path to the executable script. For the MLFlow will be replaced with the CMD}"
+                "{Path to the executable script. For MLFlow, it will be replaced with the CMD}"
+            )
+            .with_script_bucket(
+                "{The object storage bucket to save a script. "
+                "Example: oci://<bucket_name>@<tenancy>/<prefix>}"
             )
-            .with_argument(key1="val1")
-            .with_script_bucket("oci://<bucket_name>@<tenancy>/<prefix>")
             .with_overwrite(True)
             .with_configuration({"spark.driverEnv.env_key": "env_value"})
         )
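The runtime init() methods above only change the placeholder strings they pre-populate. A sketch of how one of them is typically exercised, with the placeholders then overridden by real values (import path assumed; the entrypoint and script URI are illustrative):

from ads.jobs import ScriptRuntime  # assumed import path

# init() pre-populates "{...}" placeholder values; a user typically replaces them.
runtime = (
    ScriptRuntime()
    .init()
    .with_entrypoint("train.py")                      # overrides the entrypoint placeholder
    .with_script("oci://bucket@namespace/train.py")   # overrides the script path placeholder
)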

ads/jobs/serializer.py

Lines changed: 1 addition & 1 deletion
@@ -203,7 +203,7 @@ def to_yaml(
         """
         note = kwargs.pop("note", "")

-        yaml_string = note + yaml.dump(self.to_dict(**kwargs), Dumper=dumper)
+        yaml_string = f"{note}\n" + yaml.dump(self.to_dict(**kwargs), Dumper=dumper)
         if uri:
             self._write_to_file(s=yaml_string, uri=uri, **kwargs)
             return None

ads/model/deployment/model_deployment_infrastructure.py

Lines changed: 28 additions & 4 deletions
@@ -19,6 +19,11 @@
 MODEL_DEPLOYMENT_INFRASTRUCTURE_TYPE = "datascienceModelDeployment"
 MODEL_DEPLOYMENT_INFRASTRUCTURE_KIND = "infrastructure"

+DEFAULT_BANDWIDTH_MBPS = 10
+DEFAULT_WEB_CONCURRENCY = 10
+DEFAULT_REPLICA = 1
+DEFAULT_SHAPE_NAME = "VM.Standard.E2.4"
+
 logger = logging.getLogger(__name__)


@@ -152,7 +157,7 @@ class ModelDeploymentInfrastructure(Builder):
         CONST_LOG_ID: "log_id",
         CONST_LOG_GROUP_ID: "log_group_id",
         CONST_WEB_CONCURRENCY: "web_concurrency",
-        CONST_SUBNET_ID: "subnet_id"
+        CONST_SUBNET_ID: "subnet_id",
     }

     shape_config_details_attribute_map = {

@@ -211,14 +216,15 @@ def _load_default_properties(self) -> Dict:
         if PROJECT_OCID:
             defaults[self.CONST_PROJECT_ID] = PROJECT_OCID

+        defaults[self.CONST_BANDWIDTH_MBPS] = DEFAULT_BANDWIDTH_MBPS
+        defaults[self.CONST_WEB_CONCURRENCY] = DEFAULT_WEB_CONCURRENCY
+        defaults[self.CONST_REPLICA] = DEFAULT_REPLICA
+
         if NB_SESSION_OCID:
             try:
                 nb_session = DSCNotebookSession.from_ocid(NB_SESSION_OCID)
                 nb_config = nb_session.notebook_session_configuration_details
                 defaults[self.CONST_SHAPE_NAME] = nb_config.shape
-                defaults[self.CONST_BANDWIDTH_MBPS] = 10
-                defaults[self.CONST_WEB_CONCURRENCY] = 10
-                defaults[self.CONST_REPLICA] = 1

                 if nb_config.notebook_session_shape_config_details:
                     notebook_shape_config_details = oci_util.to_dict(

@@ -602,3 +608,21 @@ def subnet_id(self) -> str:
             The model deployment subnet id.
         """
         return self.get_spec(self.CONST_SUBNET_ID, None)
+
+    def init(self) -> "ModelDeploymentInfrastructure":
+        """Initializes a starter specification for the ModelDeploymentInfrastructure.
+
+        Returns
+        -------
+        ModelDeploymentInfrastructure
+            The ModelDeploymentInfrastructure instance (self)
+        """
+        return (
+            self.build()
+            .with_compartment_id(self.compartment_id or "{Provide a compartment OCID}")
+            .with_project_id(self.project_id or "{Provide a project OCID}")
+            .with_bandwidth_mbps(self.bandwidth_mbps or DEFAULT_BANDWIDTH_MBPS)
+            .with_web_concurrency(self.web_concurrency or DEFAULT_WEB_CONCURRENCY)
+            .with_replica(self.replica or DEFAULT_REPLICA)
+            .with_shape_name(self.shape_name or DEFAULT_SHAPE_NAME)
+        )
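A sketch of the new ModelDeploymentInfrastructure.init(); the import path follows the file location shown above, and the fallbacks are the module-level defaults introduced in this commit.

# Import path taken from the file location above.
from ads.model.deployment.model_deployment_infrastructure import (
    ModelDeploymentInfrastructure,
)

# init() builds the spec, then falls back to placeholders and the new defaults:
# 10 Mbps bandwidth, web concurrency 10, 1 replica, and the VM.Standard.E2.4 shape.
infra = ModelDeploymentInfrastructure().init()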

ads/opctl/backend/ads_ml_job.py

Lines changed: 3 additions & 2 deletions
@@ -101,14 +101,15 @@ def init(
                 "{Job name. For the MLFlow will be auto replaced with the Project name}"
             )
             .with_infrastructure(
-                DataScienceJob(**(self.config.get("infrastructure", {}) or {}))
+                DataScienceJob(
+                    **(self.config.get("infrastructure", {}) or {})
+                ).init()
             )
             .with_runtime(
                 JobRuntimeFactory.get_runtime(
                     key=runtime_type or PythonRuntime().type
                 ).init()
             )
-            .build()
         )

         note = (

ads/opctl/backend/ads_ml_pipeline.py

Lines changed: 2 additions & 2 deletions
@@ -120,7 +120,7 @@ def init(
         pipeline_step = (
             PipelineStep("pipeline_step_name_1")
             .with_description("A step running a python script")
-            .with_infrastructure(CustomScriptStep().build())
+            .with_infrastructure(CustomScriptStep().init())
             .with_runtime(
                 JobRuntimeFactory.get_runtime(
                     key=runtime_type or PythonRuntime().type

@@ -136,7 +136,7 @@ def init(
             )
             .with_step_details([pipeline_step])
             .with_dag(["pipeline_step_name_1"])
-            .build()
+            .init()
         )

         note = (

ads/opctl/backend/ads_model_deployment.py

Lines changed: 1 addition & 2 deletions
@@ -72,14 +72,13 @@ def init(
             .with_infrastructure(
                 ModelDeploymentInfrastructure(
                     **(self.config.get("infrastructure", {}) or {})
-                )
+                ).init()
             )
             .with_runtime(
                 ModelDeploymentRuntimeFactory.get_runtime(
                     key=runtime_type or ModelDeploymentCondaRuntime().type
                 ).init()
             )
-            .build()
         )

         note = (

ads/pipeline/ads_pipeline.py

Lines changed: 16 additions & 1 deletion
@@ -1981,6 +1981,21 @@ def status(self) -> Optional[str]:
             return self.data_science_pipeline.lifecycle_state
         return None

+    def init(self) -> "Pipeline":
+        """Initializes a starter specification for the Pipeline.
+
+        Returns
+        -------
+        Pipeline
+            The Pipeline instance (self)
+        """
+        return (
+            self.build()
+            .with_compartment_id(self.compartment_id or "{Provide a compartment OCID}")
+            .with_project_id(self.project_id or "{Provide a project OCID}")
+        )
+
+

 class DataSciencePipeline(OCIDataScienceMixin, oci.data_science.models.Pipeline):
     @classmethod

@@ -2262,4 +2277,4 @@ def delete(
             operation_kwargs=operation_kwargs,
             waiter_kwargs=waiter_kwargs,
         )
-        return self.sync()
+        return self.sync()
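A sketch of the new Pipeline.init(), assuming Pipeline is importable from ads.pipeline as the file path suggests and that its constructor accepts a name (the name here is a placeholder):

from ads.pipeline import Pipeline  # import path assumed from the file location

# init() builds the pipeline spec and fills "{Provide a ... OCID}" placeholders
# for the compartment and project when those values are not already set.
pipeline = Pipeline(name="{pipeline_name}").init()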
