Commit 5985af2

Author: Ziqun Ye
Merge branch 'develop' into ODSC-39392/triton
2 parents 350f0ea + 230cde6, commit 5985af2

File tree: 12 files changed (+262 / -44 lines)

Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
name: Feature Request
description: Feature and enhancement proposals in oracle-ads library
title: "[FR]: "
labels: [Task, Backlog]
assignees:
  - octocat
body:
  - type: markdown
    attributes:
      value: |
        Before proceeding, please review the [Contributing to this repository](https://github.com/oracle/accelerated-data-science/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/oracle/.github/blob/main/CODE_OF_CONDUCT.md).

        ---

        Thank you for submitting a feature request.
  - type: dropdown
    id: contribution
    attributes:
      label: Willingness to contribute
      description: Would you or another member of your organization be willing to contribute an implementation of this feature?
      options:
        - Yes. I can contribute this feature independently.
        - Yes. I would be willing to contribute this feature with guidance from the oracle-ads team.
        - No. I cannot contribute this feature at this time.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Proposal Summary
      description: |
        In a few sentences, provide a clear, high-level description of the feature request.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Motivation
      description: |
        - What is the use case for this feature?
        - Why is this use case valuable to support for OCI DataScience users in general?
        - Why is this use case valuable to support for your project(s) or organization?
        - Why is it currently difficult to achieve this use case?
      value: |
        > #### What is the use case for this feature?

        > #### Why is this use case valuable to support for OCI DataScience users in general?

        > #### Why is this use case valuable to support for your project(s) or organization?

        > #### Why is it currently difficult to achieve this use case?
    validations:
      required: true
  - type: textarea
    attributes:
      label: Details
      description: |
        Use this section to include any additional information about the feature. If you have a proposal for how to implement this feature, please include it here. For implementation guidelines, please refer to the [Contributing to this repository](https://github.com/oracle/accelerated-data-science/blob/main/CONTRIBUTING.md).
    validations:
      required: false
Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
name: "[DO NOT TRIGGER] Publish to PyPI"

# To run this workflow manually from the Actions tab
on: workflow_dispatch

jobs:
  build-n-publish:
    name: Build and publish Python 🐍 distribution 📦 to PyPI
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"
      - name: Build distribution 📦
        run: |
          pip install wheel
          make dist
      - name: Validate
        run: |
          pip install dist/*.whl
          python -c "import ads;"
      ## To publish to Test PyPI, a secret holding a token (GH_ADS_TESTPYPI_TOKEN) needs to be added;
      ## it was removed after the initial test. The project name also needs to be updated in
      ## setup.py - setup(name="test_oracle_ads", ...) - because the regular name is occupied by a
      ## former developer and can't be used for testing.
      # - name: Publish distribution 📦 to Test PyPI
      #   env:
      #     TWINE_USERNAME: __token__
      #     TWINE_PASSWORD: ${{ secrets.GH_ADS_TESTPYPI_TOKEN }}
      #   run: |
      #     pip install twine
      #     twine upload -r testpypi dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD
      - name: Publish distribution 📦 to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.GH_ADS_PYPI_TOKEN }}
        run: |
          pip install twine
          twine upload dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD
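
For reference, a minimal local sanity check that mirrors the workflow's "Build distribution" and "Validate" steps could look like the sketch below. It is an illustration only; it assumes it is run from the repository root and that `make dist` writes a wheel into ./dist, as the workflow does.

    # Hypothetical local equivalent of the build-and-validate steps above.
    import glob
    import subprocess
    import sys

    subprocess.run([sys.executable, "-m", "pip", "install", "wheel"], check=True)
    subprocess.run(["make", "dist"], check=True)  # builds dist/*.whl

    wheel = sorted(glob.glob("dist/*.whl"))[-1]   # pick the newest wheel
    subprocess.run([sys.executable, "-m", "pip", "install", wheel], check=True)
    subprocess.run([sys.executable, "-c", "import ads"], check=True)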

ads/database/connection.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ def __init__(
             The local database information store, default to ~/.database unless specified otherwise.
         kwargs: dict, optional
             Name-value pairs that are to be added to the list of connection parameters.
-            For example, database_name="mydb", database_type="oracle", username = "root", password = "pwd".
+            For example, database_name="mydb", database_type="oracle", username = "root", password = "example-password".

         Returns
         -------

ads/model/generic_model.py

Lines changed: 38 additions & 10 deletions
@@ -789,6 +789,7 @@ def prepare(
         ignore_pending_changes: bool = True,
         max_col_num: int = DATA_SCHEMA_MAX_COL_NUM,
         ignore_conda_error: bool = False,
+        score_py_uri: str = None,
         **kwargs: Dict,
     ) -> "GenericModel":
         """Prepare and save the score.py, serialized model and runtime.yaml file.
@@ -841,6 +842,10 @@
                 number of features(columns).
             ignore_conda_error: (bool, optional). Defaults to False.
                 Parameter to ignore error when collecting conda information.
+            score_py_uri: (str, optional). Defaults to None.
+                The URI of a customized score.py, which can be a local path or an OCI Object Storage URI.
+                When this attribute is provided, the `score.py` will not be auto generated, and the
+                provided `score.py` will be added to the artifact_dir.
             kwargs:
                 impute_values: (dict, optional).
                     The dictionary where the key is the column index(or names is accepted
@@ -1001,13 +1006,22 @@
             jinja_template_filename = (
                 "score-pkl" if self._serialize else "score_generic"
             )
-            self.model_artifact.prepare_score_py(
-                jinja_template_filename=jinja_template_filename,
-                model_file_name=self.model_file_name,
-                data_deserializer=self.model_input_serializer.name,
-                model_serializer=self.model_save_serializer.name,
-                **{**kwargs, **self._score_args},
-            )
+
+            if score_py_uri:
+                utils.copy_file(
+                    uri_src=score_py_uri,
+                    uri_dst=os.path.join(self.artifact_dir, "score.py"),
+                    force_overwrite=force_overwrite,
+                    auth=self.auth
+                )
+            else:
+                self.model_artifact.prepare_score_py(
+                    jinja_template_filename=jinja_template_filename,
+                    model_file_name=self.model_file_name,
+                    data_deserializer=self.model_input_serializer.name,
+                    model_serializer=self.model_save_serializer.name,
+                    **{**kwargs, **self._score_args},
+                )

             self._summary_status.update_status(
                 detail="Generated score.py", status=ModelState.DONE.value
@@ -2483,6 +2497,7 @@ def predict(
         self,
         data: Any = None,
         auto_serialize_data: bool = False,
+        local: bool = False,
         **kwargs,
     ) -> Dict[str, Any]:
         """Returns prediction of input data run against the model deployment endpoint.
@@ -2507,6 +2522,8 @@
                 Whether to auto serialize input data. Defaults to `False` for GenericModel, and `True` for other frameworks.
                 `data` required to be json serializable if `auto_serialize_data=False`.
                 If `auto_serialize_data` set to True, data will be serialized before sending to model deployment endpoint.
+            local: bool.
+                Whether to invoke the prediction locally. Defaults to False.
             kwargs:
                 content_type: str, used to indicate the media type of the resource.
                 image: PIL.Image Object or uri for the image.
@@ -2525,10 +2542,21 @@
         NotActiveDeploymentError
             If model deployment process was not started or not finished yet.
         ValueError
-            If `data` is empty or not JSON serializable.
+            If the model is not deployed yet or the endpoint information is not available.
         """
-        if not self.model_deployment:
-            raise ValueError("Use `deploy()` method to start model deployment.")
+        if local:
+            return self.verify(
+                data=data, auto_serialize_data=auto_serialize_data, **kwargs
+            )
+
+        if not (self.model_deployment and self.model_deployment.url):
+            raise ValueError(
+                "Error invoking the remote endpoint as the model is not "
+                "deployed yet or the endpoint information is not available. "
+                "Use `deploy()` method to start model deployment. "
+                "If you intend to invoke inference using locally available "
+                "model artifact, set parameter `local=True`"
+            )

         current_state = self.model_deployment.state.name.upper()
         if current_state != ModelDeploymentState.ACTIVE.name:
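
Taken together, these changes let a caller supply a custom score.py at prepare time and run inference against the local artifact without a deployment. A minimal usage sketch follows; it is illustrative only, and `my_estimator`, `sample_payload`, and the score.py path are placeholders rather than names from this commit.

    from ads.model.generic_model import GenericModel

    # my_estimator and sample_payload are placeholders defined elsewhere.
    model = GenericModel(estimator=my_estimator, artifact_dir="./model_artifact")

    # Supply a hand-written score.py instead of the auto-generated template;
    # score_py_uri may be a local path or an OCI Object Storage URI.
    model.prepare(
        inference_conda_env="generalml_p38_cpu_v1",
        force_overwrite=True,
        score_py_uri="./my_score.py",
    )

    # local=True routes the call through verify(), so no deployment is needed.
    print(model.predict(data=sample_payload, local=True))

    # Remote inference still requires deploy(); otherwise the new ValueError
    # explains how to fall back to local=True.
    model.deploy()
    print(model.predict(data=sample_payload))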

ads/opctl/cmds.py

Lines changed: 2 additions & 2 deletions
@@ -141,7 +141,7 @@ def _save_yaml(yaml_content, **kwargs):
     yaml_content : str
         YAML content as string.
     """
-    if kwargs["job_info"]:
+    if kwargs.get("job_info"):
         yaml_path = os.path.abspath(os.path.expanduser(kwargs["job_info"]))
         if os.path.isfile(yaml_path):
             overwrite = input(
@@ -210,7 +210,7 @@ def run(config: Dict, **kwargs) -> Dict:
                 "backend operator for distributed training can either be local or job"
             )
         else:
-            if not kwargs["dry_run"]:
+            if not kwargs["dry_run"] and not kwargs["nobuild"]:
                 verify_and_publish_image(kwargs["nopush"], config)
             print("running image: " + config["spec"]["cluster"]["spec"]["image"])
         cluster_def = YamlSpecParser.parse_content(config)

ads/opctl/config/resolver.py

Lines changed: 5 additions & 1 deletion
@@ -155,7 +155,11 @@ def _resolve_source_folder_path(self) -> None:
     def _resolve_entry_script(self) -> None:
         # this should be run after _resolve_source_folder_path
         if not self._is_ads_operator():
-            if os.path.splitext(self.config["execution"]["entrypoint"])[1] == ".ipynb":
+            if (
+                self.config["execution"].get("entrypoint")
+                and os.path.splitext(self.config["execution"]["entrypoint"])[1]
+                == ".ipynb"
+            ):
                 input_path = os.path.join(
                     self.config["execution"]["source_folder"],
                     self.config["execution"]["entrypoint"],
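
Both opctl changes guard optional keys the same way: indexing a missing key raises KeyError, while dict.get returns None so the branch is simply skipped. A small illustration of the pattern, using an invented execution config:

    import os

    # Invented config that omits "entrypoint", as a CLI invocation might.
    config = {"execution": {"source_folder": "./src"}}

    # config["execution"]["entrypoint"] would raise KeyError here;
    # .get() returns None and the check short-circuits instead.
    entrypoint = config["execution"].get("entrypoint")
    if entrypoint and os.path.splitext(entrypoint)[1] == ".ipynb":
        print("converting notebook entrypoint:", entrypoint)
    else:
        print("no notebook entrypoint to convert")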

ads/templates/score_pytorch.jinja2

Lines changed: 16 additions & 1 deletion
@@ -10,8 +10,21 @@ import pandas as pd
 from io import BytesIO
 import base64
 import logging
+from random import randint
+
+
+def get_torch_device():
+    num_devices = torch.cuda.device_count()
+    if num_devices == 0:
+        return "cpu"
+    if num_devices == 1:
+        return "cuda:0"
+    else:
+        return f"cuda:{randint(0, num_devices-1)}"
+

 model_name = '{{model_file_name}}'
+device = torch.device(get_torch_device())

 """
 Inference script. This script is used for prediction by scoring server when schema is known.
@@ -59,6 +72,7 @@ def load_model(model_file_name=model_name):

 {% endif %}
     print("Model is successfully loaded.")
+    the_model = the_model.to(device)
     return the_model

 @lru_cache(maxsize=1)
@@ -158,6 +172,7 @@ def pre_inference(data, input_schema_path):
     data = deserialize(data, input_schema_path)

     # Add further data preprocessing if needed
+    data = data.to(device)
     return data

 def post_inference(yhat):
@@ -199,6 +214,6 @@ def predict(data, model=load_model(), input_schema_path=os.path.join(os.path.dir

     with torch.no_grad():
         yhat = post_inference(
-            model(inputs)
+            model(inputs).to("cpu")
         )
     return {'prediction': yhat}
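
The template change follows the usual PyTorch device-placement pattern: choose a device once, move the model and the inputs to it, and bring predictions back to the CPU before post-processing. A self-contained sketch of the same pattern outside the template, using a throwaway linear model and random input purely for illustration:

    import torch
    from random import randint


    def get_torch_device():
        # Mirror the template: CPU if no GPU, the only GPU if one, else a random GPU.
        num_devices = torch.cuda.device_count()
        if num_devices == 0:
            return "cpu"
        if num_devices == 1:
            return "cuda:0"
        return f"cuda:{randint(0, num_devices - 1)}"


    device = torch.device(get_torch_device())

    model = torch.nn.Linear(4, 2).to(device)  # illustrative model
    inputs = torch.randn(1, 4).to(device)     # illustrative input batch

    with torch.no_grad():
        # Move the prediction back to the CPU before post-processing.
        prediction = model(inputs).to("cpu").numpy().tolist()

    print(prediction)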

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -13,4 +13,5 @@ xlrd>=1.2.0
 lxml
 fastparquet
 imbalanced-learn
-pyarrow
+pyarrow
+mysql-connector-python

docs/source/user_guide/model_registration/model_artifact.rst

Lines changed: 19 additions & 1 deletion
@@ -30,6 +30,7 @@ Auto generation of ``score.py`` with framework specific code for loading models

 To accommodate other frameworks that are unknown to ADS, a template code for ``score.py`` is generated in the provided artifact directory location.

+
 Prepare the Model Artifact
 --------------------------

@@ -98,8 +99,25 @@ ADS automatically captures:
 * ``UseCaseType`` in ``metadata_taxonomy`` cannot be automatically populated. One way to populate the use case is to pass ``use_case_type`` to the ``prepare`` method.
 * Model introspection is automatically triggered.

-.. include:: _template/score.rst
+Prepare with custom ``score.py``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 2.8.4

+You can provide your own ``score.py`` by passing its location through the ``score_py_uri`` parameter of :py:meth:`~ads.model.GenericModel.prepare`.
+The provided ``score.py`` is added to the model artifact.
+
+.. code-block:: python3
+
+    tf_model.prepare(
+        inference_conda_env="generalml_p38_cpu_v1",
+        use_case_type=UseCaseType.MULTINOMIAL_CLASSIFICATION,
+        X_sample=trainx,
+        y_sample=trainy,
+        score_py_uri="/path/to/score.py"
+    )
+
+.. include:: _template/score.rst

 Model Introspection
 -------------------

setup.py

Lines changed: 0 additions & 1 deletion
@@ -69,7 +69,6 @@
         "nbformat",
         "inflection",
     ],
-    "mysql": ["mysql-connector-python"],
     "bds": ["ibis-framework[impala]", "hdfs[kerberos]", "sqlalchemy"],
     "spark": ["pyspark>=3.0.0"],
     "huggingface": ["transformers"],
