Closes OPEN-3602 Optionally use the model runner for model validations

gustavocidornelas · whoseoyster · commit 617a95b4ed66 · 2023-02-25T12:51:13.000-08:00
diff --git a/openlayer/__init__.py b/openlayer/__init__.py
@@ -410,11 +410,6 @@ def add_model(
         with tempfile.TemporaryDirectory() as temp_dir:
             if model_package_dir:
                 shutil.copytree(model_package_dir, temp_dir, dirs_exist_ok=True)
-                current_file_dir = os.path.dirname(os.path.abspath(__file__))
-                shutil.copy(
-                    f"{current_file_dir}/prediction_job.py",
-                    f"{temp_dir}/prediction_job.py",
-                )
                 utils.write_python_version(temp_dir)
 
             utils.write_yaml(model_data, f"{temp_dir}/model_config.yaml")
diff --git a/openlayer/models.py b/openlayer/models.py
@@ -1,8 +1,10 @@
 import os
+import shutil
 import subprocess
+import tempfile
 from enum import Enum
 from typing import List, Set
-import tempfile
+
 import pandas as pd
 
 
@@ -284,6 +286,11 @@ def __init__(self, model_package: str):
             logs_file_path=f"{model_package}/logs.txt",
         )
 
+    def __del__(self):
+        # Also runs if `__init__` failed before the environment was set.
+        if getattr(self, "_conda_environment", None) is not None:
+            self._conda_environment.delete()
+
     def run(self, input_data: pd.DataFrame) -> pd.DataFrame:
         """Runs the input data through the model in the conda
         environment.
@@ -299,6 +304,13 @@ def run(self, input_data: pd.DataFrame) -> pd.DataFrame:
             Output from the model. The output is a dataframe with a single
             column named 'prediction' and lists of class probabilities as values.
         """
+        # Copy the prediction job script to the model package
+        current_file_dir = os.path.dirname(os.path.abspath(__file__))
+        shutil.copy(
+            f"{current_file_dir}/prediction_job.py",
+            f"{self.model_package}/prediction_job.py",
+        )
+
         with tempfile.TemporaryDirectory() as temp_dir:
             # Save the input data to a csv file
             input_data.to_csv(f"{temp_dir}/input_data.csv", index=False)
diff --git a/openlayer/validators.py b/openlayer/validators.py
@@ -21,7 +21,7 @@
 import pkg_resources
 import yaml
 
-from . import schemas, utils
+from . import models, schemas, utils
 
 
 class BaselineModelValidator:
@@ -96,18 +96,22 @@ class CommitBundleValidator:
         Whether to skip model validation, by default False
     skip_dataset_validation : bool
         Whether to skip dataset validation, by default False
+    use_runner : bool
+        Whether to use the runner to validate the model, by default False.
     """
 
     def __init__(
         self,
         bundle_path: str,
         skip_model_validation: bool = False,
         skip_dataset_validation: bool = False,
+        use_runner: bool = False,
     ):
         self.bundle_path = bundle_path
         self._bundle_resources = utils.list_resources_in_bundle(bundle_path)
         self._skip_model_validation = skip_model_validation
         self._skip_dataset_validation = skip_dataset_validation
+        self._use_runner = use_runner
         self.failed_validations = []
 
     def _validate_bundle_state(self):
@@ -268,6 +272,7 @@ def _validate_bundle_resources(self):
                     model_config_file_path=f"{self.bundle_path}/model/model_config.yaml",
                     model_package_dir=f"{self.bundle_path}/model",
                     sample_data=sample_data,
+                    use_runner=self._use_runner,
                 )
                 bundle_resources_failed_validations.extend(model_validator.validate())
 
@@ -844,6 +849,8 @@ class ModelValidator:
 
     Parameters
     ----------
+    model_config_file_path : str
+        Path to the model config file.
     model_package_dir : str
         Path to the model package directory.
     sample_data : pd.DataFrame
@@ -862,6 +869,7 @@ class ModelValidator:
     >>> from openlayer import ModelValidator
     >>>
     >>> model_validator = ModelValidator(
+    ...     model_config_file_path="/path/to/model/config/file",
     ...     model_package_dir="/path/to/model/package",
     ...     sample_data=df,
     ... )
@@ -872,12 +880,14 @@ class ModelValidator:
     def __init__(
         self,
         model_config_file_path: str,
         model_package_dir: Optional[str] = None,
         sample_data: Optional[pd.DataFrame] = None,
+        use_runner: bool = False,
     ):
         self.model_config_file_path = model_config_file_path
         self.model_package_dir = model_package_dir
         self.sample_data = sample_data
+        self._use_runner = use_runner
         self.failed_validations = []
 
     def _validate_model_package_dir(self):
@@ -932,7 +942,7 @@ def _validate_model_package_dir(self):
         # Add the model package failed validations to the list of all failed validations
         self.failed_validations.extend(model_package_failed_validations)
 
-    def _validate_requirements(self):
+    def _validate_requirements_file(self):
         """Validates the requirements.txt file.
 
         Checks for the existence of the file and parses it to check for
@@ -1109,6 +1119,33 @@ def _validate_prediction_interface(self):
         # Add the `prediction_interface.py` failed validations to the list of all failed validations
         self.failed_validations.extend(prediction_interface_failed_validations)
 
+    def _validate_model_runner(self):
+        """Validates the model using the model runner.
+
+        This is mostly meant to be used by the platform, to validate the model. It will
+        create the model's environment and use it to run the model.
+        """
+        model_runner_failed_validations = []
+
+        # Build the runner and run some data through it inside the same `try`, so
+        # that setup errors are also reported as failed validations.
+        # Will create the model environment if it doesn't exist
+        try:
+            model_runner = models.ModelRunner(self.model_package_dir)
+            model_runner.run(self.sample_data)
+        except Exception as exc:
+            model_runner_failed_validations.append(
+                f"Failed to run the model with the following error: \n {exc}"
+            )
+
+        # Print results of the validation
+        if model_runner_failed_validations:
+            print("Model runner failed validations: \n")
+            _list_failed_validation_messages(model_runner_failed_validations)
+
+        # Add the model runner failed validations to the list of all failed validations
+        self.failed_validations.extend(model_runner_failed_validations)
+
     def validate(self) -> List[str]:
         """Runs all model validations.
 
@@ -1121,8 +1158,11 @@ def validate(self) -> List[str]:
         """
         if self.model_package_dir:
             self._validate_model_package_dir()
-            self._validate_requirements()
-            self._validate_prediction_interface()
+            if self._use_runner:
+                self._validate_model_runner()
+            else:
+                self._validate_requirements_file()
+                self._validate_prediction_interface()
         self._validate_model_config()
 
         if not self.failed_validations: