Completes OPEN-5005 Accept configs as dicts

gustavocidornelas · whoseoyster · commit ce7db20dce8a · 2023-09-12T21:47:24.000-07:00
diff --git a/openlayer/__init__.py b/openlayer/__init__.py
@@ -28,7 +28,7 @@
 import time
 import uuid
 import warnings
-from typing import Optional
+from typing import Dict, Optional
 
 import pandas as pd
 import yaml
@@ -252,8 +252,9 @@ def create_or_load_project(
 
     def add_model(
         self,
-        model_config_file_path: str,
         task_type: TaskType,
+        model_config: Optional[Dict[str, any]] = None,
+        model_config_file_path: Optional[str] = None,
         model_package_dir: Optional[str] = None,
         sample_data: Optional[pd.DataFrame] = None,
         force: bool = False,
@@ -263,8 +264,19 @@ def add_model(
 
         Parameters
         ----------
+        model_config : Dict[str, any]
+            Dictionary containing the model configuration. This is not needed if
+            ``model_config_file_path`` is provided.
+
+            .. admonition:: What's in the model config dict?
+
+                The model configuration depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-model-config>`_
+                for examples.
+
         model_config_file_path : str
-            Path to the model configuration YAML file.
+            Path to the model configuration YAML file. This is not needed if
+            ``model_config`` is provided.
 
             .. admonition:: What's in the model config file?
 
@@ -407,10 +419,15 @@ def add_model(
                     "The sample data must contain at least 2 rows, but only"
                     f"{len(sample_data)} rows were provided."
                 )
+        if model_config is None and model_config_file_path is None:
+            raise ValueError(
+                "Either `model_config` or `model_config_file_path` must be provided."
+            )
 
         # Validate model package
         model_validator = model_validators.get_validator(
             task_type=task_type,
+            model_config=model_config,
             model_package_dir=model_package_dir,
             model_config_file_path=model_config_file_path,
             sample_data=sample_data,
@@ -424,7 +441,8 @@ def add_model(
             ) from None
 
         # Load model config and augment with defaults
-        model_config = utils.read_yaml(model_config_file_path)
+        if model_config_file_path is not None:
+            model_config = utils.read_yaml(model_config_file_path)
         model_data = ModelSchema().load({"task_type": task_type.value, **model_config})
 
         # Copy relevant resources to temp directory
@@ -451,6 +469,7 @@ def add_baseline_model(
         self,
         project_id: str,
         task_type: TaskType,
+        model_config: Optional[Dict[str, any]] = None,
         model_config_file_path: Optional[str] = None,
         force: bool = False,
     ):
@@ -469,9 +488,23 @@ def add_baseline_model(
 
         Parameters
         ----------
+        model_config : Dict[str, any], optional
+            Dictionary containing the model configuration. This is not needed if
+            ``model_config_file_path`` is provided. If neither is provided,
+            the default model config will be used.
+
+            .. admonition:: What's in the model config dict?
+
+                For baseline models, the config should contain:
+
+                - ``metadata`` : Dict[str, any], default {}
+                    Dictionary containing metadata about the model. This is the
+                    metadata that will be displayed on the Openlayer platform.
+
         model_config_file_path : str, optional
-            Path to the model configuration YAML file. If not provided, the default
-            model config will be used.
+            Path to the model configuration YAML file. This is not needed if
+            ``model_config`` is provided. If neither is provided,
+            the default model config will be used.
 
             .. admonition:: What's on the model config file?
 
@@ -490,9 +523,9 @@ def add_baseline_model(
             )
 
         # Validate the baseline model
-
         baseline_model_validator = baseline_model_validators.get_validator(
             task_type=task_type,
+            model_config=model_config,
             model_config_file_path=model_config_file_path,
         )
         failed_validations = baseline_model_validator.validate()
@@ -504,7 +537,7 @@ def add_baseline_model(
             ) from None
 
         # Load model config and augment with defaults
-        model_config = {}
+        model_config = dict(model_config or {})  # copy; never mutate caller's dict
         if model_config_file_path is not None:
             model_config = utils.read_yaml(model_config_file_path)
         model_config["modelType"] = "baseline"
@@ -527,7 +560,8 @@ def add_dataset(
         self,
         file_path: str,
         task_type: TaskType,
-        dataset_config_file_path: str,
+        dataset_config: Optional[Dict[str, any]] = None,
+        dataset_config_file_path: Optional[str] = None,
         project_id: str = None,
         force: bool = False,
     ):
@@ -537,8 +571,19 @@ def add_dataset(
         ----------
         file_path : str
             Path to the csv file containing the dataset.
+        dataset_config : Dict[str, any]
+            Dictionary containing the dataset configuration. This is not needed if
+            ``dataset_config_file_path`` is provided.
+
+            .. admonition:: What's in the dataset config?
+
+                The dataset configuration depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
+                for examples.
+
         dataset_config_file_path : str
-            Path to the dataset configuration YAML file.
+            Path to the dataset configuration YAML file. This is not needed if
+            ``dataset_config`` is provided.
 
             .. admonition:: What's in the dataset config file?
 
@@ -668,9 +713,15 @@ def add_dataset(
         >>> project.commit("Initial dataset commit.")
         >>> project.push()
         """
+        if dataset_config is None and dataset_config_file_path is None:
+            raise ValueError(
+                "Either `dataset_config` or `dataset_config_file_path` must be"
+                " provided."
+            )
         # Validate dataset
         dataset_validator = dataset_validators.get_validator(
             task_type=task_type,
+            dataset_config=dataset_config,
             dataset_config_file_path=dataset_config_file_path,
             dataset_file_path=file_path,
         )
@@ -683,7 +734,8 @@ def add_dataset(
             ) from None
 
         # Load dataset config and augment with defaults
-        dataset_config = utils.read_yaml(dataset_config_file_path)
+        if dataset_config_file_path is not None:
+            dataset_config = utils.read_yaml(dataset_config_file_path)
         dataset_data = DatasetSchema().load(
             {"task_type": task_type.value, **dataset_config}
         )
@@ -704,7 +756,8 @@ def add_dataframe(
         self,
         dataset_df: pd.DataFrame,
         task_type: TaskType,
-        dataset_config_file_path: str,
+        dataset_config: Optional[Dict[str, any]] = None,
+        dataset_config_file_path: Optional[str] = None,
         project_id: str = None,
         force: bool = False,
     ):
@@ -714,8 +767,19 @@ def add_dataframe(
         ----------
         dataset_df : pd.DataFrame
             Dataframe containing your dataset.
+        dataset_config : Dict[str, any]
+            Dictionary containing the dataset configuration. This is not needed if
+            ``dataset_config_file_path`` is provided.
+
+            .. admonition:: What's in the dataset config?
+
+                The dataset configuration depends on the :obj:`TaskType`.
+                Refer to the `documentation <https://docs.openlayer.com/docs/tabular-classification-dataset-config>`_
+                for examples.
+
         dataset_config_file_path : str
-            Path to the dataset configuration YAML file.
+            Path to the dataset configuration YAML file. This is not needed if
+            ``dataset_config`` is provided.
 
             .. admonition:: What's in the dataset config file?
 
@@ -856,6 +920,7 @@ def add_dataframe(
                 file_path=file_path,
                 project_id=project_id,
                 dataset_config_file_path=dataset_config_file_path,
+                dataset_config=dataset_config,
                 force=force,
                 task_type=task_type,
             )
diff --git a/openlayer/schemas.py b/openlayer/schemas.py
@@ -404,6 +404,7 @@ class ProjectSchema(ma.Schema):
             min=1,
             max=140,
         ),
+        allow_none=True,
     )
     name = ma.fields.Str(
         required=True,
diff --git a/openlayer/validators/baseline_model_validators.py b/openlayer/validators/baseline_model_validators.py
@@ -2,7 +2,7 @@
 """
 import logging
 import os
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import marshmallow as ma
 import yaml
@@ -20,15 +20,21 @@ class BaseBaselineModelValidator(BaseValidator):
     ----------
     task_type : tasks.TaskType
         The task type.
+    model_config : Optional[Dict[str, any]], optional
+        The model config, by default None
     model_config_file_path : Optional[str], optional
         The path to the model config file, by default None
     """
 
     def __init__(
-        self, task_type: tasks.TaskType, model_config_file_path: Optional[str] = None
+        self,
+        task_type: tasks.TaskType,
+        model_config: Optional[Dict[str, any]] = None,
+        model_config_file_path: Optional[str] = None,
     ):
         super().__init__(resource_display_name="baseline model")
         self.task_type = task_type
+        self.model_config = model_config
         self.model_config_file_path = model_config_file_path
 
     def _validate(self) -> List[str]:
@@ -38,7 +44,7 @@ def _validate(self) -> List[str]:
         List[str]
             The list of failed validations.
         """
-        if self.model_config_file_path:
+        if self.model_config_file_path or self.model_config:
             self._validate_model_config()
 
     def _validate_model_config(self):
@@ -51,13 +57,13 @@ def _validate_model_config(self):
                 )
             else:
                 with open(self.model_config_file_path, "r", encoding="UTF-8") as stream:
-                    model_config = yaml.safe_load(stream)
+                    self.model_config = yaml.safe_load(stream)
 
-        if model_config:
+        if self.model_config:
             baseline_model_schema = schemas.BaselineModelSchema()
             try:
                 baseline_model_schema.load(
-                    {"task_type": self.task_type.value, **model_config}
+                    {"task_type": self.task_type.value, **self.model_config}
                 )
             except ma.ValidationError as err:
                 self.failed_validations.extend(
@@ -74,13 +80,15 @@ class TabularClassificationBaselineModelValidator(BaseBaselineModelValidator):
 # ----------------------------- Factory function ----------------------------- #
 def get_validator(
     task_type: tasks.TaskType,
-    model_config_file_path: str,
+    model_config: Optional[Dict[str, any]] = None,
+    model_config_file_path: Optional[str] = None,
 ) -> BaseBaselineModelValidator:
     """Factory function to get the correct baseline model validator.
 
     Parameters
     ----------
         task_type: The task type of the model.
+        model_config: The model config.
         model_config_file_path: Path to the model config file.
 
     Returns
@@ -89,6 +97,7 @@ def get_validator(
     """
     if task_type == tasks.TaskType.TabularClassification:
         return TabularClassificationBaselineModelValidator(
+            model_config=model_config,
             model_config_file_path=model_config_file_path,
             task_type=task_type,
         )
diff --git a/openlayer/validators/model_validators.py b/openlayer/validators/model_validators.py

Original file line number	Diff line number	Diff line change
`@@ -404,6 +404,7 @@ class ProjectSchema(ma.Schema):`
`404`	`404`	`min=1,`
`405`	`405`	`max=140,`
`406`	`406`	`),`
	`407`	`+ allow_none=True,`
`407`	`408`	`)`
`408`	`409`	`name = ma.fields.Str(`
`409`	`410`	`required=True,`