Skip to content

Commit 7245b3e

Browse files
Improved error messages
1 parent e6fc911 commit 7245b3e

File tree

2 files changed

+28
-30
lines changed

2 files changed

+28
-30
lines changed

unboxapi/__init__.py

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -412,15 +412,15 @@ def add_model(
412412
os.path.expanduser(requirements_txt_file)
413413
):
414414
raise UnboxResourceError(
415-
f"The file path `{requirements_txt_file}` specified on `requirements_txt_file` does not"
416-
" contain a file with the requirements. \n"
415+
f"File at path `{requirements_txt_file}` does not"
416+
" contain the requirements. \n"
417417
) from None
418418

419419
# Setup script
420420
if setup_script and not os.path.isfile(os.path.expanduser(setup_script)):
421421
raise UnboxResourceError(
422-
f"The file path `{setup_script}` specified on `setup_script` does not"
423-
" contain a file with the bash script with commands required before model loading. \n"
422+
f"File at path `{setup_script}` does not"
423+
" contain the bash script with commands required before model loading. \n"
424424
) from None
425425

426426
# Dependent dir
@@ -441,9 +441,8 @@ def add_model(
441441
if train_sample_df.isnull().values.any():
442442
raise UnboxResourceError(
443443
context="There is an issue with the specified `train_sample_df`. \n",
444-
message=f"The `train_sample_df` contains missing values. \n",
445-
mitigation="Currently, Unbox does not support datasets with missing values."
446-
+ "Make sure to upload a training set sample without missing values by applying the same"
444+
message=f"The `train_sample_df` contains missing values, which is currently not supported. \n",
445+
mitigation="Make sure to upload a training set sample without missing values by applying the same"
447446
+ " preprocessing steps expected by your model.",
448447
) from None
449448

@@ -454,7 +453,7 @@ def add_model(
454453
# predict_proba
455454
if not isinstance(function, Callable):
456455
raise UnboxValidationError(
457-
f"- The argument `{function}` specified as `function` is not callable. \n"
456+
f"- `{function}` specified as `function` is not callable. \n"
458457
) from None
459458

460459
user_args = function.__code__.co_varnames[: function.__code__.co_argcount][2:]
@@ -496,7 +495,7 @@ def add_model(
496495
if "tokenizer" not in kwargs:
497496
raise UnboxResourceError(
498497
context="There is a missing keyword argument for the specified model type. \n",
499-
message="The `tokenizer` must be specified in kwargs when using a transformers model. \n",
498+
message="`tokenizer` must be specified in kwargs when using a transformers model. \n",
500499
mitigation="Make sure to specify the additional kwargs needed for the model type.",
501500
) from None
502501

@@ -516,7 +515,7 @@ def add_model(
516515
if feature not in headers
517516
]
518517
raise UnboxDatasetInconsistencyError(
519-
f"The features {features_not_in_dataset} specified in `feature_names` are not on the dataset. \n"
518+
f"Features {features_not_in_dataset} specified in `feature_names` are not on the dataset. \n"
520519
) from None
521520

522521
required_fields = [
@@ -792,7 +791,7 @@ def add_dataset(
792791
object_name = "original.csv"
793792
if not os.path.isfile(exp_file_path):
794793
raise UnboxResourceError(
795-
f"The file path `{file_path}` specified on `file_path` does not contain a file with the dataset. \n"
794+
f"File at path `{file_path}` does not contain the dataset. \n"
796795
) from None
797796

798797
with open(exp_file_path, "rt") as f:
@@ -805,9 +804,8 @@ def add_dataset(
805804
if df.isnull().values.any():
806805
raise UnboxResourceError(
807806
context="There is an issue with the specified dataset. \n",
808-
message="The dataset contains missing values. \n",
809-
mitigation="Currently, Unbox does not support datasets with missing values."
810-
+ "Make sure to upload a training set sample without missing values by applying the same"
807+
message="The dataset contains missing values, which is currently not supported. \n",
808+
mitigation="Make sure to upload a training set sample without missing values by applying the same"
811809
+ " preprocessing steps expected by your model.",
812810
) from None
813811

@@ -817,13 +815,13 @@ def add_dataset(
817815
headers.index(label_column_name)
818816
except ValueError:
819817
raise UnboxDatasetInconsistencyError(
820-
f"The column `{label_column_name}` specified as `label_column_name` is not on the dataset. \n"
818+
f"`{label_column_name}` specified as `label_column_name` is not on the dataset. \n"
821819
) from None
822820

823821
dataset_classes = list(df[label_column_name].unique())
824822
if len(dataset_classes) > len(class_names):
825823
raise UnboxDatasetInconsistencyError(
826-
f"There are {len(dataset_classes)} classes represented on the dataset, but there are only"
824+
f"There are {len(dataset_classes)} classes represented on the dataset, but there are only "
827825
f"{len(class_names)} items on the `class_names` list. \n",
828826
mitigation=f"Make sure that there are at most {len(class_names)} classes in your dataset.",
829827
) from None
@@ -837,14 +835,14 @@ def add_dataset(
837835
except ValueError:
838836
if text_column_name:
839837
raise UnboxDatasetInconsistencyError(
840-
f"The column `{text_column_name}` specified as `text_column_name` is not on the dataset. \n"
838+
f"`{text_column_name}` specified as `text_column_name` is not on the dataset. \n"
841839
) from None
842840
else:
843841
features_not_in_dataset = [
844842
feature for feature in feature_names if feature not in headers
845843
]
846844
raise UnboxDatasetInconsistencyError(
847-
f"The features {features_not_in_dataset} specified in `feature_names` are not on the dataset. \n"
845+
f"Features {features_not_in_dataset} specified in `feature_names` are not on the dataset. \n"
848846
) from None
849847

850848
# Tag column validation
@@ -853,7 +851,7 @@ def add_dataset(
853851
headers.index(tag_column_name)
854852
except ValueError:
855853
raise UnboxDatasetInconsistencyError(
856-
f"The column `{tag_column_name}` specified as `tag_column_name` is not on the dataset. \n"
854+
f"`{tag_column_name}` specified as `tag_column_name` is not on the dataset. \n"
857855
) from None
858856

859857
# ----------------------- Subscription plan validations ---------------------- #

unboxapi/schemas.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class ModelSchema(Schema):
4646
},
4747
validate=validate.OneOf(
4848
["text-classification", "tabular-classification"],
49-
error=f"The `task_type` must be one of either TaskType.TextClassification or TaskType.TabularClassification.",
49+
error=f"`task_type` must be one of either TaskType.TextClassification or TaskType.TabularClassification.",
5050
),
5151
)
5252
model_type = fields.Str(
@@ -56,7 +56,7 @@ class ModelSchema(Schema):
5656
},
5757
validate=validate.OneOf(
5858
[model_framework.value for model_framework in ModelType],
59-
error=f"The `model_type` must be one of the supported frameworks. Check out our API reference for a full list https://reference.unbox.ai/reference/api/unboxapi.ModelType.html.\n ",
59+
error=f"`model_type` must be one of the supported frameworks. Check out our API reference for a full list https://reference.unbox.ai/reference/api/unboxapi.ModelType.html.\n ",
6060
),
6161
)
6262
class_names = fields.List(
@@ -88,25 +88,25 @@ def validate_custom_model_code(self, data, **kwargs):
8888
"""Validates the model type when `custom_code` is specified"""
8989
if data["model_type"] == "Custom" and data["custom_model_code"] is None:
9090
raise ValidationError(
91-
"Must specify `custom_model_code` when using ModelType.custom. \n"
91+
"ModelType.custom without `custom_model_code`. Must specify `custom_model_code` when using ModelType.custom. \n"
9292
)
9393
elif data["custom_model_code"] is not None and data["model_type"] != "Custom":
9494
raise ValidationError(
95-
"`model_type` must be ModelType.custom if specifying `custom_model_code`. \n"
95+
"Incompatible `model_type` for `custom_model_code`. `model_type` must be ModelType.custom if specifying `custom_model_code`. \n"
9696
)
9797

9898
@validates_schema
9999
def validate_custom_model_dependent_dir(self, data, **kwargs):
100100
if data["model_type"] == "Custom" and data["dependent_dir"] is None:
101101
raise ValidationError(
102-
"Must specify `dependent_dir` when using ModelType.custom. \n"
102+
"`dependent_dir` not specified with ModelType.custom. Must specify `dependent_dir` when using ModelType.custom. \n"
103103
)
104104

105105
@validates_schema
106106
def validate_custom_model_requirements(self, data, **kwargs):
107107
if data["model_type"] == "Custom" and data["requirements_txt_file"] is None:
108108
raise ValidationError(
109-
"Must specify `requirements_txt_file` when using ModelType.custom. \n"
109+
"`requirements_txt_file` not specified with ModelType.custom. Must specify `requirements_txt_file` when using ModelType.custom. \n"
110110
)
111111

112112

@@ -132,7 +132,7 @@ class DatasetSchema(Schema):
132132
},
133133
validate=validate.OneOf(
134134
["text-classification", "tabular-classification"],
135-
error=f"The `task_type` must be one of either TaskType.TextClassification or TaskType.TabularClassification.",
135+
error=f"`task_type` must be one of either TaskType.TextClassification or TaskType.TabularClassification.",
136136
),
137137
)
138138
tag_column_name = fields.List(
@@ -147,7 +147,7 @@ class DatasetSchema(Schema):
147147
default="en",
148148
validate=validate.Regexp(
149149
r"^[a-z]{2}(-[A-Z]{2})?$",
150-
error="The `language` of the dataset is not in the ISO 639-1 (alpha-2 code) format.",
150+
error="`language` of the dataset is not in the ISO 639-1 (alpha-2 code) format.",
151151
),
152152
)
153153
sep = fields.Str()
@@ -167,21 +167,21 @@ def validates_label_column_not_in_feature_names(self, data, **kwargs):
167167
"""Validates whether the label column name is not on the feature names list"""
168168
if data["label_column_name"] in data["feature_names"]:
169169
raise ValidationError(
170-
f"The `label_column_name` `{data['label_column_name']}` must not be in `feature_names`."
170+
f"`label_column_name` `{data['label_column_name']}` must not be in `feature_names`."
171171
)
172172

173173
@validates_schema
174174
def validates_task_type_and_data_column(self, data, **kwargs):
175175
"""Validates whether the data columns are present according to the task type"""
176176
if data["task_type"] == "tabular-classification" and not data["feature_names"]:
177177
raise ValidationError(
178-
"Must specify `feature_names` for TabularClassification `task_type`."
178+
"`feature_names` not specified for tabular classification task. Must specify `feature_names` for TabularClassification `task_type`."
179179
)
180180
elif (
181181
data["task_type"] == "text-classification" and not data["text_column_name"]
182182
):
183183
raise ValidationError(
184-
"Must specify `text_column_name` for TextClassification `task_type`."
184+
"`text_column_name` not specified for text classification task. Must specify `text_column_name` for TextClassification `task_type`."
185185
)
186186
elif data["feature_names"] and data["text_column_name"]:
187187
if data["task_type"] == "tabular-classification":

0 commit comments

Comments (0)