Skip to content

Commit dc447f2

Browse files
Checkpoint: finished client-side validations for models and datasets
1 parent 6073ffd commit dc447f2

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

unboxapi/__init__.py

Lines changed: 48 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
import shutil
44
import tarfile
55
import tempfile
6+
import traceback
67
import uuid
78
from enum import Enum
8-
from typing import Dict, List, Optional
9+
from typing import Callable, Dict, List, Optional
910

1011
import pandas as pd
1112
from bentoml.saved_bundle.bundler import _write_bento_content_to_dir
@@ -322,7 +323,6 @@ def add_model(
322323
model_schema.load(
323324
{
324325
"name": name,
325-
"function": function,
326326
"description": description,
327327
"task_type": task_type.value,
328328
"model_type": model_type.value,
@@ -363,18 +363,33 @@ def add_model(
363363
mitigation=f"Make sure that the specified `dependent_dir` is different than {os.getcwd()}",
364364
)
365365

366-
# Training set size
366+
# Training set
367367
if task_type in [TaskType.TabularClassification, TaskType.TabularRegression]:
368368
if len(train_sample_df.index) < 100:
369369
raise UnboxResourceError(
370370
context="There is an issue with the specified `train_sample_df`. \n",
371371
message=f"The `train_sample_df` is too small, with only {len(train_sample_df.index)} rows. \n",
372372
mitigation="Make sure to upload a training set sample with at least 100 rows.",
373373
)
374+
if train_sample_df.isnull().values.any():
375+
raise UnboxResourceError(
376+
context="There is an issue with the specified `train_sample_df`. \n",
377+
message=f"The `train_sample_df` contains missing values. \n",
378+
mitigation="Currently, Unbox does not support datasets with missing values."
379+
+ "Make sure to upload a training set sample without missing values by applying the same"
380+
+ " preprocessing steps expected by your model.",
381+
)
382+
374383
train_sample_df = train_sample_df.sample(
375384
min(3000, len(train_sample_df.index))
376385
)
377-
# predict_proba extra args
386+
387+
# predict_proba
388+
if not isinstance(function, Callable):
389+
raise UnboxValidationError(
390+
f"- The argument `{function}` specified as `function` is not callable. \n"
391+
)
392+
378393
user_args = function.__code__.co_varnames[: function.__code__.co_argcount][2:]
379394
kwarg_keys = tuple(kwargs)
380395
if user_args != kwarg_keys:
@@ -383,6 +398,25 @@ def add_model(
383398
message=f"Your function's additional args {user_args} do not match the kwargs you specifed {kwarg_keys}. \n",
384399
mitigation=f"Make sure to include all of the required kwargs to run inference with your `function`.",
385400
)
401+
try:
402+
if task_type in [
403+
TaskType.TabularClassification,
404+
TaskType.TabularRegression,
405+
]:
406+
test_input = train_sample_df[:3][feature_names].to_numpy()
407+
function(model, test_input, **kwargs)
408+
else:
409+
test_input = ["Test predict function.", "Unbox is great!"]
410+
function(model, test_input, **kwargs)
411+
except Exception as e:
412+
exception_stack = "".join(
413+
traceback.format_exception(type(e), e, e.__traceback__)
414+
)
415+
raise UnboxResourceError(
416+
context="There is n issue with the specified `function`. \n",
417+
message=f"It is failing with the following error: \n{exception_stack} \n",
418+
mitigation="Make sure your function receives the model and the input as arguments, plus the additional kwargs.",
419+
)
386420

387421
# Transformers resources
388422
if model_type is ModelType.transformers:
@@ -681,9 +715,18 @@ def add_dataset(
681715
headers = next(reader)
682716
row_count = sum(1 for _ in reader)
683717

684-
# ----------------- Resource-schema consistency validations ---------------- #
685718
df = pd.read_csv(file_path, sep=sep)
686719

720+
if df.isnull().values.any():
721+
raise UnboxResourceError(
722+
context="There is an issue with the specified dataset. \n",
723+
message="The dataset contains missing values. \n",
724+
mitigation="Currently, Unbox does not support datasets with missing values."
725+
+ "Make sure to upload a training set sample without missing values by applying the same"
726+
+ " preprocessing steps expected by your model.",
727+
)
728+
729+
# ----------------- Resource-schema consistency validations ---------------- #
687730
# Label column validations
688731
try:
689732
headers.index(label_column_name)

unboxapi/schemas.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -89,9 +89,6 @@ class ModelSchema(Schema):
8989
max=64,
9090
),
9191
)
92-
function = fields.Function(
93-
required=True,
94-
)
9592
description = fields.Str(
9693
required=True,
9794
validate=validate.Length(

0 commit comments

Comments
 (0)