33import shutil
44import tarfile
55import tempfile
6+ import traceback
67import uuid
78from enum import Enum
8- from typing import Dict , List , Optional
9+ from typing import Callable , Dict , List , Optional
910
1011import pandas as pd
1112from bentoml .saved_bundle .bundler import _write_bento_content_to_dir
@@ -322,7 +323,6 @@ def add_model(
322323 model_schema .load (
323324 {
324325 "name" : name ,
325- "function" : function ,
326326 "description" : description ,
327327 "task_type" : task_type .value ,
328328 "model_type" : model_type .value ,
@@ -363,18 +363,33 @@ def add_model(
363363 mitigation = f"Make sure that the specified `dependent_dir` is different than { os .getcwd ()} " ,
364364 )
365365
366- # Training set size
366+ # Training set
367367 if task_type in [TaskType .TabularClassification , TaskType .TabularRegression ]:
368368 if len (train_sample_df .index ) < 100 :
369369 raise UnboxResourceError (
370370 context = "There is an issue with the specified `train_sample_df`. \n " ,
371371 message = f"The `train_sample_df` is too small, with only { len (train_sample_df .index )} rows. \n " ,
372372 mitigation = "Make sure to upload a training set sample with at least 100 rows." ,
373373 )
374+ if train_sample_df .isnull ().values .any ():
375+ raise UnboxResourceError (
376+ context = "There is an issue with the specified `train_sample_df`. \n " ,
377+ message = f"The `train_sample_df` contains missing values. \n " ,
378+ mitigation = "Currently, Unbox does not support datasets with missing values. "
379+ + "Make sure to upload a training set sample without missing values by applying the same"
380+ + " preprocessing steps expected by your model." ,
381+ )
382+
374383 train_sample_df = train_sample_df .sample (
375384 min (3000 , len (train_sample_df .index ))
376385 )
377- # predict_proba extra args
386+
387+ # predict_proba
388+ if not isinstance (function , Callable ):
389+ raise UnboxValidationError (
390+ f"- The argument `{ function } ` specified as `function` is not callable. \n "
391+ )
392+
378393 user_args = function .__code__ .co_varnames [: function .__code__ .co_argcount ][2 :]
379394 kwarg_keys = tuple (kwargs )
380395 if user_args != kwarg_keys :
@@ -383,6 +398,25 @@ def add_model(
383398 message = f"Your function's additional args { user_args } do not match the kwargs you specified { kwarg_keys } . \n " ,
384399 mitigation = f"Make sure to include all of the required kwargs to run inference with your `function`." ,
385400 )
401+ try :
402+ if task_type in [
403+ TaskType .TabularClassification ,
404+ TaskType .TabularRegression ,
405+ ]:
406+ test_input = train_sample_df [:3 ][feature_names ].to_numpy ()
407+ function (model , test_input , ** kwargs )
408+ else :
409+ test_input = ["Test predict function." , "Unbox is great!" ]
410+ function (model , test_input , ** kwargs )
411+ except Exception as e :
412+ exception_stack = "" .join (
413+ traceback .format_exception (type (e ), e , e .__traceback__ )
414+ )
415+ raise UnboxResourceError (
415+ context = "There is an issue with the specified `function`. \n " ,
417+ message = f"It is failing with the following error: \n { exception_stack } \n " ,
418+ mitigation = "Make sure your function receives the model and the input as arguments, plus the additional kwargs." ,
419+ )
386420
387421 # Transformers resources
388422 if model_type is ModelType .transformers :
@@ -681,9 +715,18 @@ def add_dataset(
681715 headers = next (reader )
682716 row_count = sum (1 for _ in reader )
683717
684- # ----------------- Resource-schema consistency validations ---------------- #
685718 df = pd .read_csv (file_path , sep = sep )
686719
720+ if df .isnull ().values .any ():
721+ raise UnboxResourceError (
722+ context = "There is an issue with the specified dataset. \n " ,
723+ message = "The dataset contains missing values. \n " ,
724+ mitigation = "Currently, Unbox does not support datasets with missing values. "
725+ + "Make sure to upload a dataset without missing values by applying the same"
726+ + " preprocessing steps expected by your model." ,
727+ )
728+
729+ # ----------------- Resource-schema consistency validations ---------------- #
687730 # Label column validations
688731 try :
689732 headers .index (label_column_name )
0 commit comments