 import os
 import pandas as pd
 import shutil
+import sys
 import tarfile
 import tempfile
 import traceback
 import uuid
+import warnings

 from enum import Enum
 from typing import Callable, Dict, List, Optional
 from marshmallow import ValidationError


+class HidePrints:
+    """Context manager that suppresses prints and warnings sent to stdout, including
+    Jupyter's stdout. Used to hide the print/warning statements that may be inside the
+    uploaded function while we test it.
+    """
+
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        sys.stdout = open(os.devnull, "w")
+        sys._jupyter_stdout = sys.stdout
+        warnings.filterwarnings("ignore")
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stdout = self._original_stdout
+        sys._jupyter_stdout = sys.stdout
+        warnings.filterwarnings("default")
+
+
 class DeploymentType(Enum):
     """Specify the storage medium being used by your Unbox deployment."""

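For reference, a minimal sketch of how the new HidePrints context manager is exercised further down in this diff when an uploaded predict function is test-run (the import path, user_function, and the dummy inputs are illustrative assumptions, not part of the commit):

    from unboxapi import HidePrints  # assumes HidePrints is importable from the package

    def user_function(model, text_list):
        # Hypothetical user-supplied predict function that prints while running.
        print("running inference...")
        return [[0.5, 0.5] for _ in text_list]

    # Any prints or warnings emitted inside the function are silenced during the test call.
    with HidePrints():
        user_function(None, ["Unbox is great!"])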
@@ -95,6 +116,10 @@ def create_project(
             description=description,
         )
         project_data = self.api.post_request(endpoint, body=payload)
+
+        print(
+            f"Creating project on Unbox! Check out https://unbox.ai/projects to have a look!"
+        )
         return Project(project_data, self.upload, self.subscription_plan, self)

     def load_project(self, name: str):
@@ -355,11 +380,11 @@ def add_model(
         ]:
             raise UnboxValidationError(
                 "`task_type` must be either TaskType.TabularClassification or TaskType.TextClassification.\n"
-            )
+            ) from None
         if model_type not in [model_framework for model_framework in ModelType]:
             raise UnboxValidationError(
                 "`model_type` must be one of the supported ModelTypes. Check out our API reference for a full list https://reference.unbox.ai/reference/api/unboxapi.ModelType.html.\n"
-            )
+            ) from None
         model_schema = ModelSchema()
         try:
             model_schema.load(
@@ -379,7 +404,7 @@ def add_model(
                 }
             )
         except ValidationError as err:
-            raise UnboxValidationError(self._format_error_message(err))
+            raise UnboxValidationError(self._format_error_message(err)) from None

         # --------------------------- Resource validations --------------------------- #
         # Requirements check
@@ -389,21 +414,21 @@ def add_model(
             raise UnboxResourceError(
                 f"The file path `{requirements_txt_file}` specified on `requirements_txt_file` does not"
                 " contain a file with the requirements.\n"
-            )
+            ) from None

         # Setup script
         if setup_script and not os.path.isfile(os.path.expanduser(setup_script)):
             raise UnboxResourceError(
                 f"The file path `{setup_script}` specified on `setup_script` does not"
                 " contain a file with the bash script with commands required before model loading.\n"
-            )
+            ) from None

         # Dependent dir
         if dependent_dir and dependent_dir == os.getcwd():
             raise UnboxResourceError(
                 "`dependent_dir` cannot be the working directory.\n",
-                mitigation=f"Make sure that the specified `dependent_dir` is different than {os.getcwd()}",
-            )
+                mitigation=f"Make sure that the specified `dependent_dir` is different from `{os.getcwd()}`.",
+            ) from None

         # Training set
         if task_type in [TaskType.TabularClassification, TaskType.TabularRegression]:
@@ -412,15 +437,15 @@ def add_model(
                     context="There is an issue with the specified `train_sample_df`.\n",
                     message=f"The `train_sample_df` is too small, with only {len(train_sample_df.index)} rows.\n",
                     mitigation="Make sure to upload a training set sample with at least 100 rows.",
-                )
+                ) from None
             if train_sample_df.isnull().values.any():
                 raise UnboxResourceError(
                     context="There is an issue with the specified `train_sample_df`.\n",
                     message=f"The `train_sample_df` contains missing values.\n",
                     mitigation="Currently, Unbox does not support datasets with missing values."
                     + "Make sure to upload a training set sample without missing values by applying the same"
                     + " preprocessing steps expected by your model.",
-                )
+                ) from None

             train_sample_df = train_sample_df.sample(
                 min(3000, len(train_sample_df.index))
@@ -430,7 +455,7 @@ def add_model(
         if not isinstance(function, Callable):
             raise UnboxValidationError(
                 f"- The argument `{function}` specified as `function` is not callable.\n"
-            )
+            ) from None

         user_args = function.__code__.co_varnames[: function.__code__.co_argcount][2:]
         kwarg_keys = tuple(kwargs)
@@ -439,26 +464,32 @@ def add_model(
                 context="There is an issue with the speficied `function`.\n",
                 message=f"Your function's additional args {user_args} do not match the kwargs you specifed {kwarg_keys}.\n",
                 mitigation=f"Make sure to include all of the required kwargs to run inference with your `function`.",
-            )
+            ) from None
         try:
             if task_type in [
                 TaskType.TabularClassification,
                 TaskType.TabularRegression,
             ]:
                 test_input = train_sample_df[:3][feature_names].to_numpy()
-                function(model, test_input, **kwargs)
+                with HidePrints():
+                    function(model, test_input, **kwargs)
             else:
-                test_input = ["Test predict function.", "Unbox is great!"]
-                function(model, test_input, **kwargs)
+                test_input = [
+                    "Unbox is great!",
+                    "Let's see if this function is ready for some error analysis",
+                ]
+                with HidePrints():
+                    function(model, test_input, **kwargs)
         except Exception as e:
             exception_stack = "".join(
                 traceback.format_exception(type(e), e, e.__traceback__)
             )
             raise UnboxResourceError(
-                context="There is n issue with the specified `function`.\n",
-                message=f"It is failing with the following error:\n{exception_stack}\n",
-                mitigation="Make sure your function receives the model and the input as arguments, plus the additional kwargs.",
-            )
+                context="There is an issue with the specified `function`.\n",
+                message=f"It is failing with the following error:\n{exception_stack}",
+                mitigation="Make sure your function receives the model and the input as arguments, plus the additional kwargs. Additionally,"
+                + " you may find it useful to debug it in a Jupyter notebook to ensure it is working correctly before uploading it.",
+            ) from None

         # Transformers resources
         if model_type is ModelType.transformers:
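As a rough sketch of what the checks above expect (predict_proba, client, and my_tokenizer are illustrative names, not part of the commit): the callable passed as `function` takes the model and the input batch as its first two arguments, and any extra positional arguments must match, by name, the kwargs handed to add_model, since the test call is made as `function(model, test_input, **kwargs)`.

    def predict_proba(model, text_list, tokenizer):
        # Arguments after (model, input) must line up with the kwargs given to add_model.
        encoded = [tokenizer(text) for text in text_list]
        return model.predict_proba(encoded)

    # e.g. client.add_model(..., function=predict_proba, tokenizer=my_tokenizer)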
@@ -467,7 +498,7 @@ def add_model(
                     context="There is a missing keyword argument for the specified model type.\n",
                     message="The `tokenizer` must be specified in kwargs when using a transformers model.\n",
                     mitigation="Make sure to specify the additional kwargs needed for the model type.",
-                )
+                ) from None

         # ------------------ Resource-schema consistency validations ----------------- #
         # Feature validations
@@ -486,7 +517,7 @@ def add_model(
             ]
             raise UnboxDatasetInconsistencyError(
                 f"The features {features_not_in_dataset} specified in `feature_names` are not on the dataset.\n"
-            )
+            ) from None

         required_fields = [
             (feature_names, "feature_names"),
@@ -498,7 +529,7 @@ def add_model(
                 raise UnboxDatasetInconsistencyError(
                     message=f"TabularClassification task with `{field}` missing.\n",
                     mitigation=f"Make sure to specify `{field}` for tabular classification tasks.",
-                )
+                ) from None

         with TempDirectory() as dir:
             bento_service = create_template_model(
@@ -568,14 +599,18 @@ def add_model(
                 categoricalFeatureNames=categorical_feature_names,
                 trainSampleLabelColumnName=train_sample_label_column_name,
             )
-            print("Uploading model to Unbox...")
+
             modeldata = self.upload(
                 endpoint=endpoint,
                 file_path=tarfile_path,
                 object_name="tarfile",
                 body=payload,
             )
             os.remove("template_model.py")
+
+            print(
+                f"Uploading model to Unbox! Check out https://unbox.ai/models to have a look!"
+            )
         return Model(modeldata)

     def add_dataset(
@@ -730,7 +765,7 @@ def add_dataset(
         ]:
             raise UnboxValidationError(
                 "`task_type` must be either TaskType.TabularClassification or TaskType.TextClassification.\n"
-            )
+            ) from None
         dataset_schema = DatasetSchema()
         try:
             dataset_schema.load(
@@ -750,15 +785,15 @@ def add_dataset(
                 }
             )
         except ValidationError as err:
-            raise UnboxValidationError(self._format_error_message(err))
+            raise UnboxValidationError(self._format_error_message(err)) from None

         # --------------------------- Resource validations --------------------------- #
         exp_file_path = os.path.expanduser(file_path)
         object_name = "original.csv"
         if not os.path.isfile(exp_file_path):
             raise UnboxResourceError(
-                f"The file path `{file_path}` specified on `file_path` does not contain a file with the dataset."
-            )
+                f"The file path `{file_path}` specified on `file_path` does not contain a file with the dataset.\n"
+            ) from None

         with open(exp_file_path, "rt") as f:
             reader = csv.reader(f, delimiter=sep)
@@ -774,7 +809,7 @@ def add_dataset(
                 mitigation="Currently, Unbox does not support datasets with missing values."
                 + "Make sure to upload a training set sample without missing values by applying the same"
                 + " preprocessing steps expected by your model.",
-            )
+            ) from None

         # ------------------ Resource-schema consistency validations ----------------- #
         # Label column validations
@@ -783,15 +818,15 @@ def add_dataset(
         except ValueError:
             raise UnboxDatasetInconsistencyError(
                 f"The column `{label_column_name}` specified as `label_column_name` is not on the dataset.\n"
-            )
+            ) from None

         dataset_classes = list(df[label_column_name].unique())
         if len(dataset_classes) > len(class_names):
             raise UnboxDatasetInconsistencyError(
                 f"There are {len(dataset_classes)} classes represented on the dataset, but there are only"
                 f"{len(class_names)} items on the `class_names` list.\n",
                 mitigation=f"Make sure that there are at most {len(class_names)} classes in your dataset.",
-            )
+            ) from None

         # Feature validations
         try:
@@ -803,14 +838,14 @@ def add_dataset(
             if text_column_name:
                 raise UnboxDatasetInconsistencyError(
                     f"The column `{text_column_name}` specified as `text_column_name` is not on the dataset.\n"
-                )
+                ) from None
             else:
                 features_not_in_dataset = [
                     feature for feature in feature_names if feature not in headers
                 ]
                 raise UnboxDatasetInconsistencyError(
                     f"The features {features_not_in_dataset} specified in `feature_names` are not on the dataset.\n"
-                )
+                ) from None

         # Tag column validation
         try:
@@ -819,22 +854,22 @@ def add_dataset(
         except ValueError:
             raise UnboxDatasetInconsistencyError(
                 f"The column `{tag_column_name}` specified as `tag_column_name` is not on the dataset.\n"
-            )
+            ) from None

         # ----------------------- Subscription plan validations ---------------------- #
         if row_count > self.subscription_plan["datasetSize"]:
             raise UnboxSubscriptionPlanException(
                 f"The dataset your are trying to upload contains {row_count} rows, which exceeds your plan's"
                 f" limit of {self.subscription_plan['datasetSize']}.\n"
-            )
+            ) from None
         if task_type == TaskType.TextClassification:
-            max_text_size = df.text_column.str.len().max()
+            max_text_size = df[text_column_name].str.len().max()
             # TODO: set limit per subscription plan
             if max_text_size > 100000:
                 raise UnboxSubscriptionPlanException(
                     f"The dataset you are trying to upload contains texts with {max_text_size} characters,"
                     "which exceeds your plan's limit of 100,000 characters."
-                )
+                ) from None

         endpoint = "datasets"
         payload = dict(
@@ -850,6 +885,9 @@ def add_dataset(
             featureNames=feature_names,
             categoricalFeatureNames=categorical_feature_names,
         )
+        print(
+            f"Uploading dataset to Unbox! Check out https://unbox.ai/datasets to have a look!"
+        )
         return Dataset(
             self.upload(
                 endpoint=endpoint,
@@ -999,6 +1037,11 @@ def add_dataframe(
         ... )
         >>> dataset.to_dict()
         """
+        # --------------------------- Resource validations --------------------------- #
+        if not isinstance(df, pd.DataFrame):
+            raise UnboxValidationError(
+                f"- `df` is a {type(df)}, but it must be a pandas dataframe (pd.DataFrame).\n"
+            )
         with tempfile.TemporaryDirectory() as tmp_dir:
             file_path = os.path.join(tmp_dir, str(uuid.uuid1()))
             df.to_csv(file_path, index=False)
@@ -1032,14 +1075,3 @@ def _format_error_message(err) -> str:
             temp_msg = list(msg.values())[0][0].lower()
             error_msg += f"- `{input}` contains items that are {temp_msg}\n"
         return error_msg
-
-    @staticmethod
-    def _validate_categorical_features(
-        df: pd.DataFrame, categorical_features_map: Dict[str, List[str]]
-    ):
-        for feature, options in categorical_features_map.items():
-            if len(df[feature].unique()) > len(options):
-                raise UnboxInvalidRequest(
-                    f"Feature '{feature}' contains more options in the df than provided "
-                    "for it in `categorical_features_map`"
-                )