66from .models import ModelType
77from .tasks import TaskType
88
9-
9+ # ---------------------------- Regular expressions --------------------------- #
# Validator for column-name strings: accepts one or more ASCII letters,
# digits, underscores, or hyphens, and rejects everything else with the
# message below. It is passed as `validate=` to the column-name fields of
# the schemas in this module.
#
# The original statement was a chained assignment
# (`COLUMN_NAME_REGEX = validate = ...`), which also bound a stray
# module-level name `validate` to the same object; only the intended
# constant is defined here.
#
# NOTE(review): in Python's `re`, `$` also matches just before a trailing
# newline, so a value such as "name\n" passes this pattern. Consider `\Z`
# if that must be rejected -- confirm no consumer relies on the current
# pattern text first.
COLUMN_NAME_REGEX = ma.validate.Regexp(
    r"^[a-zA-Z0-9_-]+$",
    error="strings that are not alphanumeric with underscores or hyphens."
    + " Spaces and special characters are not allowed.",
)
# Validator for a dataset's `language` value: two lowercase ASCII letters,
# optionally followed by a hyphen and two uppercase ASCII letters
# (for example "en" or "en-US").
LANGUAGE_CODE_REGEX = ma.validate.Regexp(
    regex=r"^[a-z]{2}(-[A-Z]{2})?$",
    error="`language` of the dataset is not in the ISO 639-1 (alpha-2 code) format.",
)
19+
20+ # ---------------------------------- Schemas --------------------------------- #
1021class CommitSchema (ma .Schema ):
1122 """Schema for commits."""
1223
@@ -23,11 +34,13 @@ class DatasetSchema(ma.Schema):
2334 """Schema for datasets."""
2435
2536 categoricalFeatureNames = ma .fields .List (
26- ma .fields .Str (), allow_none = True , load_default = []
37+ ma .fields .Str (validate = COLUMN_NAME_REGEX ),
38+ allow_none = True ,
39+ load_default = [],
2740 )
2841 classNames = ma .fields .List (ma .fields .Str (), required = True )
2942 columnNames = ma .fields .List (
30- ma .fields .Str (),
43+ ma .fields .Str (validate = COLUMN_NAME_REGEX ),
3144 required = True ,
3245 )
3346 label = ma .fields .Str (
@@ -40,21 +53,26 @@ class DatasetSchema(ma.Schema):
4053 required = True ,
4154 )
4255 featureNames = ma .fields .List (
43- ma .fields .Str (),
56+ ma .fields .Str (validate = COLUMN_NAME_REGEX ),
4457 load_default = [],
4558 )
46- labelColumnName = ma .fields .Str (required = True )
59+ labelColumnName = ma .fields .Str (
60+ validate = COLUMN_NAME_REGEX ,
61+ required = True ,
62+ )
4763 language = ma .fields .Str (
4864 load_default = "en" ,
49- validate = ma .validate .Regexp (
50- r"^[a-z]{2}(-[A-Z]{2})?$" ,
51- error = "`language` of the dataset is not in the ISO 639-1 (alpha-2 code) format." ,
52- ),
65+ validate = LANGUAGE_CODE_REGEX ,
5366 )
5467 metadata = ma .fields .Dict (allow_none = True , load_default = {})
55- predictionsColumnName = ma .fields .Str (allow_none = True , load_default = None )
68+ predictionsColumnName = ma .fields .Str (
69+ validate = COLUMN_NAME_REGEX ,
70+ allow_none = True ,
71+ load_default = None ,
72+ )
5673 sep = ma .fields .Str (load_default = "," )
5774 textColumnName = ma .fields .Str (
75+ validate = COLUMN_NAME_REGEX ,
5876 allow_none = True ,
5977 )
6078
@@ -70,7 +88,10 @@ def validates_label_column_not_in_feature_names(self, data, **kwargs):
7088class ModelSchema (ma .Schema ):
7189 """Schema for models with artifacts (i.e., model_package)."""
7290
73- categoricalFeatureNames = ma .fields .List (ma .fields .Str (), load_default = [])
91+ categoricalFeatureNames = ma .fields .List (
92+ ma .fields .Str (validate = COLUMN_NAME_REGEX ),
93+ load_default = [],
94+ )
7495 classNames = ma .fields .List (
7596 ma .fields .Str (),
7697 )
@@ -81,7 +102,11 @@ class ModelSchema(ma.Schema):
81102 max = 64 ,
82103 ),
83104 )
84- featureNames = ma .fields .List (ma .fields .Str (), allow_none = True , load_default = [])
105+ featureNames = ma .fields .List (
106+ ma .fields .Str (validate = COLUMN_NAME_REGEX ),
107+ allow_none = True ,
108+ load_default = [],
109+ )
85110 metadata = ma .fields .Dict (
86111 allow_none = True ,
87112 load_default = {},
0 commit comments