@@ -33,49 +33,46 @@ class CategoricalImputer(BaseEstimator, TransformerMixin):
3333 copy : boolean, optional (default=True)
3434 If True, a copy of X will be created.
3535
36- strategy : string, optional (default = 'mode')
37- If set to 'mode', replace all instances of `missing_values`
38- with the modal value. Otherwise, replace with
39- the value specified via `replacement`.
36+ strategy : string, optional (default = 'most_frequent')
37+ The imputation strategy.
4038
41- replacement : string, optional (default='?')
39+ - If "most_frequent", then replace missing values using the most frequent
40+ value along each column. Can be used with strings or numeric data.
41+ - If "constant", then replace missing values with fill_value. Can be
42+ used with strings or numeric data.
43+
44+ fill_value : string, optional (default='?')
4245 The value that all instances of `missing_values` are replaced
43- with if `strategy` is not set to 'mode' . This is useful if
46+ with if `strategy` is set to `constant`. This is useful if
4447 you don't want to impute with the mode, or if there are multiple
4548 modes in your data and you want to choose a particular one. If
46- `strategy` is set to `mode `, this parameter is ignored.
49+ `strategy` is not set to `constant`, this parameter is ignored.
4750
4851 Attributes
4952 ----------
5053 fill_ : str
51- Most frequent value of the training data.
54+ The fill value used for imputation.
5255
5356 """
5457
5558 def __init__ (
5659 self ,
5760 missing_values = 'NaN' ,
58- strategy = 'mode ' ,
59- replacement = None ,
61+ strategy = 'most_frequent ' ,
62+ fill_value = '?' ,
6063 copy = True
6164 ):
6265 self .missing_values = missing_values
6366 self .copy = copy
64- self .replacement = replacement
67+ self .fill_value = fill_value
6568 self .strategy = strategy
6669
67- strategies = ['fixed_value ' , 'mode ' ]
70+ strategies = ['constant ' , 'most_frequent ' ]
6871 if self .strategy not in strategies :
6972 raise ValueError (
7073 'Strategy {0} not in {1}' .format (self .strategy , strategies )
7174 )
7275
73- if self .strategy == 'fixed_value' and self .replacement is None :
74- raise ValueError (
75- 'Please specify a value for \' replacement\' '
76- 'when using the fixed_value strategy.'
77- )
78-
7976 def fit (self , X , y = None ):
8077 """
8178
@@ -95,10 +92,10 @@ def fit(self, X, y=None):
9592
9693 mask = _get_mask (X , self .missing_values )
9794 X = X [~ mask ]
98- if self .strategy == 'mode ' :
95+ if self .strategy == 'most_frequent ' :
9996 modes = pd .Series (X ).mode ()
100- elif self .strategy == 'fixed_value ' :
101- modes = np .array ([self .replacement ])
97+ elif self .strategy == 'constant ' :
98+ modes = np .array ([self .fill_value ])
10299 if modes .shape [0 ] == 0 :
103100 raise ValueError ('Data is empty or all values are null' )
104101 elif modes .shape [0 ] > 1 :
0 commit comments