@@ -33,49 +33,46 @@ class CategoricalImputer(BaseEstimator, TransformerMixin):
33
33
copy : boolean, optional (default=True)
34
34
If True, a copy of X will be created.
35
35
36
- strategy : string, optional (default = 'mode')
37
- If set to 'mode', replace all instances of `missing_values`
38
- with the modal value. Otherwise, replace with
39
- the value specified via `replacement`.
36
+ strategy : string, optional (default = 'most_frequent')
37
+ The imputation strategy.
40
38
41
- replacement : string, optional (default='?')
39
+ - If "most_frequent", then replace missing using the most frequent
40
+ value along each column. Can be used with strings or numeric data.
41
+ - If "constant", then replace missing values with fill_value. Can be
42
+ used with strings or numeric data.
43
+
44
+ fill_value : string, optional (default='?')
42
45
The value that all instances of `missing_values` are replaced
43
- with if `strategy` is not set to 'mode' . This is useful if
46
+ with if `strategy` is set to `constant`. This is useful if
44
47
you don't want to impute with the mode, or if there are multiple
45
48
modes in your data and you want to choose a particular one. If
46
- `strategy` is set to `mode `, this parameter is ignored.
49
+ `strategy` is not set to `constant`, this parameter is ignored.
47
50
48
51
Attributes
49
52
----------
50
53
fill_ : str
51
- Most frequent value of the training data.
54
+ The imputation fill value.
52
55
53
56
"""
54
57
55
58
def __init__ (
56
59
self ,
57
60
missing_values = 'NaN' ,
58
- strategy = 'mode ' ,
59
- replacement = None ,
61
+ strategy = 'most_frequent ' ,
62
+ fill_value = '?' ,
60
63
copy = True
61
64
):
62
65
self .missing_values = missing_values
63
66
self .copy = copy
64
- self .replacement = replacement
67
+ self .fill_value = fill_value
65
68
self .strategy = strategy
66
69
67
- strategies = ['fixed_value ' , 'mode ' ]
70
+ strategies = ['constant ' , 'most_frequent ' ]
68
71
if self .strategy not in strategies :
69
72
raise ValueError (
70
73
'Strategy {0} not in {1}' .format (self .strategy , strategies )
71
74
)
72
75
73
- if self .strategy == 'fixed_value' and self .replacement is None :
74
- raise ValueError (
75
- 'Please specify a value for \' replacement\' '
76
- 'when using the fixed_value strategy.'
77
- )
78
-
79
76
def fit (self , X , y = None ):
80
77
"""
81
78
@@ -95,10 +92,10 @@ def fit(self, X, y=None):
95
92
96
93
mask = _get_mask (X , self .missing_values )
97
94
X = X [~ mask ]
98
- if self .strategy == 'mode ' :
95
+ if self .strategy == 'most_frequent ' :
99
96
modes = pd .Series (X ).mode ()
100
- elif self .strategy == 'fixed_value ' :
101
- modes = np .array ([self .replacement ])
97
+ elif self .strategy == 'constant ' :
98
+ modes = np .array ([self .fill_value ])
102
99
if modes .shape [0 ] == 0 :
103
100
raise ValueError ('Data is empty or all values are null' )
104
101
elif modes .shape [0 ] > 1 :
0 commit comments