Skip to content

Commit 5ce407c

Browse files
committed
CategoricalImputer: Error out when no mode is found
1 parent c2bccd1 commit 5ce407c

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

sklearn_pandas/categorical_imputer.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import pandas as pd
22
import numpy as np
33

4-
from collections import Counter
54

65
from sklearn.base import BaseEstimator, TransformerMixin
76
from sklearn.utils.validation import check_is_fitted
@@ -65,7 +64,12 @@ def fit(self, X, y=None):
6564
mask = _get_mask(X, self.missing_values)
6665
X = X[~mask]
6766

68-
self.fill_ = Counter(X).most_common(1)[0][0]
67+
modes = pd.Series(X).mode()
68+
if modes.shape[0] == 0:
69+
raise ValueError('No value is repeated more than '
70+
'once in the column')
71+
else:
72+
self.fill_ = modes[0]
6973

7074
return self
7175

tests/test_categorical_imputer.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,20 @@ def test_unit(input_type, none_value):
3434
assert (Xt == ['a', 'b', 'b', 'b']).all()
3535

3636

37+
@pytest.mark.parametrize('input_type', ['np', 'pd'])
38+
def test_no_mode(input_type):
39+
40+
data = ['a', 'b', 'c', np.nan]
41+
42+
if input_type == 'pd':
43+
X = pd.Series(data)
44+
else:
45+
X = np.asarray(data, dtype=object)
46+
47+
with pytest.raises(ValueError):
48+
CategoricalImputer().fit_transform(X)
49+
50+
3751
@pytest.mark.parametrize('input_type', ['np', 'pd'])
3852
def test_missing_values_param(input_type):
3953

0 commit comments

Comments
 (0)