|
16 | 16 |
|
17 | 17 | class Factorize(object):
|
18 | 18 |
|
19 |
| - params = [True, False] |
20 |
| - param_names = ['sort'] |
| 19 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 20 | + param_names = ['sort', 'dtype'] |
21 | 21 |
|
22 |
| - def setup(self, sort): |
| 22 | + def setup(self, sort, dtype): |
23 | 23 | N = 10**5
|
24 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
25 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
26 |
| - self.string_idx = tm.makeStringIndex(N) |
| 24 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 25 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 26 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 27 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 28 | + self.idx = data[dtype] |
27 | 29 |
|
28 |
| - def time_factorize_int(self, sort): |
29 |
| - self.int_idx.factorize(sort=sort) |
| 30 | + def time_factorize(self, sort, dtype): |
| 31 | + self.idx.factorize(sort=sort) |
30 | 32 |
|
31 |
| - def time_factorize_float(self, sort): |
32 |
| - self.float_idx.factorize(sort=sort) |
33 | 33 |
|
34 |
| - def time_factorize_string(self, sort): |
35 |
| - self.string_idx.factorize(sort=sort) |
| 34 | +class FactorizeUnique(object): |
36 | 35 |
|
| 36 | + params = [[True, False], ['int', 'uint', 'float', 'string']] |
| 37 | + param_names = ['sort', 'dtype'] |
37 | 38 |
|
38 |
| -class Duplicated(object): |
| 39 | + def setup(self, sort, dtype): |
| 40 | + N = 10**5 |
| 41 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 42 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 43 | + 'float': pd.Float64Index(np.arange(N)), |
| 44 | + 'string': tm.makeStringIndex(N)} |
| 45 | + self.idx = data[dtype] |
| 46 | + assert self.idx.is_unique |
39 | 47 |
|
40 |
| - params = ['first', 'last', False] |
41 |
| - param_names = ['keep'] |
| 48 | + def time_factorize(self, sort, dtype): |
| 49 | + self.idx.factorize(sort=sort) |
42 | 50 |
|
43 |
| - def setup(self, keep): |
44 |
| - N = 10**5 |
45 |
| - self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) |
46 |
| - self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) |
47 |
| - self.string_idx = tm.makeStringIndex(N) |
48 | 51 |
|
49 |
| - def time_duplicated_int(self, keep): |
50 |
| - self.int_idx.duplicated(keep=keep) |
| 52 | +class Duplicated(object): |
51 | 53 |
|
52 |
| - def time_duplicated_float(self, keep): |
53 |
| - self.float_idx.duplicated(keep=keep) |
| 54 | + params = [['first', 'last', False], ['int', 'uint', 'float', 'string']] |
| 55 | + param_names = ['keep', 'dtype'] |
54 | 56 |
|
55 |
| - def time_duplicated_string(self, keep): |
56 |
| - self.string_idx.duplicated(keep=keep) |
| 57 | + def setup(self, keep, dtype): |
| 58 | + N = 10**5 |
| 59 | + data = {'int': pd.Int64Index(np.arange(N).repeat(5)), |
| 60 | + 'uint': pd.UInt64Index(np.arange(N).repeat(5)), |
| 61 | + 'float': pd.Float64Index(np.random.randn(N).repeat(5)), |
| 62 | + 'string': tm.makeStringIndex(N).repeat(5)} |
| 63 | + self.idx = data[dtype] |
| 64 | + # cache is_unique |
| 65 | + self.idx.is_unique |
| 66 | + |
| 67 | + def time_duplicated(self, keep, dtype): |
| 68 | + self.idx.duplicated(keep=keep) |
57 | 69 |
|
58 | 70 |
|
59 | 71 | class DuplicatedUniqueIndex(object):
|
60 | 72 |
|
61 |
| - def setup(self): |
| 73 | + params = ['int', 'uint', 'float', 'string'] |
| 74 | + param_names = ['dtype'] |
| 75 | + |
| 76 | + def setup(self, dtype): |
62 | 77 | N = 10**5
|
63 |
| - self.idx_int_dup = pd.Int64Index(np.arange(N * 5)) |
| 78 | + data = {'int': pd.Int64Index(np.arange(N)), |
| 79 | + 'uint': pd.UInt64Index(np.arange(N)), |
| 80 | + 'float': pd.Float64Index(np.random.randn(N)), |
| 81 | + 'string': tm.makeStringIndex(N)} |
| 82 | + self.idx = data[dtype] |
64 | 83 | # cache is_unique
|
65 |
| - self.idx_int_dup.is_unique |
| 84 | + self.idx.is_unique |
66 | 85 |
|
67 |
| - def time_duplicated_unique_int(self): |
68 |
| - self.idx_int_dup.duplicated() |
| 86 | + def time_duplicated_unique(self, dtype): |
| 87 | + self.idx.duplicated() |
69 | 88 |
|
70 | 89 |
|
71 | 90 | class Match(object):
|
@@ -116,12 +135,13 @@ def time_series_dates(self, df):
|
116 | 135 | class Quantile(object):
|
117 | 136 | params = [[0, 0.5, 1],
|
118 | 137 | ['linear', 'nearest', 'lower', 'higher', 'midpoint'],
|
119 |
| - ['float', 'int']] |
| 138 | + ['float', 'int', 'uint']] |
120 | 139 | param_names = ['quantile', 'interpolation', 'dtype']
|
121 | 140 |
|
122 | 141 | def setup(self, quantile, interpolation, dtype):
|
123 | 142 | N = 10**5
|
124 | 143 | data = {'int': np.arange(N),
|
| 144 | + 'uint': np.arange(N).astype(np.uint64), |
125 | 145 | 'float': np.random.randn(N)}
|
126 | 146 | self.idx = pd.Series(data[dtype].repeat(5))
|
127 | 147 |
|
|
0 commit comments