|
1 |
| -from .pandas_vb_common import * |
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import pandas.util.testing as tm |
2 | 4 | try:
|
3 | 5 | from pandas.api.types import union_categoricals
|
4 | 6 | except ImportError:
|
|
8 | 10 | pass
|
9 | 11 |
|
10 | 12 |
|
11 |
| -class Categoricals(object): |
| 13 | +class Concat(object): |
| 14 | + |
12 | 15 | goal_time = 0.2
|
13 | 16 |
|
14 | 17 | def setup(self):
|
15 |
| - N = 100000 |
16 |
| - self.s = pd.Series((list('aabbcd') * N)).astype('category') |
| 18 | + N = 10**5 |
| 19 | + self.s = pd.Series(list('aabbcd') * N).astype('category') |
| 20 | + |
| 21 | + self.a = pd.Categorical(list('aabbcd') * N) |
| 22 | + self.b = pd.Categorical(list('bbcdjk') * N) |
| 23 | + |
| 24 | + def time_concat(self): |
| 25 | + pd.concat([self.s, self.s]) |
| 26 | + |
| 27 | + def time_union(self): |
| 28 | + union_categoricals([self.a, self.b]) |
| 29 | + |
17 | 30 |
|
18 |
| - self.a = pd.Categorical((list('aabbcd') * N)) |
19 |
| - self.b = pd.Categorical((list('bbcdjk') * N)) |
| 31 | +class Constructor(object): |
20 | 32 |
|
| 33 | + goal_time = 0.2 |
| 34 | + |
| 35 | + def setup(self): |
| 36 | + N = 10**5 |
21 | 37 | self.categories = list('abcde')
|
22 |
| - self.cat_idx = Index(self.categories) |
| 38 | + self.cat_idx = pd.Index(self.categories) |
23 | 39 | self.values = np.tile(self.categories, N)
|
24 | 40 | self.codes = np.tile(range(len(self.categories)), N)
|
25 | 41 |
|
26 |
| - self.datetimes = pd.Series(pd.date_range( |
27 |
| - '1995-01-01 00:00:00', periods=10000, freq='s')) |
| 42 | + self.datetimes = pd.Series(pd.date_range('1995-01-01 00:00:00', |
| 43 | + periods=N / 10, |
| 44 | + freq='s')) |
| 45 | + self.datetimes_with_nat = self.datetimes.copy() |
| 46 | + self.datetimes_with_nat.iloc[-1] = pd.NaT |
28 | 47 |
|
29 | 48 | self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
|
30 | 49 | self.values_all_nan = [np.nan] * len(self.values)
|
31 | 50 |
|
32 |
| - def time_concat(self): |
33 |
| - concat([self.s, self.s]) |
34 |
| - |
35 |
| - def time_union(self): |
36 |
| - union_categoricals([self.a, self.b]) |
| 51 | + def time_regular(self): |
| 52 | + pd.Categorical(self.values, self.categories) |
37 | 53 |
|
38 |
| - def time_constructor_regular(self): |
39 |
| - Categorical(self.values, self.categories) |
| 54 | + def time_fastpath(self): |
| 55 | + pd.Categorical(self.codes, self.cat_idx, fastpath=True) |
40 | 56 |
|
41 |
| - def time_constructor_fastpath(self): |
42 |
| - Categorical(self.codes, self.cat_idx, fastpath=True) |
| 57 | + def time_datetimes(self): |
| 58 | + pd.Categorical(self.datetimes) |
43 | 59 |
|
44 |
| - def time_constructor_datetimes(self): |
45 |
| - Categorical(self.datetimes) |
| 60 | + def time_datetimes_with_nat(self): |
| 61 | + pd.Categorical(self.datetimes_with_nat) |
46 | 62 |
|
47 |
| - def time_constructor_datetimes_with_nat(self): |
48 |
| - t = self.datetimes |
49 |
| - t.iloc[-1] = pd.NaT |
50 |
| - Categorical(t) |
| 63 | + def time_with_nan(self): |
| 64 | + pd.Categorical(self.values_some_nan) |
51 | 65 |
|
52 |
| - def time_constructor_with_nan(self): |
53 |
| - Categorical(self.values_some_nan) |
| 66 | + def time_all_nan(self): |
| 67 | + pd.Categorical(self.values_all_nan) |
54 | 68 |
|
55 |
| - def time_constructor_all_nan(self): |
56 |
| - Categorical(self.values_all_nan) |
57 | 69 |
|
| 70 | +class ValueCounts(object): |
58 | 71 |
|
59 |
| -class Categoricals2(object): |
60 | 72 | goal_time = 0.2
|
61 | 73 |
|
62 |
| - def setup(self): |
63 |
| - n = 500000 |
| 74 | + params = [True, False] |
| 75 | + param_names = ['dropna'] |
| 76 | + |
| 77 | + def setup(self, dropna): |
| 78 | + n = 5 * 10**5 |
64 | 79 | np.random.seed(2718281)
|
65 | 80 | arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
|
66 |
| - self.ts = Series(arr).astype('category') |
| 81 | + self.ts = pd.Series(arr).astype('category') |
| 82 | + |
| 83 | + def time_value_counts(self, dropna): |
| 84 | + self.ts.value_counts(dropna=dropna) |
| 85 | + |
67 | 86 |
|
68 |
| - self.sel = self.ts.loc[[0]] |
| 87 | +class Repr(object): |
69 | 88 |
|
70 |
| - def time_value_counts(self): |
71 |
| - self.ts.value_counts(dropna=False) |
| 89 | + goal_time = 0.2 |
72 | 90 |
|
73 |
| - def time_value_counts_dropna(self): |
74 |
| - self.ts.value_counts(dropna=True) |
| 91 | + def setup(self): |
| 92 | + self.sel = pd.Series(['s1234']).astype('category') |
75 | 93 |
|
76 | 94 | def time_rendering(self):
|
77 | 95 | str(self.sel)
|
78 | 96 |
|
| 97 | + |
| 98 | +class SetCategories(object): |
| 99 | + |
| 100 | + goal_time = 0.2 |
| 101 | + |
| 102 | + def setup(self): |
| 103 | + n = 5 * 10**5 |
| 104 | + np.random.seed(2718281) |
| 105 | + arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
| 106 | + self.ts = pd.Series(arr).astype('category') |
| 107 | + |
79 | 108 | def time_set_categories(self):
|
80 | 109 | self.ts.cat.set_categories(self.ts.cat.categories[::2])
|
81 | 110 |
|
82 | 111 |
|
83 |
| -class Categoricals3(object): |
| 112 | +class Rank(object): |
| 113 | + |
84 | 114 | goal_time = 0.2
|
85 | 115 |
|
86 | 116 | def setup(self):
|
87 |
| - N = 100000 |
| 117 | + N = 10**5 |
88 | 118 | ncats = 100
|
| 119 | + np.random.seed(1234) |
89 | 120 |
|
90 |
| - self.s1 = Series(np.array(tm.makeCategoricalIndex(N, ncats))) |
91 |
| - self.s1_cat = self.s1.astype('category') |
92 |
| - self.s1_cat_ordered = self.s1.astype('category', ordered=True) |
| 121 | + self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) |
| 122 | + self.s_str_cat = self.s_str.astype('category') |
| 123 | + self.s_str_cat_ordered = self.s_str.astype('category', ordered=True) |
93 | 124 |
|
94 |
| - self.s2 = Series(np.random.randint(0, ncats, size=N)) |
95 |
| - self.s2_cat = self.s2.astype('category') |
96 |
| - self.s2_cat_ordered = self.s2.astype('category', ordered=True) |
| 125 | + self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) |
| 126 | + self.s_int_cat = self.s_int.astype('category') |
| 127 | + self.s_int_cat_ordered = self.s_int.astype('category', ordered=True) |
97 | 128 |
|
98 | 129 | def time_rank_string(self):
|
99 |
| - self.s1.rank() |
| 130 | + self.s_str.rank() |
100 | 131 |
|
101 | 132 | def time_rank_string_cat(self):
|
102 |
| - self.s1_cat.rank() |
| 133 | + self.s_str_cat.rank() |
103 | 134 |
|
104 | 135 | def time_rank_string_cat_ordered(self):
|
105 |
| - self.s1_cat_ordered.rank() |
| 136 | + self.s_str_cat_ordered.rank() |
106 | 137 |
|
107 | 138 | def time_rank_int(self):
|
108 |
| - self.s2.rank() |
| 139 | + self.s_int.rank() |
109 | 140 |
|
110 | 141 | def time_rank_int_cat(self):
|
111 |
| - self.s2_cat.rank() |
| 142 | + self.s_int_cat.rank() |
112 | 143 |
|
113 | 144 | def time_rank_int_cat_ordered(self):
|
114 |
| - self.s2_cat_ordered.rank() |
| 145 | + self.s_int_cat_ordered.rank() |
0 commit comments