|
1 |
| -from .pandas_vb_common import * |
2 |
| -try: |
3 |
| - from pandas.api.types import union_categoricals |
4 |
| -except ImportError: |
5 |
| - try: |
6 |
| - from pandas.types.concat import union_categoricals |
7 |
| - except ImportError: |
8 |
| - pass |
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import pandas.util.testing as tm |
| 4 | +from pandas.core.dtypes.concat import union_categoricals |
9 | 5 |
|
10 | 6 |
|
11 |
| -class Categoricals(object): |
| 7 | +class Concat(object): |
| 8 | + |
12 | 9 | goal_time = 0.2
|
13 | 10 |
|
14 | 11 | def setup(self):
|
15 |
| - N = 100000 |
16 |
| - self.s = pd.Series((list('aabbcd') * N)).astype('category') |
| 12 | + N = 10**5 |
| 13 | + self.s = pd.Series(list('aabbcd') * N).astype('category') |
| 14 | + |
| 15 | + self.a = pd.Categorical(list('aabbcd') * N) |
| 16 | + self.b = pd.Categorical(list('bbcdjk') * N) |
| 17 | + |
| 18 | + def time_concat(self): |
| 19 | + pd.concat([self.s, self.s]) |
| 20 | + |
| 21 | + def time_union(self): |
| 22 | + union_categoricals([self.a, self.b]) |
| 23 | + |
17 | 24 |
|
18 |
| - self.a = pd.Categorical((list('aabbcd') * N)) |
19 |
| - self.b = pd.Categorical((list('bbcdjk') * N)) |
| 25 | +class Constructor(object): |
20 | 26 |
|
| 27 | + goal_time = 0.2 |
| 28 | + |
| 29 | + def setup(self): |
| 30 | + N = 10**5 |
21 | 31 | self.categories = list('abcde')
|
22 |
| - self.cat_idx = Index(self.categories) |
| 32 | + self.cat_idx = pd.Index(self.categories) |
23 | 33 | self.values = np.tile(self.categories, N)
|
24 | 34 | self.codes = np.tile(range(len(self.categories)), N)
|
25 | 35 |
|
26 |
| - self.datetimes = pd.Series(pd.date_range( |
27 |
| - '1995-01-01 00:00:00', periods=10000, freq='s')) |
| 36 | + self.datetimes = pd.Series(pd.date_range('1995-01-01 00:00:00', |
| 37 | + periods=N / 10, |
| 38 | + freq='s')) |
| 39 | + self.datetimes_with_nat = self.datetimes.copy() |
| 40 | + self.datetimes_with_nat.iloc[-1] = pd.NaT |
28 | 41 |
|
29 | 42 | self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
|
30 | 43 | self.values_all_nan = [np.nan] * len(self.values)
|
31 | 44 |
|
32 |
| - def time_concat(self): |
33 |
| - concat([self.s, self.s]) |
34 |
| - |
35 |
| - def time_union(self): |
36 |
| - union_categoricals([self.a, self.b]) |
37 |
| - |
38 | 45 | def time_constructor_regular(self):
|
39 |
| - Categorical(self.values, self.categories) |
| 46 | + pd.Categorical(self.values, self.categories) |
40 | 47 |
|
41 | 48 | def time_constructor_fastpath(self):
|
42 |
| - Categorical(self.codes, self.cat_idx, fastpath=True) |
| 49 | + pd.Categorical(self.codes, self.cat_idx, fastpath=True) |
43 | 50 |
|
44 | 51 | def time_constructor_datetimes(self):
|
45 |
| - Categorical(self.datetimes) |
| 52 | + pd.Categorical(self.datetimes) |
46 | 53 |
|
47 | 54 | def time_constructor_datetimes_with_nat(self):
|
48 |
| - t = self.datetimes |
49 |
| - t.iloc[-1] = pd.NaT |
50 |
| - Categorical(t) |
| 55 | + pd.Categorical(self.datetimes_with_nat) |
51 | 56 |
|
52 | 57 | def time_constructor_with_nan(self):
|
53 |
| - Categorical(self.values_some_nan) |
| 58 | + pd.Categorical(self.values_some_nan) |
54 | 59 |
|
55 | 60 | def time_constructor_all_nan(self):
|
56 |
| - Categorical(self.values_all_nan) |
| 61 | + pd.Categorical(self.values_all_nan) |
57 | 62 |
|
58 | 63 |
|
59 |
| -class Categoricals2(object): |
| 64 | +class ValueCounts(object): |
| 65 | + |
60 | 66 | goal_time = 0.2
|
61 | 67 |
|
62 |
| - def setup(self): |
63 |
| - n = 500000 |
| 68 | + params = [True, False] |
| 69 | + param_names = ['dropna'] |
| 70 | + |
| 71 | + def setup(self, dropna): |
| 72 | + n = 5 * 10**5 |
64 | 73 | np.random.seed(2718281)
|
65 | 74 | arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
|
66 |
| - self.ts = Series(arr).astype('category') |
| 75 | + self.ts = pd.Series(arr).astype('category') |
| 76 | + self.dropna = dropna |
| 77 | + |
| 78 | + def time_value_counts(self, dropna): |
| 79 | + self.ts.value_counts(dropna=self.dropna) |
67 | 80 |
|
68 |
| - self.sel = self.ts.loc[[0]] |
69 | 81 |
|
70 |
| - def time_value_counts(self): |
71 |
| - self.ts.value_counts(dropna=False) |
| 82 | +class Repr(object): |
72 | 83 |
|
73 |
| - def time_value_counts_dropna(self): |
74 |
| - self.ts.value_counts(dropna=True) |
| 84 | + goal_time = 0.2 |
| 85 | + |
| 86 | + def setup(self): |
| 87 | + self.sel = pd.Series(['s1234']).astype('category') |
75 | 88 |
|
76 | 89 | def time_rendering(self):
|
77 | 90 | str(self.sel)
|
78 | 91 |
|
| 92 | + |
| 93 | +class SetCategories(object): |
| 94 | + |
| 95 | + goal_time = 0.2 |
| 96 | + |
| 97 | + def setup(self): |
| 98 | + n = 5 * 10**5 |
| 99 | + np.random.seed(2718281) |
| 100 | + arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] |
| 101 | + self.ts = pd.Series(arr).astype('category') |
| 102 | + |
79 | 103 | def time_set_categories(self):
|
80 | 104 | self.ts.cat.set_categories(self.ts.cat.categories[::2])
|
81 | 105 |
|
82 | 106 |
|
83 |
| -class Categoricals3(object): |
| 107 | +class Rank(object): |
| 108 | + |
84 | 109 | goal_time = 0.2
|
85 | 110 |
|
86 | 111 | def setup(self):
|
87 |
| - N = 100000 |
| 112 | + N = 10**5 |
88 | 113 | ncats = 100
|
| 114 | + np.random.seed(1234) |
89 | 115 |
|
90 |
| - self.s1 = Series(np.array(tm.makeCategoricalIndex(N, ncats))) |
91 |
| - self.s1_cat = self.s1.astype('category') |
92 |
| - self.s1_cat_ordered = self.s1.astype('category', ordered=True) |
| 116 | + self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) |
| 117 | + self.s_str_cat = self.s_str.astype('category') |
| 118 | + self.s_str_cat_ordered = self.s_str.astype('category', ordered=True) |
93 | 119 |
|
94 |
| - self.s2 = Series(np.random.randint(0, ncats, size=N)) |
95 |
| - self.s2_cat = self.s2.astype('category') |
96 |
| - self.s2_cat_ordered = self.s2.astype('category', ordered=True) |
| 120 | + self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) |
| 121 | + self.s_int_cat = self.s_int.astype('category') |
| 122 | + self.s_int_cat_ordered = self.s_int.astype('category', ordered=True) |
97 | 123 |
|
98 | 124 | def time_rank_string(self):
|
99 |
| - self.s1.rank() |
| 125 | + self.s_str.rank() |
100 | 126 |
|
101 | 127 | def time_rank_string_cat(self):
|
102 |
| - self.s1_cat.rank() |
| 128 | + self.s_str_cat.rank() |
103 | 129 |
|
104 | 130 | def time_rank_string_cat_ordered(self):
|
105 |
| - self.s1_cat_ordered.rank() |
| 131 | + self.s_str_cat_ordered.rank() |
106 | 132 |
|
107 | 133 | def time_rank_int(self):
|
108 |
| - self.s2.rank() |
| 134 | + self.s_int.rank() |
109 | 135 |
|
110 | 136 | def time_rank_int_cat(self):
|
111 |
| - self.s2_cat.rank() |
| 137 | + self.s_int_cat.rank() |
112 | 138 |
|
113 | 139 | def time_rank_int_cat_ordered(self):
|
114 |
| - self.s2_cat_ordered.rank() |
| 140 | + self.s_int_cat_ordered.rank() |
0 commit comments