Skip to content

Commit 50407cf

Browse files
committed
CLN: ASV categoricals benchmark
Split the benchmarks into more classes and fix flake8 issues
1 parent 412988e commit 50407cf

File tree

1 file changed

+79
-53
lines changed

1 file changed

+79
-53
lines changed

asv_bench/benchmarks/categoricals.py

Lines changed: 79 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1,114 +1,140 @@
1-
from .pandas_vb_common import *
2-
try:
3-
from pandas.api.types import union_categoricals
4-
except ImportError:
5-
try:
6-
from pandas.types.concat import union_categoricals
7-
except ImportError:
8-
pass
1+
import numpy as np
2+
import pandas as pd
3+
import pandas.util.testing as tm
4+
from pandas.core.dtypes.concat import union_categoricals
95

106

11-
class Categoricals(object):
7+
class Concat(object):
8+
129
goal_time = 0.2
1310

1411
def setup(self):
15-
N = 100000
16-
self.s = pd.Series((list('aabbcd') * N)).astype('category')
12+
N = 10**5
13+
self.s = pd.Series(list('aabbcd') * N).astype('category')
14+
15+
self.a = pd.Categorical(list('aabbcd') * N)
16+
self.b = pd.Categorical(list('bbcdjk') * N)
17+
18+
def time_concat(self):
19+
pd.concat([self.s, self.s])
20+
21+
def time_union(self):
22+
union_categoricals([self.a, self.b])
23+
1724

18-
self.a = pd.Categorical((list('aabbcd') * N))
19-
self.b = pd.Categorical((list('bbcdjk') * N))
25+
class Constructor(object):
2026

27+
goal_time = 0.2
28+
29+
def setup(self):
30+
N = 10**5
2131
self.categories = list('abcde')
22-
self.cat_idx = Index(self.categories)
32+
self.cat_idx = pd.Index(self.categories)
2333
self.values = np.tile(self.categories, N)
2434
self.codes = np.tile(range(len(self.categories)), N)
2535

26-
self.datetimes = pd.Series(pd.date_range(
27-
'1995-01-01 00:00:00', periods=10000, freq='s'))
36+
self.datetimes = pd.Series(pd.date_range('1995-01-01 00:00:00',
37+
periods=N / 10,
38+
freq='s'))
39+
self.datetimes_with_nat = self.datetimes.copy()
40+
self.datetimes_with_nat.iloc[-1] = pd.NaT
2841

2942
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
3043
self.values_all_nan = [np.nan] * len(self.values)
3144

32-
def time_concat(self):
33-
concat([self.s, self.s])
34-
35-
def time_union(self):
36-
union_categoricals([self.a, self.b])
37-
3845
def time_constructor_regular(self):
39-
Categorical(self.values, self.categories)
46+
pd.Categorical(self.values, self.categories)
4047

4148
def time_constructor_fastpath(self):
42-
Categorical(self.codes, self.cat_idx, fastpath=True)
49+
pd.Categorical(self.codes, self.cat_idx, fastpath=True)
4350

4451
def time_constructor_datetimes(self):
45-
Categorical(self.datetimes)
52+
pd.Categorical(self.datetimes)
4653

4754
def time_constructor_datetimes_with_nat(self):
48-
t = self.datetimes
49-
t.iloc[-1] = pd.NaT
50-
Categorical(t)
55+
pd.Categorical(self.datetimes_with_nat)
5156

5257
def time_constructor_with_nan(self):
53-
Categorical(self.values_some_nan)
58+
pd.Categorical(self.values_some_nan)
5459

5560
def time_constructor_all_nan(self):
56-
Categorical(self.values_all_nan)
61+
pd.Categorical(self.values_all_nan)
5762

5863

59-
class Categoricals2(object):
64+
class ValueCounts(object):
65+
6066
goal_time = 0.2
6167

62-
def setup(self):
63-
n = 500000
68+
params = [True, False]
69+
param_names = ['dropna']
70+
71+
def setup(self, dropna):
72+
n = 5 * 10**5
6473
np.random.seed(2718281)
6574
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
66-
self.ts = Series(arr).astype('category')
75+
self.ts = pd.Series(arr).astype('category')
76+
self.dropna = dropna
77+
78+
def time_value_counts(self, dropna):
79+
self.ts.value_counts(dropna=self.dropna)
6780

68-
self.sel = self.ts.loc[[0]]
6981

70-
def time_value_counts(self):
71-
self.ts.value_counts(dropna=False)
82+
class Repr(object):
7283

73-
def time_value_counts_dropna(self):
74-
self.ts.value_counts(dropna=True)
84+
goal_time = 0.2
85+
86+
def setup(self):
87+
self.sel = pd.Series(['s1234']).astype('category')
7588

7689
def time_rendering(self):
7790
str(self.sel)
7891

92+
93+
class SetCategories(object):
94+
95+
goal_time = 0.2
96+
97+
def setup(self):
98+
n = 5 * 10**5
99+
np.random.seed(2718281)
100+
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
101+
self.ts = pd.Series(arr).astype('category')
102+
79103
def time_set_categories(self):
80104
self.ts.cat.set_categories(self.ts.cat.categories[::2])
81105

82106

83-
class Categoricals3(object):
107+
class Rank(object):
108+
84109
goal_time = 0.2
85110

86111
def setup(self):
87-
N = 100000
112+
N = 10**5
88113
ncats = 100
114+
np.random.seed(1234)
89115

90-
self.s1 = Series(np.array(tm.makeCategoricalIndex(N, ncats)))
91-
self.s1_cat = self.s1.astype('category')
92-
self.s1_cat_ordered = self.s1.astype('category', ordered=True)
116+
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
117+
self.s_str_cat = self.s_str.astype('category')
118+
self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
93119

94-
self.s2 = Series(np.random.randint(0, ncats, size=N))
95-
self.s2_cat = self.s2.astype('category')
96-
self.s2_cat_ordered = self.s2.astype('category', ordered=True)
120+
self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
121+
self.s_int_cat = self.s_int.astype('category')
122+
self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
97123

98124
def time_rank_string(self):
99-
self.s1.rank()
125+
self.s_str.rank()
100126

101127
def time_rank_string_cat(self):
102-
self.s1_cat.rank()
128+
self.s_str_cat.rank()
103129

104130
def time_rank_string_cat_ordered(self):
105-
self.s1_cat_ordered.rank()
131+
self.s_str_cat_ordered.rank()
106132

107133
def time_rank_int(self):
108-
self.s2.rank()
134+
self.s_int.rank()
109135

110136
def time_rank_int_cat(self):
111-
self.s2_cat.rank()
137+
self.s_int_cat.rank()
112138

113139
def time_rank_int_cat_ordered(self):
114-
self.s2_cat_ordered.rank()
140+
self.s_int_cat_ordered.rank()

0 commit comments

Comments
 (0)