Skip to content

Commit 22601f7

Browse files
mroeschke authored and jreback committed
CLN: ASV reshape (#18944)
1 parent ee9c7e9 commit 22601f7

File tree

1 file changed

+53
-58
lines changed

1 file changed

+53
-58
lines changed

asv_bench/benchmarks/reshape.py

Lines changed: 53 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
1-
from .pandas_vb_common import *
2-
from pandas import melt, wide_to_long
1+
from itertools import product
32

3+
import numpy as np
4+
from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
5+
6+
from .pandas_vb_common import setup # noqa
7+
8+
9+
class Melt(object):
410

5-
class melt_dataframe(object):
611
goal_time = 0.2
712

813
def setup(self):
9-
self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)])
10-
self.df = DataFrame(np.random.randn(10000, 4), index=self.index)
1114
self.df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C'])
1215
self.df['id1'] = np.random.randint(0, 10, 10000)
1316
self.df['id2'] = np.random.randint(100, 1000, 10000)
@@ -16,110 +19,102 @@ def time_melt_dataframe(self):
1619
melt(self.df, id_vars=['id1', 'id2'])
1720

1821

19-
class reshape_pivot_time_series(object):
22+
class Pivot(object):
23+
2024
goal_time = 0.2
2125

2226
def setup(self):
23-
self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)])
24-
self.df = DataFrame(np.random.randn(10000, 4), index=self.index)
25-
self.index = date_range('1/1/2000', periods=10000, freq='h')
26-
self.df = DataFrame(randn(10000, 50), index=self.index, columns=range(50))
27-
self.pdf = self.unpivot(self.df)
28-
self.f = (lambda : self.pdf.pivot('date', 'variable', 'value'))
27+
N = 10000
28+
index = date_range('1/1/2000', periods=N, freq='h')
29+
data = {'value': np.random.randn(N * 50),
30+
'variable': np.arange(50).repeat(N),
31+
'date': np.tile(index.values, 50)}
32+
self.df = DataFrame(data)
2933

3034
def time_reshape_pivot_time_series(self):
31-
self.f()
35+
self.df.pivot('date', 'variable', 'value')
3236

33-
def unpivot(self, frame):
34-
(N, K) = frame.shape
35-
self.data = {'value': frame.values.ravel('F'), 'variable': np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K), }
36-
return DataFrame(self.data, columns=['date', 'variable', 'value'])
3737

38+
class SimpleReshape(object):
3839

39-
class reshape_stack_simple(object):
4040
goal_time = 0.2
4141

4242
def setup(self):
43-
self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)])
44-
self.df = DataFrame(np.random.randn(10000, 4), index=self.index)
43+
arrays = [np.arange(100).repeat(100),
44+
np.roll(np.tile(np.arange(100), 100), 25)]
45+
index = MultiIndex.from_arrays(arrays)
46+
self.df = DataFrame(np.random.randn(10000, 4), index=index)
4547
self.udf = self.df.unstack(1)
4648

47-
def time_reshape_stack_simple(self):
49+
def time_stack(self):
4850
self.udf.stack()
4951

50-
51-
class reshape_unstack_simple(object):
52-
goal_time = 0.2
53-
54-
def setup(self):
55-
self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)])
56-
self.df = DataFrame(np.random.randn(10000, 4), index=self.index)
57-
58-
def time_reshape_unstack_simple(self):
52+
def time_unstack(self):
5953
self.df.unstack(1)
6054

6155

62-
class reshape_unstack_large_single_dtype(object):
56+
class Unstack(object):
57+
6358
goal_time = 0.2
6459

6560
def setup(self):
6661
m = 100
6762
n = 1000
6863

6964
levels = np.arange(m)
70-
index = pd.MultiIndex.from_product([levels]*2)
65+
index = MultiIndex.from_product([levels] * 2)
7166
columns = np.arange(n)
72-
values = np.arange(m*m*n).reshape(m*m, n)
73-
self.df = pd.DataFrame(values, index, columns)
67+
values = np.arange(m * m * n).reshape(m * m, n)
68+
self.df = DataFrame(values, index, columns)
7469
self.df2 = self.df.iloc[:-1]
7570

76-
def time_unstack_full_product(self):
71+
def time_full_product(self):
7772
self.df.unstack()
7873

79-
def time_unstack_with_mask(self):
74+
def time_without_last_row(self):
8075
self.df2.unstack()
8176

8277

83-
class unstack_sparse_keyspace(object):
78+
class SparseIndex(object):
79+
8480
goal_time = 0.2
8581

8682
def setup(self):
87-
self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)])
88-
self.df = DataFrame(np.random.randn(10000, 4), index=self.index)
89-
self.NUM_ROWS = 1000
90-
for iter in range(10):
91-
self.df = DataFrame({'A': np.random.randint(50, size=self.NUM_ROWS), 'B': np.random.randint(50, size=self.NUM_ROWS), 'C': np.random.randint((-10), 10, size=self.NUM_ROWS), 'D': np.random.randint((-10), 10, size=self.NUM_ROWS), 'E': np.random.randint(10, size=self.NUM_ROWS), 'F': np.random.randn(self.NUM_ROWS), })
92-
self.idf = self.df.set_index(['A', 'B', 'C', 'D', 'E'])
93-
if (len(self.idf.index.unique()) == self.NUM_ROWS):
94-
break
83+
NUM_ROWS = 1000
84+
self.df = DataFrame({'A': np.random.randint(50, size=NUM_ROWS),
85+
'B': np.random.randint(50, size=NUM_ROWS),
86+
'C': np.random.randint(-10, 10, size=NUM_ROWS),
87+
'D': np.random.randint(-10, 10, size=NUM_ROWS),
88+
'E': np.random.randint(10, size=NUM_ROWS),
89+
'F': np.random.randn(NUM_ROWS)})
90+
self.df = self.df.set_index(['A', 'B', 'C', 'D', 'E'])
91+
92+
def time_unstack(self):
93+
self.df.unstack()
9594

96-
def time_unstack_sparse_keyspace(self):
97-
self.idf.unstack()
9895

96+
class WideToLong(object):
9997

100-
class wide_to_long_big(object):
10198
goal_time = 0.2
10299

103100
def setup(self):
104-
vars = 'ABCD'
105101
nyrs = 20
106102
nidvars = 20
107103
N = 5000
108-
yrvars = []
109-
for var in vars:
110-
for yr in range(1, nyrs + 1):
111-
yrvars.append(var + str(yr))
104+
self.letters = list('ABCD')
105+
yrvars = [l + str(num)
106+
for l, num in product(self.letters, range(1, nyrs + 1))]
112107

113-
self.df = pd.DataFrame(np.random.randn(N, nidvars + len(yrvars)),
114-
columns=list(range(nidvars)) + yrvars)
115-
self.vars = vars
108+
self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)),
109+
columns=list(range(nidvars)) + yrvars)
110+
self.df['id'] = self.df.index
116111

117112
def time_wide_to_long_big(self):
118-
self.df['id'] = self.df.index
119-
wide_to_long(self.df, list(self.vars), i='id', j='year')
113+
wide_to_long(self.df, self.letters, i='id', j='year')
120114

121115

122116
class PivotTable(object):
117+
123118
goal_time = 0.2
124119

125120
def setup(self):

0 commit comments

Comments
 (0)