Skip to content

Commit c8a3eba

Browse files
committed
Merge pull request #7627 from immerrr/perf-multiindex-fromproduct
PERF: optimize MultiIndex.from_product
2 parents cba5720 + 0564d36 commit c8a3eba

File tree

4 files changed

+29
-4
lines changed

4 files changed

+29
-4
lines changed

doc/source/v0.14.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ Performance
151151
- Improvements in Series.transform for significant performance gains (:issue:`6496`)
152152
- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`)
153153
- Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`)
154-
154+
- Improvements in `MultiIndex.from_product` for large iterables (:issue:`7627`)
155155

156156

157157

pandas/core/index.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,10 +2875,14 @@ def from_product(cls, iterables, sortorder=None, names=None):
28752875
MultiIndex.from_arrays : Convert list of arrays to MultiIndex
28762876
MultiIndex.from_tuples : Convert list of tuples to MultiIndex
28772877
"""
2878+
from pandas.core.categorical import Categorical
28782879
from pandas.tools.util import cartesian_product
2879-
product = cartesian_product(iterables)
2880-
return MultiIndex.from_arrays(product, sortorder=sortorder,
2881-
names=names)
2880+
2881+
categoricals = [Categorical.from_array(it) for it in iterables]
2882+
labels = cartesian_product([c.labels for c in categoricals])
2883+
2884+
return MultiIndex(levels=[c.levels for c in categoricals],
2885+
labels=labels, sortorder=sortorder, names=names)
28822886

28832887
@property
28842888
def nlevels(self):

pandas/tests/test_index.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1861,6 +1861,15 @@ def test_from_product(self):
18611861
assert_array_equal(result, expected)
18621862
self.assertEqual(result.names, names)
18631863

1864+
def test_from_product_datetimeindex(self):
1865+
dt_index = pd.date_range('2000-01-01', periods=2)
1866+
mi = pd.MultiIndex.from_product([[1, 2], dt_index])
1867+
etalon = pd.lib.list_to_object_array([(1, pd.Timestamp('2000-01-01')),
1868+
(1, pd.Timestamp('2000-01-02')),
1869+
(2, pd.Timestamp('2000-01-01')),
1870+
(2, pd.Timestamp('2000-01-02'))])
1871+
assert_array_equal(mi.values, etalon)
1872+
18641873
def test_append(self):
18651874
result = self.index[:3].append(self.index[3:])
18661875
self.assertTrue(result.equals(self.index))

vb_suite/index_object.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,15 @@
105105
start_date=datetime(2014, 4, 13))
106106
index_float64_div = Benchmark('idx / 2', setup, name='index_float64_div',
107107
start_date=datetime(2014, 4, 13))
108+
109+
110+
# Constructing MultiIndex from cartesian product of iterables
111+
#
112+
113+
setup = common_setup + """
114+
iterables = [tm.makeStringIndex(10000), xrange(20)]
115+
"""
116+
117+
multiindex_from_product = Benchmark('MultiIndex.from_product(iterables)',
118+
setup, name='multiindex_from_product',
119+
start_date=datetime(2014, 6, 30))

0 commit comments

Comments
 (0)