Skip to content

PERF: optimize MultiIndex.from_product #7627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 1, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ Performance
- Improvements in Series.transform for significant performance gains (:issue:`6496`)
- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`)
- Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`)

- Improvements in ``MultiIndex.from_product`` for large iterables (:issue:`7627`)



Expand Down
10 changes: 7 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2875,10 +2875,14 @@ def from_product(cls, iterables, sortorder=None, names=None):
MultiIndex.from_arrays : Convert list of arrays to MultiIndex
MultiIndex.from_tuples : Convert list of tuples to MultiIndex
"""
from pandas.core.categorical import Categorical
from pandas.tools.util import cartesian_product
product = cartesian_product(iterables)
return MultiIndex.from_arrays(product, sortorder=sortorder,
names=names)

categoricals = [Categorical.from_array(it) for it in iterables]
labels = cartesian_product([c.labels for c in categoricals])

return MultiIndex(levels=[c.levels for c in categoricals],
labels=labels, sortorder=sortorder, names=names)

@property
def nlevels(self):
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,15 @@ def test_from_product(self):
assert_array_equal(result, expected)
self.assertEqual(result.names, names)

def test_from_product_datetimeindex(self):
    # The product of [1, 2] with a two-day date range should give four
    # (int, Timestamp) tuples, with the integer level varying slowest.
    dates = pd.date_range('2000-01-01', periods=2)
    result = pd.MultiIndex.from_product([[1, 2], dates])

    first_day = pd.Timestamp('2000-01-01')
    second_day = pd.Timestamp('2000-01-02')
    expected = pd.lib.list_to_object_array([
        (1, first_day),
        (1, second_day),
        (2, first_day),
        (2, second_day),
    ])
    assert_array_equal(result.values, expected)

def test_append(self):
result = self.index[:3].append(self.index[3:])
self.assertTrue(result.equals(self.index))
Expand Down
12 changes: 12 additions & 0 deletions vb_suite/index_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,15 @@
start_date=datetime(2014, 4, 13))
index_float64_div = Benchmark('idx / 2', setup, name='index_float64_div',
start_date=datetime(2014, 4, 13))


# Benchmark: build a MultiIndex as the cartesian product of a
# 10000-entry string index and a 20-element integer range.

setup = (
    common_setup
    + """
iterables = [tm.makeStringIndex(10000), xrange(20)]
"""
)

multiindex_from_product = Benchmark(
    'MultiIndex.from_product(iterables)',
    setup,
    name='multiindex_from_product',
    start_date=datetime(2014, 6, 30)
)