Commit aea92fc

moved tests according to feedback
test_nlevels -> test_integrity.py
unsort tests -> test_sorting.py
duplicates and unique test -> test_unqi_dups.py
1 parent ea763f6 commit aea92fc

5 files changed: +310 additions, -300 deletions

pandas/tests/indexes/multi/test_equivalence.py

Lines changed: 18 additions & 0 deletions
@@ -232,3 +232,21 @@ def test_nulls(named_index):
     result = isna(index)
     tm.assert_numpy_array_equal(index.isna(), result)
     tm.assert_numpy_array_equal(index.notna(), ~result)
+
+
+def test_multiindex_compare():
+    # GH 21149
+    # Ensure comparison operations for MultiIndex with nlevels == 1
+    # behave consistently with those for MultiIndex with nlevels > 1
+
+    midx = pd.MultiIndex.from_product([[0, 1]])
+
+    # Equality self-test: MultiIndex object vs self
+    expected = pd.Series([True, True])
+    result = pd.Series(midx == midx)
+    tm.assert_series_equal(result, expected)
+
+    # Greater than comparison: MultiIndex object vs self
+    expected = pd.Series([False, False])
+    result = pd.Series(midx > midx)
+    tm.assert_series_equal(result, expected)
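
Note: the following is not part of the commit; it is a minimal standalone sketch of the behaviour the new test_multiindex_compare covers, assuming a pandas build in which GH 21149 is resolved. Elementwise comparison of a single-level MultiIndex returns a boolean array, matching the behaviour of a MultiIndex with more than one level.

import numpy as np
import pandas as pd

# A MultiIndex with a single level (nlevels == 1)
midx = pd.MultiIndex.from_product([[0, 1]])

# Elementwise comparisons yield boolean ndarrays, consistent with
# the multi-level case
eq = np.asarray(midx == midx)  # expected: array([ True,  True])
gt = np.asarray(midx > midx)   # expected: array([False, False])
print(eq, gt)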

pandas/tests/indexes/multi/test_integrity.py

Lines changed: 4 additions & 48 deletions
@@ -10,7 +10,6 @@
                    RangeIndex, compat, date_range)
 from pandas.compat import lrange, range
 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
-from pandas.errors import PerformanceWarning, UnsortedIndexError
 
 
 def test_labels_dtypes():
@@ -361,53 +360,6 @@ def test_rangeindex_fallback_coercion_bug():
     tm.assert_index_equal(result, expected)
 
 
-def test_unsortedindex():
-    # GH 11897
-    mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
-                                    ('x', 'b'), ('y', 'a'), ('z', 'b')],
-                                   names=['one', 'two'])
-    df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
-                      columns=['one', 'two'])
-
-    # GH 16734: not sorted, but no real slicing
-    result = df.loc(axis=0)['z', 'a']
-    expected = df.iloc[0]
-    tm.assert_series_equal(result, expected)
-
-    with pytest.raises(UnsortedIndexError):
-        df.loc(axis=0)['z', slice('a')]
-    df.sort_index(inplace=True)
-    assert len(df.loc(axis=0)['z', :]) == 2
-
-    with pytest.raises(KeyError):
-        df.loc(axis=0)['q', :]
-
-
-def test_unsortedindex_doc_examples():
-    # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
-    dfm = DataFrame({'jim': [0, 0, 1, 1],
-                     'joe': ['x', 'x', 'z', 'y'],
-                     'jolie': np.random.rand(4)})
-
-    dfm = dfm.set_index(['jim', 'joe'])
-    with tm.assert_produces_warning(PerformanceWarning):
-        dfm.loc[(1, 'z')]
-
-    with pytest.raises(UnsortedIndexError):
-        dfm.loc[(0, 'y'):(1, 'z')]
-
-    assert not dfm.index.is_lexsorted()
-    assert dfm.index.lexsort_depth == 1
-
-    # sort it
-    dfm = dfm.sort_index()
-    dfm.loc[(1, 'z')]
-    dfm.loc[(0, 'y'):(1, 'z')]
-
-    assert dfm.index.is_lexsorted()
-    assert dfm.index.lexsort_depth == 2
-
-
 def test_hash_error(indices):
     index = indices
     tm.assert_raises_regex(TypeError, "unhashable type: %r" %
@@ -446,3 +398,7 @@ def test_memory_usage(named_index):
 
     # we report 0 for no-length
     assert result == 0
+
+
+def test_nlevels(idx):
+    assert idx.nlevels == 2
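
Note: the following is not part of the commit; it is a minimal sketch of the sorting behaviour exercised by the relocated unsorted-index tests (moved to test_sorting.py per the commit message), assuming a pandas version contemporary with this commit, where Index.is_lexsorted and Index.lexsort_depth are public.

import numpy as np
import pandas as pd

# Frame indexed by an unsorted MultiIndex (same data as the doc example)
dfm = pd.DataFrame({'jim': [0, 0, 1, 1],
                    'joe': ['x', 'x', 'z', 'y'],
                    'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

print(dfm.index.is_lexsorted())  # expected: False
print(dfm.index.lexsort_depth)   # expected: 1 (only the first level is sorted)

# Ranged .loc slicing requires a lexsorted index, so sort first
dfm = dfm.sort_index()
print(dfm.index.is_lexsorted())  # expected: True
print(dfm.index.lexsort_depth)   # expected: 2
dfm.loc[(0, 'y'):(1, 'z')]       # ranged slicing now works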

pandas/tests/indexes/multi/test_operations.py

Lines changed: 1 addition & 251 deletions
@@ -1,16 +1,13 @@
 # -*- coding: utf-8 -*-
 
-import warnings
-from itertools import product
-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 import pytest
 from pandas import (DataFrame, DatetimeIndex, Float64Index, Index, Int64Index,
                     MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index,
                     compat, date_range, period_range)
-from pandas.compat import lrange, range, u
+from pandas.compat import lrange, range
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
 from pandas.util.testing import assert_copy
@@ -385,10 +382,6 @@ def test_sub(idx):
         first.tolist() - idx[-3:]
 
 
-def test_nlevels(idx):
-    assert idx.nlevels == 2
-
-
 def test_argsort(idx):
     result = idx.argsort()
     expected = idx.values.argsort()
@@ -410,249 +403,6 @@ def test_remove_unused_nan(level0, level1):
         assert('unused' not in result.levels[level])
 
 
-@pytest.mark.parametrize('names', [None, ['first', 'second']])
-def test_unique(names):
-    mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
-                                   names=names)
-
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
-                                   names=names)
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
-                                    names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
-                                   names=names)
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    # GH #20568 - empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=names)
-    res = mi.unique()
-    tm.assert_index_equal(mi, res)
-
-
-def test_unique_datetimelike():
-    idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
-                             '2015-01-01', 'NaT', 'NaT'])
-    idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
-                             '2015-01-02', 'NaT', '2015-01-01'],
-                            tz='Asia/Tokyo')
-    result = pd.MultiIndex.from_arrays([idx1, idx2]).unique()
-
-    eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
-    eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02',
-                              'NaT', '2015-01-01'],
-                             tz='Asia/Tokyo')
-    exp = pd.MultiIndex.from_arrays([eidx1, eidx2])
-    tm.assert_index_equal(result, exp)
-
-
-@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
-def test_unique_level(idx, level):
-    # GH #17896 - with level= argument
-    result = idx.unique(level=level)
-    expected = idx.get_level_values(level).unique()
-    tm.assert_index_equal(result, expected)
-
-    # With already unique level
-    mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
-                                   names=['first', 'second'])
-    result = mi.unique(level=level)
-    expected = mi.get_level_values(level)
-    tm.assert_index_equal(result, expected)
-
-    # With empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
-    result = mi.unique(level=level)
-    expected = mi.get_level_values(level)
-
-
-def test_multiindex_compare():
-    # GH 21149
-    # Ensure comparison operations for MultiIndex with nlevels == 1
-    # behave consistently with those for MultiIndex with nlevels > 1
-
-    midx = pd.MultiIndex.from_product([[0, 1]])
-
-    # Equality self-test: MultiIndex object vs self
-    expected = pd.Series([True, True])
-    result = pd.Series(midx == midx)
-    tm.assert_series_equal(result, expected)
-
-    # Greater than comparison: MultiIndex object vs self
-    expected = pd.Series([False, False])
-    result = pd.Series(midx > midx)
-    tm.assert_series_equal(result, expected)
-
-
-def test_duplicate_multiindex_labels():
-    # GH 17464
-    # Make sure that a MultiIndex with duplicate levels throws a ValueError
-    with pytest.raises(ValueError):
-        ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
-
-    # And that using set_levels with duplicate levels fails
-    ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
-                                  [1, 2, 1, 2, 3]])
-    with pytest.raises(ValueError):
-        ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
-                       inplace=True)
-
-
-@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'],
-                                   ['1', 'a', '1']])
-def test_duplicate_level_names(names):
-    # GH18872
-    pytest.raises(ValueError, pd.MultiIndex.from_product,
-                  [[0, 1]] * 3, names=names)
-
-    # With .rename()
-    mi = pd.MultiIndex.from_product([[0, 1]] * 3)
-    tm.assert_raises_regex(ValueError, "Duplicated level name:",
-                           mi.rename, names)
-
-    # With .rename(., level=)
-    mi.rename(names[0], level=1, inplace=True)
-    tm.assert_raises_regex(ValueError, "Duplicated level name:",
-                           mi.rename, names[:2], level=[0, 2])
-
-
-def test_duplicate_meta_data():
-    # GH 10115
-    index = MultiIndex(
-        levels=[[0, 1], [0, 1, 2]],
-        labels=[[0, 0, 0, 0, 1, 1, 1],
-                [0, 1, 2, 0, 0, 1, 2]])
-
-    for idx in [index,
-                index.set_names([None, None]),
-                index.set_names([None, 'Num']),
-                index.set_names(['Upper', 'Num']), ]:
-        assert idx.has_duplicates
-        assert idx.drop_duplicates().names == idx.names
-
-
-def test_duplicates(idx):
-    assert not idx.has_duplicates
-    assert idx.append(idx).has_duplicates
-
-    index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[
-        [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]])
-    assert index.has_duplicates
-
-    # GH 9075
-    t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
-         (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
-         (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135),
-         (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145),
-         (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158),
-         (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122),
-         (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160),
-         (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180),
-         (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143),
-         (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128),
-         (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129),
-         (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111),
-         (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114),
-         (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121),
-         (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126),
-         (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155),
-         (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
-         (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
-
-    index = pd.MultiIndex.from_tuples(t)
-    assert not index.has_duplicates
-
-    # handle int64 overflow if possible
-    def check(nlevels, with_nulls):
-        labels = np.tile(np.arange(500), 2)
-        level = np.arange(500)
-
-        if with_nulls:  # inject some null values
-            labels[500] = -1  # common nan value
-            labels = [labels.copy() for i in range(nlevels)]
-            for i in range(nlevels):
-                labels[i][500 + i - nlevels // 2] = -1
-
-            labels += [np.array([-1, 1]).repeat(500)]
-        else:
-            labels = [labels] * nlevels + [np.arange(2).repeat(500)]
-
-        levels = [level] * nlevels + [[0, 1]]
-
-        # no dups
-        index = MultiIndex(levels=levels, labels=labels)
-        assert not index.has_duplicates
-
-        # with a dup
-        if with_nulls:
-            def f(a):
-                return np.insert(a, 1000, a[0])
-            labels = list(map(f, labels))
-            index = MultiIndex(levels=levels, labels=labels)
-        else:
-            values = index.values.tolist()
-            index = MultiIndex.from_tuples(values + [values[0]])
-
-        assert index.has_duplicates
-
-    # no overflow
-    check(4, False)
-    check(4, True)
-
-    # overflow possible
-    check(8, False)
-    check(8, True)
-
-    # GH 9125
-    n, k = 200, 5000
-    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
-    labels = [np.random.choice(n, k * n) for lev in levels]
-    mi = MultiIndex(levels=levels, labels=labels)
-
-    for keep in ['first', 'last', False]:
-        left = mi.duplicated(keep=keep)
-        right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
-        tm.assert_numpy_array_equal(left, right)
-
-    # GH5873
-    for a in [101, 102]:
-        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
-        assert not mi.has_duplicates
-
-        with warnings.catch_warnings(record=True):
-            # Deprecated - see GH20239
-            assert mi.get_duplicates().equals(MultiIndex.from_arrays(
-                [[], []]))
-
-        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-            2, dtype='bool'))
-
-    for n in range(1, 6):  # 1st level shape
-        for m in range(1, 5):  # 2nd level shape
-            # all possible unique combinations, including nan
-            lab = product(range(-1, n), range(-1, m))
-            mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
-                            labels=np.random.permutation(list(lab)).T)
-            assert len(mi) == (n + 1) * (m + 1)
-            assert not mi.has_duplicates
-
-            with warnings.catch_warnings(record=True):
-                # Deprecated - see GH20239
-                assert mi.get_duplicates().equals(MultiIndex.from_arrays(
-                    [[], []]))
-
-            tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-                len(mi), dtype='bool'))
-
-
 def test_map(idx):
     # callable
     index = idx
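
Note: the following is not part of the commit; it is a minimal sketch of the duplicate-detection API exercised by the relocated test_duplicates, assuming a pandas version contemporary with this commit. MultiIndex.has_duplicates reports whether any index tuple repeats, and MultiIndex.duplicated(keep=...) marks which entries are the repeats.

import pandas as pd

mi = pd.MultiIndex.from_tuples([('a', 1), ('a', 1), ('b', 2)])

# ('a', 1) appears twice, so the index has duplicates
print(mi.has_duplicates)            # expected: True

# keep='first' flags every occurrence after the first
print(mi.duplicated(keep='first'))  # expected: [False  True False]

# keep=False flags all occurrences of any duplicated tuple
print(mi.duplicated(keep=False))    # expected: [ True  True False]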
