@@ -1,16 +1,13 @@
 # -*- coding: utf-8 -*-

-import warnings
-from itertools import product
-
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
 import pytest
 from pandas import (DataFrame, DatetimeIndex, Float64Index, Index, Int64Index,
                     MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index,
                     compat, date_range, period_range)
-from pandas.compat import lrange, range, u
+from pandas.compat import lrange, range
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
 from pandas.util.testing import assert_copy
@@ -385,10 +382,6 @@ def test_sub(idx):
         first.tolist() - idx[-3:]


-def test_nlevels(idx):
-    assert idx.nlevels == 2
-
-
 def test_argsort(idx):
     result = idx.argsort()
     expected = idx.values.argsort()
@@ -410,249 +403,6 @@ def test_remove_unused_nan(level0, level1):
         assert('unused' not in result.levels[level])


-@pytest.mark.parametrize('names', [None, ['first', 'second']])
-def test_unique(names):
-    mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
-                                   names=names)
-
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
-                                   names=names)
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
-                                    names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
-                                   names=names)
-    res = mi.unique()
-    exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
-    tm.assert_index_equal(res, exp)
-
-    # GH #20568 - empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=names)
-    res = mi.unique()
-    tm.assert_index_equal(mi, res)
-
-
-def test_unique_datetimelike():
-    idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
-                             '2015-01-01', 'NaT', 'NaT'])
-    idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
-                             '2015-01-02', 'NaT', '2015-01-01'],
-                            tz='Asia/Tokyo')
-    result = pd.MultiIndex.from_arrays([idx1, idx2]).unique()
-
-    eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
-    eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02',
-                              'NaT', '2015-01-01'],
-                             tz='Asia/Tokyo')
-    exp = pd.MultiIndex.from_arrays([eidx1, eidx2])
-    tm.assert_index_equal(result, exp)
-
-
-@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
-def test_unique_level(idx, level):
-    # GH #17896 - with level= argument
-    result = idx.unique(level=level)
-    expected = idx.get_level_values(level).unique()
-    tm.assert_index_equal(result, expected)
-
-    # With already unique level
-    mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
-                                   names=['first', 'second'])
-    result = mi.unique(level=level)
-    expected = mi.get_level_values(level)
-    tm.assert_index_equal(result, expected)
-
-    # With empty MI
-    mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
-    result = mi.unique(level=level)
-    expected = mi.get_level_values(level)
-
-
-def test_multiindex_compare():
-    # GH 21149
-    # Ensure comparison operations for MultiIndex with nlevels == 1
-    # behave consistently with those for MultiIndex with nlevels > 1
-
-    midx = pd.MultiIndex.from_product([[0, 1]])
-
-    # Equality self-test: MultiIndex object vs self
-    expected = pd.Series([True, True])
-    result = pd.Series(midx == midx)
-    tm.assert_series_equal(result, expected)
-
-    # Greater than comparison: MultiIndex object vs self
-    expected = pd.Series([False, False])
-    result = pd.Series(midx > midx)
-    tm.assert_series_equal(result, expected)
-
-
-def test_duplicate_multiindex_labels():
-    # GH 17464
-    # Make sure that a MultiIndex with duplicate levels throws a ValueError
-    with pytest.raises(ValueError):
-        ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
-
-    # And that using set_levels with duplicate levels fails
-    ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
-                                  [1, 2, 1, 2, 3]])
-    with pytest.raises(ValueError):
-        ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
-                       inplace=True)
-
-
-@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'],
-                                   ['1', 'a', '1']])
-def test_duplicate_level_names(names):
-    # GH18872
-    pytest.raises(ValueError, pd.MultiIndex.from_product,
-                  [[0, 1]] * 3, names=names)
-
-    # With .rename()
-    mi = pd.MultiIndex.from_product([[0, 1]] * 3)
-    tm.assert_raises_regex(ValueError, "Duplicated level name:",
-                           mi.rename, names)
-
-    # With .rename(., level=)
-    mi.rename(names[0], level=1, inplace=True)
-    tm.assert_raises_regex(ValueError, "Duplicated level name:",
-                           mi.rename, names[:2], level=[0, 2])
-
-
-def test_duplicate_meta_data():
-    # GH 10115
-    index = MultiIndex(
-        levels=[[0, 1], [0, 1, 2]],
-        labels=[[0, 0, 0, 0, 1, 1, 1],
-                [0, 1, 2, 0, 0, 1, 2]])
-
-    for idx in [index,
-                index.set_names([None, None]),
-                index.set_names([None, 'Num']),
-                index.set_names(['Upper', 'Num']), ]:
-        assert idx.has_duplicates
-        assert idx.drop_duplicates().names == idx.names
-
-
-def test_duplicates(idx):
-    assert not idx.has_duplicates
-    assert idx.append(idx).has_duplicates
-
-    index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[
-        [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]])
-    assert index.has_duplicates
-
-    # GH 9075
-    t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
-         (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
-         (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135),
-         (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145),
-         (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158),
-         (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122),
-         (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160),
-         (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180),
-         (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143),
-         (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128),
-         (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129),
-         (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111),
-         (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114),
-         (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121),
-         (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126),
-         (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155),
-         (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
-         (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
-
-    index = pd.MultiIndex.from_tuples(t)
-    assert not index.has_duplicates
-
-    # handle int64 overflow if possible
-    def check(nlevels, with_nulls):
-        labels = np.tile(np.arange(500), 2)
-        level = np.arange(500)
-
-        if with_nulls:  # inject some null values
-            labels[500] = -1  # common nan value
-            labels = [labels.copy() for i in range(nlevels)]
-            for i in range(nlevels):
-                labels[i][500 + i - nlevels // 2] = -1
-
-            labels += [np.array([-1, 1]).repeat(500)]
-        else:
-            labels = [labels] * nlevels + [np.arange(2).repeat(500)]
-
-        levels = [level] * nlevels + [[0, 1]]
-
-        # no dups
-        index = MultiIndex(levels=levels, labels=labels)
-        assert not index.has_duplicates
-
-        # with a dup
-        if with_nulls:
-            def f(a):
-                return np.insert(a, 1000, a[0])
-            labels = list(map(f, labels))
-            index = MultiIndex(levels=levels, labels=labels)
-        else:
-            values = index.values.tolist()
-            index = MultiIndex.from_tuples(values + [values[0]])
-
-        assert index.has_duplicates
-
-    # no overflow
-    check(4, False)
-    check(4, True)
-
-    # overflow possible
-    check(8, False)
-    check(8, True)
-
-    # GH 9125
-    n, k = 200, 5000
-    levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
-    labels = [np.random.choice(n, k * n) for lev in levels]
-    mi = MultiIndex(levels=levels, labels=labels)
-
-    for keep in ['first', 'last', False]:
-        left = mi.duplicated(keep=keep)
-        right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
-        tm.assert_numpy_array_equal(left, right)
-
-    # GH5873
-    for a in [101, 102]:
-        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
-        assert not mi.has_duplicates
-
-        with warnings.catch_warnings(record=True):
-            # Deprecated - see GH20239
-            assert mi.get_duplicates().equals(MultiIndex.from_arrays(
-                [[], []]))
-
-        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-            2, dtype='bool'))
-
-    for n in range(1, 6):  # 1st level shape
-        for m in range(1, 5):  # 2nd level shape
-            # all possible unique combinations, including nan
-            lab = product(range(-1, n), range(-1, m))
-            mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
-                            labels=np.random.permutation(list(lab)).T)
-            assert len(mi) == (n + 1) * (m + 1)
-            assert not mi.has_duplicates
-
-            with warnings.catch_warnings(record=True):
-                # Deprecated - see GH20239
-                assert mi.get_duplicates().equals(MultiIndex.from_arrays(
-                    [[], []]))
-
-            tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
-                len(mi), dtype='bool'))
-
-
 def test_map(idx):
     # callable
     index = idx