@@ -491,6 +491,146 @@ def test_grouper_column_and_index(self):
491
491
expected = df_single .reset_index ().groupby (['inner' , 'B' ]).mean ()
492
492
assert_frame_equal (result , expected )
493
493
494
def test_grouper_column_and_index_sugar(self):
    """Check that plain strings in ``by`` may name index levels.

    Every groupby written with a bare ``'inner'`` string is compared
    against the same groupby written with ``pd.Grouper(level='inner')``,
    on a MultiIndex frame and on a single-Index frame.
    """
    # GH 5677, allow strings passed as the `by` parameter to reference
    # columns or index levels
    tuples = [('a', 1), ('a', 2), ('a', 3),
              ('b', 1), ('b', 2), ('b', 3)]
    multi_index = pd.MultiIndex.from_tuples(tuples,
                                            names=['outer', 'inner'])
    df_multi = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one']},
        index=multi_index)

    # Same data, but with only 'inner' left in the index
    df_single = df_multi.reset_index('outer')

    def spelled_out(name):
        # Build a fresh Grouper on every call so that no grouper state
        # is shared between separate groupby operations.
        if name == 'inner':
            return pd.Grouper(level='inner')
        return name

    # Column mixed with index level, in both orders, on both frames
    for frame in (df_multi, df_single):
        for sugar in (['B', 'inner'], ['inner', 'B']):
            result = frame.groupby(sugar).mean()
            explicit = [spelled_out(name) for name in sugar]
            expected = frame.groupby(explicit).mean()
            assert_frame_equal(result, expected)

    # Index level alone: first as a one-element list, then as a scalar
    for sugar in (['inner'], 'inner'):
        for frame in (df_multi, df_single):
            result = frame.groupby(sugar).mean()
            expected = frame.groupby(pd.Grouper(level='inner')).mean()
            assert_frame_equal(result, expected)
547
+
548
def test_grouper_column_takes_precedence_over_level(self):
    """Check that a column wins when a name is also an index level.

    The frames carry 'inner' both as an index level and as a column.
    Grouping by the bare string must match ``pd.Grouper(key='inner')``
    and must give a different result index than
    ``pd.Grouper(level='inner')``.
    """
    # GH 5677, when a string passed as the `by` parameter
    # matches a column and an index level the column takes
    # precedence
    tuples = [('a', 1), ('a', 2), ('a', 3),
              ('b', 1), ('b', 2), ('b', 3)]
    multi_index = pd.MultiIndex.from_tuples(tuples,
                                            names=['outer', 'inner'])
    df_multi_both = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one'],
         'inner': [1, 1, 1, 1, 1, 1]},
        index=multi_index)

    # Same data, but with only 'inner' left in the index
    df_single_both = df_multi_both.reset_index('outer')

    def check(frame, sugar, by_column, by_level):
        # The sugar form must equal the column-based grouping ...
        result = frame.groupby(sugar).mean()
        expected = frame.groupby(by_column).mean()
        assert_frame_equal(result, expected)
        # ... and must not produce the level-based result index.
        not_expected = frame.groupby(by_level).mean()
        assert not result.index.equals(not_expected.index)

    # Single key: first as a scalar, then as a one-element list
    for sugar in ('inner', ['inner']):
        for frame in (df_multi_both, df_single_both):
            check(frame, sugar,
                  pd.Grouper(key='inner'),
                  pd.Grouper(level='inner'))

    # Two keys, with 'inner' in either position, on both frames
    for frame in (df_multi_both, df_single_both):
        for sugar in (['B', 'inner'], ['inner', 'B']):
            by_column = [pd.Grouper(key='inner') if name == 'inner'
                         else name for name in sugar]
            by_level = [pd.Grouper(level='inner') if name == 'inner'
                        else name for name in sugar]
            check(frame, sugar, by_column, by_level)
633
+
494
634
def test_grouper_getting_correct_binner (self ):
495
635
496
636
# GH 10063
0 commit comments