@@ -521,28 +521,6 @@ a trivial example is ``df.groupby('A').agg(lambda ser: 1)``. Note that
521
521
:meth:`~pd.core.groupby.DataFrameGroupBy.nth` can act as a reducer *or* a
522
522
filter, see :ref:`here <groupby.nth>`.
523
523
524
- Decimal columns are "nuisance" columns that .agg automatically excludes in groupby.
525
-
526
- If you do wish to aggregate them you must do so explicitly:
527
-
528
- .. ipython :: python
529
-
530
- from decimal import Decimal
531
- dec = pd.DataFrame(
532
- {' name' : [' foo' , ' bar' , ' foo' , ' bar' ],
533
- ' title' : [' boo' , ' far' , ' boo' , ' far' ],
534
- ' id' : [123 , 456 , 123 , 456 ],
535
- ' int_column' : [1 , 2 , 3 , 4 ],
536
- ' dec_column1' : [Decimal(' 0.50' ), Decimal(' 0.15' ), Decimal(' 0.25' ), Decimal(' 0.40' )],
537
- ' dec_column2' : [Decimal(' 0.20' ), Decimal(' 0.30' ), Decimal(' 0.55' ), Decimal(' 0.60' )]
538
- },
539
- columns = [' name' ,' title' ,' id' ,' int_column' ,' dec_column1' ,' dec_column2' ]
540
- )
541
-
542
- dec.groupby([' name' , ' title' , ' id' ], as_index = False ).sum()
543
-
544
- dec.groupby([' name' , ' title' , ' id' ], as_index = False ).agg({' dec_column1' : ' sum' , ' dec_column2' : ' sum' })
545
-
546
524
.. _groupby.aggregate.multifunc:
547
525
548
526
Applying multiple functions at once
@@ -1034,6 +1012,42 @@ The returned dtype of the grouped will *always* include *all* of the categories
1034
1012
s = pd.Series([1 , 1 , 1 ]).groupby(pd.Categorical([' a' , ' a' , ' a' ], categories = [' a' , ' b' ]), observed = False ).count()
1035
1013
s.index.dtype
1036
1014
1015
+ .. note::
1016
+ Decimal columns are also "nuisance" columns: they are automatically excluded from aggregation functions in groupby.
1017
+
1018
+ If you do wish to include decimal columns in the aggregation, you must do so explicitly:
1019
+
1020
+ .. ipython:: python
1021
+
1022
+ from decimal import Decimal
1023
+ dec = pd.DataFrame(
1024
+ {' name' : [' foo' , ' bar' , ' foo' , ' bar' ],
1025
+ ' title' : [' boo' , ' far' , ' boo' , ' far' ],
1026
+ ' id' : [123 , 456 , 123 , 456 ],
1027
+ ' int_column' : [1 , 2 , 3 , 4 ],
1028
+ ' dec_column1' : [Decimal(' 0.50' ), Decimal(' 0.15' ), Decimal(' 0.25' ), Decimal(' 0.40' )],
1029
+ ' dec_column2' : [Decimal(' 0.20' ), Decimal(' 0.30' ), Decimal(' 0.55' ), Decimal(' 0.60' )]
1030
+ },
1031
+ columns = [' name' ,' title' ,' id' ,' int_column' ,' dec_column1' ,' dec_column2' ]
1032
+ )
1033
+
1034
+ dec.head()
1035
+
1036
+ dec.dtypes
1037
+
1038
+ # Decimal columns are excluded from the sum by default
1039
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False ).sum()
1040
+
1041
+ # Decimal columns can be summed explicitly by themselves...
1042
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False )[' dec_column1' ,' dec_column2' ].sum()
1043
+
1044
+ # ...but when they are selected together with standard data types, the decimal columns are excluded
1045
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False )[' int_column' ,' dec_column1' ,' dec_column2' ].sum()
1046
+
1047
+ # Use the .agg method to aggregate over standard and "nuisance" data types at the same time
1048
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False ).agg({' int_column' : ' sum' , ' dec_column1' : ' sum' , ' dec_column2' : ' sum' })
1049
+
1050
+
1037
1051
.. _groupby.missing:
1038
1052
1039
1053
NA and NaT group handling
0 commit comments