Fixed Inconsistent GroupBy Output Shape with Duplicate Column Labels #29124
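For context, a minimal sketch of the behavior this PR addresses (the frame, grouper, and values here are illustrative, not taken from the original issue): with duplicate column labels, a groupby aggregation should preserve the duplicated labels in the output rather than collapsing or misaligning them.

```python
import pandas as pd

# Hypothetical reproduction: a DataFrame whose columns contain a
# duplicated label "b"; aggregating after a groupby should keep both
# "b" columns in the result.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "b"])
result = df.groupby([0, 0]).sum()
print(list(result.columns))  # with the fix, duplicate labels survive
```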
Changes from 56 commits
@@ -10,7 +10,17 @@
 from functools import partial
 from textwrap import dedent
 import typing
-from typing import Any, Callable, FrozenSet, Iterable, Sequence, Type, Union, cast
+from typing import (
+    Any,
+    Callable,
+    FrozenSet,
+    Iterable,
+    Mapping,
+    Sequence,
+    Type,
+    Union,
+    cast,
+)
 import warnings

 import numpy as np
@@ -322,29 +332,108 @@ def _aggregate_multiple_funcs(self, arg, _level):

         return DataFrame(results, columns=columns)

-    def _wrap_series_output(self, output, index, names=None):
-        """ common agg/transform wrapping logic """
-        output = output[self._selection_name]
-        if names is not None:
-            return DataFrame(output, index=index, columns=names)
+    def _wrap_series_output(
+        self,
+        output: Mapping[int, Union[Series, np.ndarray]],
+        index: Index,
+        columns: Index,
+    ) -> Union[Series, DataFrame]:
+        """
+        Wraps the output of a SeriesGroupBy operation into the expected result.
+
+        Parameters
+        ----------
+        output : Mapping[int, Union[Series, np.ndarray]]
+            Dict where the key represents the columnar-index and the values are
+            the actual results. Must be ordered from 0..n.
+        index : pd.Index
+            Index to apply to the output.
+        columns : pd.Index
+            Columns to apply to the output.
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Notes
+        -----
+        In the vast majority of cases output and columns will only contain one
+        element. The exception is operations that expand dimensions, like ohlc.
+        """
+        assert len(output) == len(columns)
+        assert list(output.keys()) == sorted(output.keys())
Review thread on these assertions:

> "this is by definition always true, right, as this is a property of a mapping? an assertion here is confusing"

> "Isn't this what you had asked me to add asserts for previously? If not then I am confused as to what asserts you wanted."

> "FWIW I don't consider this to be a long term approach anyway. Probably easier to make a named tuple that holds the position and label of each item in the result and construct from there; happy to do as a follow up."

> "not at all. an assert about a property of a dict is not very useful. I asked about an assert about the sortedness of output. You are assigning .columns = columns; the keys must be in the same order. There is no guarantee on this at all (except that dicts are sorted in > 3.6, and I think you must be adding them in the same order), but this is non-obvious, and non-trivial."

> "This isn't a dict property though - any chance you are mixing up a dict maintaining insertion order versus being sorted? Sorting the output would add a behavior change that I don't think is desirable either. I do agree this isn't the clearest way of managing in the long run but I'm trying to limit the scope of the PR. Any chance we can move forward as is and I can redo the dict insertion in a follow up? I think we should just use a namedtuple or something similar as the dict key that holds the position and label together."

> "you are still missing my point. you are assigning [comment truncated]"

> "Right, there is no easy way to do this in the current iteration. I was planning to do as a follow up - is that a requirement to do here?"

> "Sure there is: you are creating an enumeration of labels (0, 1, ... etc); you just need to assert that the columns are in the same order here."
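The disagreement in the thread above hinges on insertion order versus sortedness. A small self-contained illustration:

```python
# A dict preserves *insertion* order (guaranteed from Python 3.7), but
# its keys are not automatically sorted -- which is why the PR asserts
# sortedness explicitly instead of relying on a "property of a mapping".
d = {}
d[1] = "later position, inserted first"
d[0] = "earlier position, inserted second"

print(list(d.keys()))                      # [1, 0]: insertion order
print(list(d.keys()) == sorted(d.keys()))  # False: not sorted
```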
-        else:
-            name = self._selection_name
-            if name is None:
-                name = self._selected_obj.name
-            return Series(output, index=index, name=name)
+        result: Union[Series, DataFrame]
+        if len(output) > 1:
+            result = DataFrame(output, index=index)
+            result.columns = columns
+        else:
+            result = Series(output[0], index=index, name=columns[0])
+
+        return result
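The two-step construction above (build the DataFrame from the positionally keyed dict, then assign `columns`) matters when labels are duplicated. A sketch with invented data:

```python
import numpy as np
import pandas as pd

# output is keyed 0..n positionally, as in _wrap_series_output
output = {0: np.array([1.0, 2.0]), 1: np.array([3.0, 4.0])}
index = pd.Index(["x", "y"])
columns = pd.Index(["b", "b"])  # duplicate labels are legal here

# Passing columns= to the constructor would try to select dict keys by
# label; assigning afterwards keeps the positional alignment intact.
result = pd.DataFrame(output, index=index)
result.columns = columns
```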
+    def _wrap_aggregated_output(
+        self, output: Mapping[int, Union[Series, np.ndarray]], columns: Index
+    ) -> Union[Series, DataFrame]:
+        """
+        Wraps the output of a SeriesGroupBy aggregation into the expected result.
+
+        Parameters
+        ----------
+        output : Mapping[int, Union[Series, np.ndarray]]
+            Dict where the key represents the columnar-index and the values are
+            the actual results.
+        columns : pd.Index
+            Columns to apply to the output.
+
+        Returns
+        -------
+        Series or DataFrame
+
+        Notes
+        -----
+        In the vast majority of cases output and columns will only contain one
+        element. The exception is operations that expand dimensions, like ohlc.
+        """
+        assert list(output.keys()) == sorted(output.keys())

Review comment on the assert: "why is this not always true?"

-    def _wrap_aggregated_output(self, output, names=None):
         result = self._wrap_series_output(
-            output=output, index=self.grouper.result_index, names=names
+            output=output, index=self.grouper.result_index, columns=columns
         )
         return self._reindex_output(result)._convert(datetime=True)

-    def _wrap_transformed_output(self, output, names=None):
-        return self._wrap_series_output(
-            output=output, index=self.obj.index, names=names
-        )
+    def _wrap_transformed_output(
+        self, output: Mapping[int, Union[Series, np.ndarray]], columns: Index
+    ) -> Series:
+        """
+        Wraps the output of a SeriesGroupBy aggregation into the expected result.
+
+        Parameters
+        ----------
+        output : dict[int, Union[Series, np.ndarray]]
+            Dict with a sole key of 0 and a value of the result values.
+        columns : pd.Index
+            Columns to apply to the output.
+
+        Returns
+        -------
+        Series
+
+        Notes
+        -----
+        output and columns should only contain one element. These are containers
+        for generic compatibility with the DataFrameGroupBy class.
+        """
+        assert list(output.keys()) == sorted(output.keys())
+
+        result = self._wrap_series_output(
+            output=output, index=self.obj.index, columns=columns
+        )
+
+        # No transformations increase the ndim of the result
+        assert isinstance(result, Series)
+        return result

     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             # GH #6265
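The docstrings' note about dimension-expanding operations can be seen with `ohlc`, which turns a single grouped Series into four output columns (the data below is illustrative):

```python
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
result = s.groupby([0, 0, 1, 1]).ohlc()  # Series in, DataFrame out

print(list(result.columns))  # ['open', 'high', 'low', 'close']
```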
@@ -1104,17 +1193,6 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:

         return DataFrame(result, columns=result_columns)

-    def _decide_output_index(self, output, labels):
-        if len(output) == len(labels):
-            output_keys = labels
-        else:
-            output_keys = sorted(output)
-
-        if isinstance(labels, MultiIndex):
-            output_keys = MultiIndex.from_tuples(output_keys, names=labels.names)
-
-        return output_keys
-
     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             return DataFrame(index=keys)
@@ -1579,27 +1657,66 @@ def _insert_inaxis_grouper_inplace(self, result):
         if in_axis:
             result.insert(0, name, lev)

-    def _wrap_aggregated_output(self, output, names=None):
-        agg_axis = 0 if self.axis == 1 else 1
-        agg_labels = self._obj_with_exclusions._get_axis(agg_axis)
+    def _wrap_aggregated_output(
+        self, output: Mapping[int, Union[Series, np.ndarray]], columns: Index
+    ) -> DataFrame:
+        """
+        Wraps the output of DataFrameGroupBy aggregations into the expected result.
+
+        Parameters
+        ----------
+        output : dict[int, Union[Series, np.ndarray]]
+            Dict where the key represents the columnar-index and the values are
+            the actual results.
+        columns : pd.Index
+            Column names to apply.
+
+        Returns
+        -------
+        DataFrame
+        """
+        assert list(output.keys()) == sorted(output.keys())

-        output_keys = self._decide_output_index(output, agg_labels)
+        result = DataFrame(output)
+        result.columns = columns

         if not self.as_index:
-            result = DataFrame(output, columns=output_keys)
             self._insert_inaxis_grouper_inplace(result)
             result = result._consolidate()
         else:
             index = self.grouper.result_index
-            result = DataFrame(output, index=index, columns=output_keys)
+            result.index = index

         if self.axis == 1:
             result = result.T

         return self._reindex_output(result)._convert(datetime=True)

-    def _wrap_transformed_output(self, output, names=None) -> DataFrame:
-        return DataFrame(output, index=self.obj.index)
+    def _wrap_transformed_output(
+        self, output: Mapping[int, Union[Series, np.ndarray]], columns: Index
Review comment: "here and elsewhere the [comment truncated]"

Reply: "I think it does. I had a go at trying to avoid the cast, and needed an instance EA in _wrap_series_output to get the tests to pass."

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 92378ca91..20cbc9c96 100644
index 92378ca91..20cbc9c96 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -20,6 +20,7 @@ from typing import (
Type,
Union,
cast,
+ overload,
)
import warnings
@@ -50,6 +51,7 @@ from pandas.core.dtypes.missing import _isna_ndarraylike, isna, notna
from pandas._typing import FrameOrSeries
import pandas.core.algorithms as algorithms
+from pandas.core.arrays import ExtensionArray
from pandas.core.base import DataError, SpecificationError
import pandas.core.common as com
from pandas.core.frame import DataFrame
@@ -332,9 +334,29 @@ class SeriesGroupBy(GroupBy):
return DataFrame(results, columns=columns)
+ # TODO: _OUTPUT should be Union[Series, np.ndarray, ExtensionArray] but since
+ # np.ndarray resolves to Any, can't be used with overloads without producing
+ # error: Overloaded function signatures 1 and 2 overlap with incompatible return
+ # types
+ # while np.ndarray resolves to Any, it will be compatible with Series and
+ # ExtensionArray
+ _OUTPUT = Union[Series, ExtensionArray]
+
+ @overload
+ def _wrap_series_output(
+ self, output: Mapping[int, _OUTPUT], index: Index, columns: Index
+ ) -> DataFrame:
+ ...
+
+ @overload # noqa: F811
def _wrap_series_output(
+ self, output: _OUTPUT, index: Index, columns: Index
+ ) -> Series:
+ ...
+
+ def _wrap_series_output( # noqa: F811
self,
- output: Mapping[int, Union[Series, np.ndarray]],
+ output: Union[_OUTPUT, Mapping[int, _OUTPUT]],
index: Index,
columns: Index,
) -> Union[Series, DataFrame]:
@@ -360,17 +382,18 @@ class SeriesGroupBy(GroupBy):
In the vast majority of cases output and columns will only contain one
element. The exception is operations that expand dimensions, like ohlc.
"""
- assert len(output) == len(columns)
- assert list(output.keys()) == sorted(output.keys())
- result: Union[Series, DataFrame]
- if len(output) > 1:
+ if (
+ not isinstance(output, (Series, np.ndarray, ExtensionArray))
+ and len(output) > 1
+ ):
+ assert len(output) == len(columns)
+ assert list(output.keys()) == sorted(output.keys())
result = DataFrame(output, index=index)
result.columns = columns
+ return result
else:
- result = Series(output[0], index=index, name=columns[0])
-
- return result
+ return Series(output, index=index, name=columns[0])
def _wrap_aggregated_output(
self, output: Mapping[int, Union[Series, np.ndarray]], columns: Index
@@ -397,8 +420,13 @@ class SeriesGroupBy(GroupBy):
"""
assert list(output.keys()) == sorted(output.keys())
+ if len(output) == 1:
+ _output = output[0]
+ else:
+ _output = output
+
result = self._wrap_series_output(
- output=output, index=self.grouper.result_index, columns=columns
+ output=_output, index=self.grouper.result_index, columns=columns
)
return self._reindex_output(result)._convert(datetime=True)
@@ -426,13 +454,12 @@ class SeriesGroupBy(GroupBy):
"""
assert list(output.keys()) == sorted(output.keys())
+ assert len(output) == 1
+
result = self._wrap_series_output(
- output=output, index=self.obj.index, columns=columns
+ output=output[0], index=self.obj.index, columns=columns
)
- # No transformations increase the ndim of the result
- # Unfortunately need to cast for mypy to know this
- result = cast(Series, result)
return result
    def _wrap_applied_output(self, keys, values, not_indexed_same=False):

Follow-up comment: "p.s. I don't recommend the above since still needed [comment truncated]"
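The suggestion in the thread above leans on `typing.overload`. A self-contained sketch of the pattern (the function name and behavior here are hypothetical, not the PR's code):

```python
from collections.abc import Mapping as ABCMapping
from typing import Mapping, overload


@overload
def wrap_output(output: Mapping[int, list]) -> dict:
    ...


@overload
def wrap_output(output: list) -> list:  # noqa: F811
    ...


def wrap_output(output):  # noqa: F811
    # Single runtime implementation; the overloads above only tell mypy
    # that a Mapping input yields a dict and a list input yields a list,
    # avoiding a cast at each call site.
    if isinstance(output, ABCMapping):
        return dict(output)
    return list(output)
```

The `# noqa: F811` comments mirror the diff above: flake8 otherwise flags the repeated `def` of the same name as a redefinition.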
+    ) -> DataFrame:
+        """
+        Wraps the output of DataFrameGroupBy transformations into the expected result.
+
+        Parameters
+        ----------
+        output : dict[int, Union[Series, np.ndarray]]
+            Dict where the key represents the columnar-index and the values are
+            the actual results.
+        columns : pd.Index
+            Column names to apply.
+
+        Returns
+        -------
+        DataFrame
+        """
+        assert list(output.keys()) == sorted(output.keys())

Review comment on the assert: "same comment as above, I think you need to sort the columns (index is ok)"

+        result = DataFrame(output)
+        result.columns = columns
+        result.index = self.obj.index
+
+        return result

     def _wrap_agged_blocks(self, items, blocks):
         if not self.as_index:
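The `result.index = self.obj.index` assignment above is what keeps transformed output aligned with the original rows rather than the group keys. For example (data invented):

```python
import pandas as pd

df = pd.DataFrame({"g": [0, 0, 1], "v": [1, 2, 3]}, index=[10, 11, 12])
out = df.groupby("g")["v"].transform("sum")

# transform output is indexed like the input, not like the groups
print(out.index.tolist())  # [10, 11, 12]
```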
@@ -1719,9 +1836,11 @@ def groupby_series(obj, col=None):
     if isinstance(obj, Series):
         results = groupby_series(obj)
     else:
+        # TODO: this is duplicative of how GroupBy naturally works
+        # Try to consolidate with normal wrapping functions
         from pandas.core.reshape.concat import concat

-        results = [groupby_series(obj[col], col) for col in obj.columns]
+        results = [groupby_series(content, label) for label, content in obj.items()]
         results = concat(results, axis=1)
         results.columns.names = obj.columns.names
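The switch from `obj[col]` to `obj.items()` in the hunk above matters precisely for duplicate labels: selecting by a duplicated label returns a DataFrame, while `items()` yields one `(label, Series)` pair per column. A sketch:

```python
import pandas as pd

df = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "b"])

# Label-based selection collapses both "b" columns into one DataFrame,
# which would break per-column iteration.
print(type(df["b"]).__name__)  # DataFrame

# items() walks positionally, yielding each column once, duplicates included.
labels = [label for label, _ in df.items()]
print(labels)  # ['a', 'b', 'b']
```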
@@ -1763,7 +1882,7 @@ def _normalize_keyword_aggregation(kwargs):
     """
     Normalize user-provided "named aggregation" kwargs.

-    Transforms from the new ``Dict[str, NamedAgg]`` style kwargs
+    Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
     to the old OrderedDict[str, List[scalar]]].

     Parameters

@@ -1784,7 +1903,7 @@ def _normalize_keyword_aggregation(kwargs):
     >>> _normalize_keyword_aggregation({'output': ('input', 'sum')})
     (OrderedDict([('input', ['sum'])]), ('output',), [('input', 'sum')])
     """
-    # Normalize the aggregation functions as Dict[column, List[func]],
+    # Normalize the aggregation functions as Mapping[column, List[func]],
     # process normally, then fixup the names.
     # TODO(Py35): When we drop python 3.5, change this to
     # defaultdict(list)
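`_normalize_keyword_aggregation` backs pandas' public named-aggregation syntax, where keyword names map to `(column, aggfunc)` pairs. A usage example (data invented):

```python
import pandas as pd

df = pd.DataFrame({"kind": ["cat", "dog", "cat"], "height": [9.1, 6.0, 9.5]})

# keyword name -> (column, aggfunc) pairs are what get normalized
out = df.groupby("kind").agg(min_height=("height", "min"))
print(list(out.columns))  # ['min_height']
```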