pandas-dev · jreback · Feb 26, 2020 · Jan 23, 2020 · Jan 23, 2020 · Jan 23, 2020
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
@@ -169,6 +169,7 @@ Computations / descriptive stats
    DataFrame.std
    DataFrame.var
    DataFrame.nunique
+   DataFrame.value_counts
 
 Reindexing / selection / label manipulation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -54,6 +54,7 @@ Other API changes
 
 - :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
   will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
+- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
 -
 
 Backwards incompatible API changes

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -39,7 +39,7 @@
 from pandas._config import get_option
 
 from pandas._libs import algos as libalgos, lib, properties
-from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Level, Renamer
+from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
 from pandas.compat import PY37
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
@@ -108,7 +108,7 @@
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.core.indexes.multi import maybe_droplevels
+from pandas.core.indexes.multi import MultiIndex, maybe_droplevels
 from pandas.core.indexes.period import PeriodIndex
 from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
 from pandas.core.internals import BlockManager
@@ -5070,6 +5070,120 @@ def sort_index(
         else:
             return self._constructor(new_data).__finalize__(self)
 
+    def value_counts(
+        self,
+        subset: Optional[Sequence[Label]] = None,
+        normalize: bool = False,
+        sort: bool = True,
+        ascending: bool = False,
+        bins: Optional[int] = None,
+        dropna: bool = True,
+    ):
+        """
+        Return a Series containing counts of unique rows in the DataFrame.
+
+        .. versionadded:: 1.1.0
+
+        The returned Series will have a MultiIndex with one level per input
+        column. By default, rows that contain any NA values are omitted from
+        the result. By default, the resulting Series will be in descending
+        order so that the first element is the most frequently-occurring row.
+
+        Parameters
+        ----------
+        subset : list-like, optional
+            Columns to use when counting unique combinations.
+        normalize : bool, default False
+            Return proportions rather than frequencies.
+        sort : bool, default True
+            Sort by frequencies.
+        ascending : bool, default False
+            Sort in ascending order.
+        bins : int, optional
+            This parameter is not yet supported and must be set to None (the
+            default value). It exists to ensure compatibility with
+            `Series.value_counts`.
+            Rather than count values, group them into half-open bins,
+            a convenience for ``pd.cut``, only works with single-column numeric
+            data.
+        dropna : bool, default True
+            This parameter is not yet supported and must be set to True (the
+            default value). It exists to ensure compatibility with
+            `Series.value_counts`.
+            Don't include counts of rows containing NA values.
+
+        Returns
+        -------
+        Series
+
+        See Also
+        --------
+        Series.value_counts: Equivalent method on Series.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6],
+        ...                    'num_wings': [2, 0, 0, 0]},
+        ...                   index=['falcon', 'dog', 'cat', 'ant'])
+        >>> df
+                num_legs  num_wings
+        falcon         2          2
+        dog            4          0
+        cat            4          0
+        ant            6          0
+        >>> df.value_counts()
+        num_legs  num_wings
+        4         0            2
+        6         0            1
+        2         2            1
+        dtype: int64
+        >>> df.value_counts(sort=False)
+        num_legs  num_wings
+        2         2            1
+        4         0            2
+        6         0            1
+        dtype: int64
+        >>> df.value_counts(ascending=True)
+        num_legs  num_wings
+        2         2            1
+        6         0            1
+        4         0            2
+        dtype: int64
+        >>> df.value_counts(normalize=True)
+        num_legs  num_wings
+        4         0            0.50
+        6         0            0.25
+        2         2            0.25
+        dtype: float64
+        """
+        if subset is None:
+            subset = self.columns.tolist()
+
+        # Some features not supported yet
+        if not dropna:
+            raise NotImplementedError(
+                "`dropna=False` not yet supported for DataFrames."
+            )
+
+        if bins is not None:
+            raise NotImplementedError(
+                "`bins` parameter not yet supported for DataFrames."
+            )
+
+        counts = self.groupby(subset).size()
+
+        if sort:
+            counts = counts.sort_values(ascending=ascending)
+        if normalize:
+            counts /= counts.sum()
+        # Force MultiIndex for single column
+        if len(subset) == 1:
+            counts.index = MultiIndex.from_arrays(
+                [counts.index], names=[counts.index.name]
+            )
+
+        return counts
+
     def nlargest(self, n, columns, keep="first") -> "DataFrame":
         """
         Return the first `n` rows ordered by `columns` in descending order.

diff --git a/pandas/tests/frame/test_value_counts.py b/pandas/tests/frame/test_value_counts.py
@@ -0,0 +1,123 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_data_frame_value_counts_unsorted():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    result = df.value_counts(sort=False)
+    expected = pd.Series(
+        data=[1, 2, 1],
+        index=pd.MultiIndex.from_arrays(
+            [(2, 4, 6), (2, 0, 0)], names=["num_legs", "num_wings"]
+        ),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_ascending():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    result = df.value_counts(ascending=True)
+    expected = pd.Series(
+        data=[1, 1, 2],
+        index=pd.MultiIndex.from_arrays(
+            [(2, 6, 4), (2, 0, 0)], names=["num_legs", "num_wings"]
+        ),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_default():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    result = df.value_counts()
+    expected = pd.Series(
+        data=[2, 1, 1],
+        index=pd.MultiIndex.from_arrays(
+            [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+        ),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_normalize():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    result = df.value_counts(normalize=True)
+    expected = pd.Series(
+        data=[0.5, 0.25, 0.25],
+        index=pd.MultiIndex.from_arrays(
+            [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"]
+        ),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_dropna_not_supported_yet():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    with pytest.raises(NotImplementedError, match="not yet supported"):
+        df.value_counts(dropna=False)
+
+
+def test_data_frame_value_counts_bins_not_supported():
+    df = pd.DataFrame(
+        {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]},
+        index=["falcon", "dog", "cat", "ant"],
+    )
+
+    with pytest.raises(NotImplementedError, match="not yet supported"):
+        df.value_counts(bins=2)
+
+
+def test_data_frame_value_counts_single_col_default():
+    df = pd.DataFrame({"num_legs": [2, 4, 4, 6]})
+
+    result = df.value_counts()
+    expected = pd.Series(
+        data=[2, 1, 1],
+        index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]),
+    )
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty():
+    df_no_cols = pd.DataFrame()
+
+    result = df_no_cols.value_counts()
+    expected = pd.Series([], dtype=np.int64)
+
+    tm.assert_series_equal(result, expected)
+
+
+def test_data_frame_value_counts_empty_normalize():
+    df_no_cols = pd.DataFrame()
+
+    result = df_no_cols.value_counts(normalize=True)
+    expected = pd.Series([], dtype=np.float64)
+
+    tm.assert_series_equal(result, expected)