pandas-dev · jreback · Nov 21, 2018 · Nov 8, 2018 · Nov 8, 2018 · Nov 13, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1418,6 +1418,7 @@ Reshaping
 - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
 - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
 - Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`).
+- Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`)
 - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
 
 .. _whatsnew_0240.bug_fixes.sparse:

diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
@@ -13,6 +13,7 @@
 from pandas import compat
 from pandas.core.arrays import Categorical
 from pandas.core.frame import _shared_docs
+from pandas.core.indexes.base import Index
 from pandas.core.reshape.concat import concat
 from pandas.core.tools.numeric import to_numeric
 
@@ -24,6 +25,12 @@
 def melt(frame, id_vars=None, value_vars=None, var_name=None,
          value_name='value', col_level=None):
     # TODO: what about the existing index?
+    # If multiindex, gather names of columns on all level for checking presence
+    # of `id_vars` and `value_vars`
+    if isinstance(frame.columns, ABCMultiIndex):
+        cols = [x for c in frame.columns for x in c]
+    else:
+        cols = list(frame.columns)
     if id_vars is not None:
         if not is_list_like(id_vars):
             id_vars = [id_vars]
@@ -32,7 +39,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
             raise ValueError('id_vars must be a list of tuples when columns'
                              ' are a MultiIndex')
         else:
+            # Check that `id_vars` are in frame
             id_vars = list(id_vars)
+            missing = Index(np.ravel(id_vars)).difference(cols)
+            if not missing.empty:
+                raise KeyError("The following 'id_vars' are not present"
+                               " in the DataFrame: {missing}"
+                               "".format(missing=list(missing)))
     else:
         id_vars = []
 
@@ -45,6 +58,12 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
                              ' columns are a MultiIndex')
         else:
             value_vars = list(value_vars)
+            # Check that `value_vars` are in frame
+            missing = Index(np.ravel(value_vars)).difference(cols)
+            if not missing.empty:
+                raise KeyError("The following 'value_vars' are not present in"
+                               " the DataFrame: {missing}"
+                               "".format(missing=list(missing)))
         frame = frame.loc[:, id_vars + value_vars]
     else:
         frame = frame.copy()

diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -101,6 +101,14 @@ def test_vars_work_with_multiindex(self):
         result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
         tm.assert_frame_equal(result, expected)
 
+    def test_single_vars_work_with_multiindex(self):
+        expected = DataFrame({
+            'A': {0: 1.067683, 1: -1.321405, 2: -0.807333},
+            'CAP': {0: 'B', 1: 'B', 2: 'B'},
+            'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}})
+        result = self.df1.melt(['A'], ['B'], col_level=0)
+        tm.assert_frame_equal(result, expected)
+
     def test_tuple_vars_fail_with_multiindex(self):
         # melt should fail with an informative error message if
         # the columns have a MultiIndex and a tuple is passed
@@ -233,6 +241,49 @@ def test_pandas_dtypes(self, col):
         expected.columns = ['klass', 'col', 'attribute', 'value']
         tm.assert_frame_equal(result, expected)
 
+    def test_melt_missing_columns_raises(self):
+        # GH-23575
+        # This test is to ensure that pandas raises an error if melting is
+        # attempted with column names absent from the dataframe
+
+        # Generate data
+        df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))
+
+        # Try to melt with missing `value_vars` column name
+        msg = "The following '{Var}' are not present in the DataFrame: {Col}"
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='value_vars', Col="\\['C'\\]")):
+            df.melt(['a', 'b'], ['C', 'd'])
+
+        # Try to melt with missing `id_vars` column name
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars', Col="\\['A'\\]")):
+            df.melt(['A', 'b'], ['c', 'd'])
+
+        # Multiple missing
+        with pytest.raises(
+                KeyError,
+                match=msg.format(Var='id_vars',
+                                 Col="\\['not_here', 'or_there'\\]")):
+            df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd'])
+
+        # Multiindex melt fails if column is missing from multilevel melt
+        multi = df.copy()
+        multi.columns = [list('ABCD'), list('abcd')]
+        with pytest.raises(
+            KeyError,
+            match=msg.format(Var='id_vars',
+                             Col="\\['E'\\]")):
+            multi.melt([('E', 'a')], [('B', 'b')])
+        # Multiindex fails if column is missing from single level melt
+        with pytest.raises(
+            KeyError,
+            match=msg.format(Var='value_vars',
+                             Col="\\['F'\\]")):
+            multi.melt(['A'], ['F'], col_level=0)
+
 
 class TestLreshape(object):