-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Melting with not present column does not produce error #23575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 31 commits
855985d
40fdb05
9670da2
3ffc870
8139f78
0a94650
d0f6d23
6c76161
ad3d926
e097a87
5ff3a32
fcbda15
3175b34
515fb9f
c7d6fcf
5911cc3
47ca7fc
d0ee9c5
c75ab23
32ed22c
e629b2a
89de406
1d13f4a
479b761
01e8d74
6762b21
eae7716
fba641f
06b7cdb
39c746b
af170e1
4c9bc9f
c59d29f
0db8838
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
from pandas import compat | ||
from pandas.core.arrays import Categorical | ||
from pandas.core.frame import _shared_docs | ||
from pandas.core.indexes.base import Index | ||
from pandas.core.reshape.concat import concat | ||
from pandas.core.tools.numeric import to_numeric | ||
|
||
|
@@ -24,6 +25,10 @@ | |
def melt(frame, id_vars=None, value_vars=None, var_name=None, | ||
value_name='value', col_level=None): | ||
# TODO: what about the existing index? | ||
if isinstance(frame.columns, ABCMultiIndex): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not especially familiar with melt and multi-index columns, but I don't think this is quite right. It seems like you need to specify However, it doesn't quite seem that a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you need to provide
But you don't need to specify There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All I am doing at L28 is gathering column names from all levels. There are other checks to make sure that melting is performed properly; this will just check to make sure that whatever you pass is in your df. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I think this is ok. Can you provide a comment on what is going on? |
||
cols = [x for c in frame.columns for x in c] | ||
else: | ||
cols = list(frame.columns) | ||
if id_vars is not None: | ||
if not is_list_like(id_vars): | ||
id_vars = [id_vars] | ||
|
@@ -32,7 +37,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, | |
raise ValueError('id_vars must be a list of tuples when columns' | ||
' are a MultiIndex') | ||
else: | ||
# Check that `id_vars` are in frame | ||
id_vars = list(id_vars) | ||
missing = Index(np.ravel(id_vars)).difference(cols) | ||
if not missing.empty: | ||
raise KeyError("The following 'id_vars' are not present" | ||
" in the DataFrame: {missing}" | ||
"".format(missing=list(missing))) | ||
else: | ||
id_vars = [] | ||
|
||
|
@@ -45,6 +56,12 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, | |
' columns are a MultiIndex') | ||
else: | ||
value_vars = list(value_vars) | ||
# Check that `value_vars` are in frame | ||
missing = Index(np.ravel(value_vars)).difference(cols) | ||
if not missing.empty: | ||
michaelsilverstein marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise KeyError("The following 'value_vars' are not present in" | ||
" the DataFrame: {missing}" | ||
"".format(missing=list(missing))) | ||
frame = frame.loc[:, id_vars + value_vars] | ||
else: | ||
frame = frame.copy() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,6 +101,14 @@ def test_vars_work_with_multiindex(self): | |
result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')]) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_single_vars_work_with_multiindex(self): | ||
expected = DataFrame({ | ||
'A': {0: 1.067683, 1: -1.321405, 2: -0.807333}, | ||
'CAP': {0: 'B', 1: 'B', 2: 'B'}, | ||
'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}}) | ||
result = self.df1.melt(['A'], ['B'], col_level=0) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_tuple_vars_fail_with_multiindex(self): | ||
# melt should fail with an informative error message if | ||
# the columns have a MultiIndex and a tuple is passed | ||
|
@@ -233,6 +241,34 @@ def test_pandas_dtypes(self, col): | |
expected.columns = ['klass', 'col', 'attribute', 'value'] | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_melt_missing_columns_raises(self): | ||
# GH-23575 | ||
# This test is to ensure that pandas raises an error if melting is | ||
# attempted with column names absent from the dataframe | ||
|
||
# Generate data | ||
df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd')) | ||
|
||
# Try to melt with missing `value_vars` column name | ||
msg = "The following '{Var}' are not present in the DataFrame: {Col}" | ||
with pytest.raises( | ||
KeyError, | ||
match=msg.format(Var='value_vars', Col="\\['C'\\]")): | ||
df.melt(['a', 'b'], ['C', 'd']) | ||
|
||
# Try to melt with missing `id_vars` column name | ||
with pytest.raises( | ||
KeyError, | ||
match=msg.format(Var='id_vars', Col="\\['A'\\]")): | ||
df.melt(['A', 'b'], ['c', 'd']) | ||
|
||
# Multiple missing | ||
with pytest.raises( | ||
KeyError, | ||
match=msg.format(Var='id_vars', | ||
Col="\\['not_here', 'or_there'\\]")): | ||
df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd']) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you do an example with a MultiIndex and columns that are not in the top level of the MultiIndex? Ideally try with and without col_level as well. |
||
|
||
|
||
class TestLreshape(object): | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.