Description
Code Sample, a copy-pastable example if possible
In [2]: df = pd.DataFrame(index=range(3), columns=['A', 'B', 'C', 'D', 'E', 'F'])
In [3]: df.loc[0, ['A', 'D']] = (1,2)
In [4]: df.loc[:, ['B', 'E']] = (1,2)
In [5]: df[['C', 'F']] = (1,2)
In [6]: df
Out[6]:
A B C D E F
0 1 1 1 2 2 2
1 NaN 1 1 NaN 2 2
2 NaN 1 1 NaN 2 2
In [7]: dfdup = pd.DataFrame(index=range(3), columns=['A', 'B', 'C']*2)
In [8]: dfdup.loc[0, 'A'] = (1,2) # Works
In [9]: dfdup.loc[:, 'B'] = (1,2) # Works
In [10]: dfdup['C'] = (1,2) # Fails
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-10-17d5611af828> in <module>()
----> 1 dfdup['C'] = (1,2)
/home/pietro/nobackup/repo/pandas/pandas/core/frame.py in __setitem__(self, key, value)
2421 else:
2422 # set column
-> 2423 self._set_item(key, value)
2424
2425 def _setitem_slice(self, key, value):
/home/pietro/nobackup/repo/pandas/pandas/core/frame.py in _set_item(self, key, value)
2487
2488 self._ensure_valid_index(value)
-> 2489 value = self._sanitize_column(key, value)
2490 NDFrame._set_item(self, key, value)
2491
/home/pietro/nobackup/repo/pandas/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
2658
2659 # turn me into an ndarray
-> 2660 value = _sanitize_index(value, self.index, copy=False)
2661 if not isinstance(value, (np.ndarray, Index)):
2662 if isinstance(value, list) and len(value) > 0:
/home/pietro/nobackup/repo/pandas/pandas/core/series.py in _sanitize_index(data, index, copy)
2847
2848 if len(data) != len(index):
-> 2849 raise ValueError('Length of values does not match length of ' 'index')
2850
2851 if isinstance(data, PeriodIndex):
ValueError: Length of values does not match length of index
In [11]: dfdup
Out[11]:
A B C A B C
0 1 1 NaN 2 2 NaN
1 NaN 1 NaN NaN 2 NaN
2 NaN 1 NaN NaN 2 NaN
Problem description
While `loc` correctly treats `A` as referring to two columns, `DataFrame[.]`
treats the key as a single column, tries to use the tuple as that column's
values, and raises. Note that `Series[.]` behaves correctly:
In [12]: s = pd.Series(index=['A', 'B']*2)
In [13]: s.loc['A'] = (1,2)
In [14]: s
Out[14]:
A 1.0
B NaN
A 2.0
B NaN
dtype: float64
In [15]: s['B'] = (1,2)
In [16]: s
Out[16]:
A 1.0
B 1.0
A 2.0
B 2.0
dtype: float64
Expected Output
The same result as `Out[6]` (except for the column names, obviously).
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.utf8
LOCALE: it_IT.UTF-8
pandas: 0.19.0+605.gc081d5782
pytest: 3.0.6
pip: 9.0.1
setuptools: 33.1.1
Cython: 0.25.2
numpy: 1.12.0
scipy: 0.18.1
xarray: None
IPython: 5.1.0.dev
sphinx: 1.4.9
patsy: 0.3.0-dev
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: 1.2.0
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: 3.7.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.8
s3fs: None
pandas_gbq: None
pandas_datareader: 0.2.1