Skip to content

Commit 4efee04

Browse files
Si Wei HowHow Si Wei
Si Wei How
authored and
How Si Wei
committed
Fix assignment to multiple columns when some columns do not exist
1 parent e25fd0d commit 4efee04

File tree

4 files changed

+89
-14
lines changed

4 files changed

+89
-14
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ Indexing
600600
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
601601
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
602602
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
603+
- Bug in assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist (:issue:`13658`)
603604

604605

605606
Missing

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3373,7 +3373,8 @@ def _setitem_array(self, key, value):
33733373
for k1, k2 in zip(key, value.columns):
33743374
self[k1] = value[k2]
33753375
else:
3376-
indexer = self.loc._convert_to_indexer(key, axis=1)
3376+
indexer = self.loc._convert_to_indexer(key, axis=1,
3377+
is_setter=True)
33773378
self._check_setitem_copy()
33783379
self.loc._setitem_with_indexer((slice(None), indexer), value)
33793380

pandas/core/indexing.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,15 @@ def _setitem_with_indexer(self, indexer, value):
304304
# maybe partial set
305305
take_split_path = self.obj._is_mixed_type
306306

307+
has_missing_columns = (
308+
isinstance(indexer, tuple) and
309+
len(indexer) > info_axis and
310+
is_list_like_indexer(indexer[info_axis]) and
311+
any(isinstance(k, dict) for k in indexer[info_axis]))
312+
313+
if has_missing_columns:
314+
take_split_path = True
315+
307316
# if there is only one block/type, still have to take split path
308317
# unless the block is one-dimensional or it can hold the value
309318
if not take_split_path and self.obj._data.blocks:
@@ -462,14 +471,19 @@ def _setitem_with_indexer(self, indexer, value):
462471
if isinstance(value, ABCSeries):
463472
value = self._align_series(indexer, value)
464473

465-
info_idx = indexer[info_axis]
466-
if is_integer(info_idx):
467-
info_idx = [info_idx]
468-
labels = item_labels[info_idx]
474+
if has_missing_columns:
475+
labels = [idx if isinstance(idx, dict) else item_labels[idx]
476+
for idx in indexer[info_axis]]
477+
else:
478+
info_idx = indexer[info_axis]
479+
if is_integer(info_idx):
480+
info_idx = [info_idx]
481+
labels = item_labels[info_idx]
469482

470483
# if we have a partial multiindex, then need to adjust the plane
471484
# indexer here
472485
if (len(labels) == 1 and
486+
labels[0] in self.obj.columns and
473487
isinstance(self.obj[labels[0]].axes[0], MultiIndex)):
474488
item = labels[0]
475489
obj = self.obj[item]
@@ -513,7 +527,9 @@ def _setitem_with_indexer(self, indexer, value):
513527
lplane_indexer = 0
514528

515529
def setter(item, v):
516-
s = self.obj[item]
530+
item, missing = convert_missing_indexer(item)
531+
if not missing:
532+
s = self.obj[item]
517533
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
518534

519535
# perform the equivalent of a setitem on the info axis
@@ -525,8 +541,14 @@ def setter(item, v):
525541
all(com.is_null_slice(idx) or
526542
com.is_full_slice(idx, len(self.obj))
527543
for idx in pi)):
544+
if missing:
545+
self.obj[item] = v
546+
return
528547
s = v
529548
else:
549+
if missing:
550+
self[plane_indexer[0], item] = v
551+
return
530552
# set the item, possibly having a dtype change
531553
s._consolidate_inplace()
532554
s = s.copy()
@@ -1276,6 +1298,13 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False,
12761298
inds, = obj.nonzero()
12771299
return inds
12781300
else:
1301+
# allow missing columns when setting
1302+
if is_setter and axis == 1:
1303+
return [
1304+
self._convert_to_indexer(k, axis=axis,
1305+
is_setter=is_setter,
1306+
raise_missing=raise_missing)
1307+
for k in obj]
12791308
# When setting, missing keys are not allowed, even with .loc:
12801309
kwargs = {'raise_missing': True if is_setter else
12811310
raise_missing}

pandas/tests/frame/test_indexing.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from pandas.tests.frame.common import TestData
1919
import pandas.util.testing as tm
2020
from pandas.util.testing import (
21-
assert_almost_equal, assert_frame_equal, assert_series_equal)
21+
assert_almost_equal, assert_frame_equal, assert_index_equal,
22+
assert_numpy_array_equal, assert_series_equal)
2223

2324
from pandas.tseries.offsets import BDay
2425

@@ -185,6 +186,51 @@ def test_setitem_list_of_tuples(self):
185186
expected = Series(tuples, index=self.frame.index, name='tuples')
186187
assert_series_equal(result, expected)
187188

189+
def test_setitem_list_all_missing_columns_scalar(self):
190+
df = self.frame.copy()
191+
df[['E', 'F']] = 1
192+
193+
result = df.columns
194+
expected = self.frame.columns.union(['E', 'F'])
195+
assert_index_equal(result, expected)
196+
197+
result = df[self.frame.columns]
198+
expected = self.frame
199+
assert_frame_equal(result, expected)
200+
201+
assert (df[['E', 'F']] == 1).all().all()
202+
203+
def test_setitem_list_some_missing_columns_list(self):
204+
df = self.frame.copy()
205+
df[['A', 'E']] = [1, 2]
206+
207+
result = df.columns
208+
expected = self.frame.columns.union(['E'])
209+
assert_index_equal(result, expected)
210+
211+
result = df[self.frame.columns.drop('A')]
212+
expected = self.frame.drop('A', axis=1)
213+
assert_frame_equal(result, expected)
214+
215+
assert (df['A'] == 1).all()
216+
assert (df['E'] == 2).all()
217+
218+
def test_setitem_list_some_missing_columns_dataframe(self):
219+
df = self.frame.copy()
220+
df[['A', 'E']] = self.frame[['B', 'C']]
221+
222+
result = df.columns
223+
expected = self.frame.columns.union(['E'])
224+
assert_index_equal(result, expected)
225+
226+
result = df[self.frame.columns.drop('A')]
227+
expected = self.frame.drop('A', axis=1)
228+
assert_frame_equal(result, expected)
229+
230+
result = df[['A', 'E']]
231+
expected = self.frame[['B', 'C']]
232+
assert_numpy_array_equal(result.to_numpy(), expected.to_numpy())
233+
188234
def test_setitem_mulit_index(self):
189235
# GH7655, test that assigning to a sub-frame of a frame
190236
# with multi-index columns aligns both rows and columns
@@ -454,9 +500,6 @@ def test_setitem(self):
454500
self.frame['col6'] = series
455501
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
456502

457-
with pytest.raises(KeyError):
458-
self.frame[np.random.randn(len(self.frame) + 1)] = 1
459-
460503
# set ndarray
461504
arr = np.random.randn(len(self.frame))
462505
self.frame['col9'] = arr
@@ -1093,10 +1136,11 @@ def test_fancy_index_int_labels_exceptions(self):
10931136
r" dtype='object'\)\] are in the \[index\]")
10941137
with pytest.raises(KeyError, match=msg):
10951138
self.frame.ix[['foo', 'bar', 'baz']] = 1
1096-
msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1097-
r" \[columns\]")
1098-
with pytest.raises(KeyError, match=msg):
1099-
self.frame.ix[:, ['E']] = 1
1139+
# partial setting now allows this GH13658
1140+
# msg = (r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1141+
# r" \[columns\]")
1142+
# with pytest.raises(KeyError, match=msg):
1143+
# self.frame.ix[:, ['E']] = 1
11001144

11011145
# partial setting now allows this GH2578
11021146
# pytest.raises(KeyError, self.frame.ix.__setitem__,

0 commit comments

Comments
 (0)