Skip to content

Commit b18c6f7

Browse files
How Si Weihowsiwei
How Si Wei
authored and committed
Fix assignment to multiple columns when some columns do not exist
1 parent 0de9955 commit b18c6f7

File tree

5 files changed

+132
-18
lines changed

5 files changed

+132
-18
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,37 @@ Backwards incompatible API changes
176176
177177
pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)])
178178
179+
.. _whatsnew_1000.api_breaking.multicolumn_assignment:
180+
181+
Assignment to multiple columns of a DataFrame when some columns do not exist
182+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
183+
184+
Assignment to multiple columns of a :class:`DataFrame` when some of the columns do not exist would previously assign the values to the last column. Now, new columns will be constructed with the right values. (:issue:`13658`)
185+
186+
.. ipython:: python
187+
188+
df = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]})
189+
df
190+
191+
*Previous behavior*:
192+
193+
.. code-block:: ipython
194+
195+
In [3]: df[['a', 'c']] = 1
196+
In [4]: df
197+
Out[4]:
198+
a b
199+
0 1 1
200+
1 1 1
201+
2 1 1
202+
203+
*New behavior*:
204+
205+
.. ipython:: python
206+
207+
df[['a', 'c']] = 1
208+
df
209+
179210
.. _whatsnew_1000.api.other:
180211

181212
Other API changes

pandas/core/frame.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3007,6 +3007,12 @@ def _setitem_array(self, key, value):
30073007
for k1, k2 in zip(key, value.columns):
30083008
self[k1] = value[k2]
30093009
else:
3010+
if all(is_hashable(k) for k in key):
3011+
for k in key:
3012+
try:
3013+
self[k]
3014+
except KeyError:
3015+
self[k] = np.nan
30103016
indexer = self.loc._get_listlike_indexer(
30113017
key, axis=1, raise_missing=False
30123018
)[1]

pandas/core/indexing.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.dtypes.common import (
1313
ensure_platform_int,
1414
is_float,
15+
is_hashable,
1516
is_integer,
1617
is_integer_dtype,
1718
is_iterator,
@@ -201,6 +202,19 @@ def _get_setitem_indexer(self, key):
201202
def __setitem__(self, key, value):
202203
if isinstance(key, tuple):
203204
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
205+
if (
206+
self.name == "loc"
207+
and len(key) > 1
208+
and is_list_like_indexer(key[1])
209+
and not isinstance(key[1], tuple)
210+
and not com.is_bool_indexer(key[1])
211+
and all(is_hashable(k) for k in key[1])
212+
):
213+
for k in key[1]:
214+
try:
215+
self.obj[k]
216+
except KeyError:
217+
self.obj[k] = np.nan
204218
else:
205219
key = com.apply_if_callable(key, self.obj)
206220
indexer = self._get_setitem_indexer(key)

pandas/tests/frame/test_indexing.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,47 @@ def test_setitem_list_of_tuples(self, float_frame):
204204
expected = Series(tuples, index=float_frame.index, name="tuples")
205205
tm.assert_series_equal(result, expected)
206206

207+
def test_setitem_list_all_missing_columns_scalar(self, float_frame):
208+
# GH 26534
209+
result = float_frame.copy()
210+
result[["E", "F"]] = 1
211+
expected = float_frame.copy()
212+
# force the dtypes to be float as currently multicolumn assignment does not
213+
# change column dtype from float to int even when it's being assigned an int
214+
expected["E"] = 1.0
215+
expected["F"] = 1.0
216+
assert_frame_equal(result, expected)
217+
218+
def test_setitem_list_some_missing_columns_list(self, float_frame):
219+
# GH 26534
220+
result = float_frame.copy()
221+
result[["A", "E"]] = [1, 2]
222+
expected = float_frame.copy()
223+
# force the dtypes to be float as currently multicolumn assignment does not
224+
# change column dtype from float to int even when it's being assigned an int
225+
expected["A"] = 1.0
226+
expected["E"] = 2.0
227+
assert_frame_equal(result, expected)
228+
229+
def test_setitem_list_some_missing_columns_dataframe(self, float_frame):
230+
# GH 26534
231+
result = float_frame.copy()
232+
result[["A", "E"]] = float_frame[["B", "C"]]
233+
expected = float_frame.copy()
234+
expected["A"] = float_frame["B"]
235+
expected["E"] = float_frame["C"]
236+
assert_frame_equal(result, expected)
237+
238+
def test_setitem_list_some_missing_columns_2dlist(self):
239+
# GH 26534
240+
result = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
241+
result[["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12], [13, 14, 15]]
242+
expected = pd.DataFrame(
243+
[[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
244+
columns=["A", "B", "C", "D"],
245+
)
246+
tm.assert_frame_equal(result, expected)
247+
207248
def test_setitem_mulit_index(self):
208249
# GH7655, test that assigning to a sub-frame of a frame
209250
# with multi-index columns aligns both rows and columns
@@ -497,13 +538,6 @@ def test_setitem(self, float_frame):
497538
float_frame["col6"] = series
498539
tm.assert_series_equal(series, float_frame["col6"], check_names=False)
499540

500-
msg = (
501-
r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the"
502-
r" \[columns\]\""
503-
)
504-
with pytest.raises(KeyError, match=msg):
505-
float_frame[np.random.randn(len(float_frame) + 1)] = 1
506-
507541
# set ndarray
508542
arr = np.random.randn(len(float_frame))
509543
float_frame["col9"] = arr
@@ -1149,17 +1183,6 @@ def test_fancy_index_int_labels_exceptions(self, float_frame):
11491183
)
11501184
with pytest.raises(KeyError, match=msg):
11511185
float_frame.ix[["foo", "bar", "baz"]] = 1
1152-
msg = (
1153-
r"None of \[Index\(\['E'\], dtype='object'\)\] are in the"
1154-
r" \[columns\]"
1155-
)
1156-
with pytest.raises(KeyError, match=msg):
1157-
float_frame.ix[:, ["E"]] = 1
1158-
1159-
# FIXME: don't leave commented-out
1160-
# partial setting now allows this GH2578
1161-
# pytest.raises(KeyError, float_frame.ix.__setitem__,
1162-
# (slice(None, None), 'E'), 1)
11631186

11641187
def test_setitem_fancy_mixed_2d(self, float_string_frame):
11651188

pandas/tests/indexing/test_loc.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,46 @@ def test_loc_setitem_with_scalar_index(self, indexer, value):
856856

857857
assert is_scalar(result) and result == "Z"
858858

859+
def test_loc_setitem_missing_columns_scalar_index_list_value(self):
860+
# GH 26534
861+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
862+
df.loc[1, ["C", "D"]] = [7, 8]
863+
expected = pd.DataFrame(
864+
[[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
865+
columns=["A", "B", "C", "D"],
866+
)
867+
tm.assert_frame_equal(df, expected)
868+
869+
def test_loc_setitem_missing_columns_full_index_dataframe_value(self):
870+
# GH 26534
871+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
872+
df2 = pd.DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"])
873+
df.loc[:, ["A", "C"]] = df2
874+
expected = pd.DataFrame(
875+
[[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
876+
)
877+
tm.assert_frame_equal(df, expected)
878+
879+
def test_loc_setitem_missing_columns_list_index_scalar_value(self):
880+
# GH 26534
881+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
882+
df.loc[[0, 2], ["B", "C", "D"]] = 9
883+
expected = pd.DataFrame(
884+
[[1, 9, 9, 9], [3, 4, np.nan, np.nan], [5, 9, 9, 9]],
885+
columns=["A", "B", "C", "D"],
886+
)
887+
tm.assert_frame_equal(df, expected)
888+
889+
def test_loc_setitem_missing_columns_range_index_2dlist_value(self):
890+
# GH 26534
891+
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
892+
df.loc[1:3, ["B", "C", "D"]] = [[7, 8, 9], [10, 11, 12]]
893+
expected = pd.DataFrame(
894+
[[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
895+
columns=["A", "B", "C", "D"],
896+
)
897+
tm.assert_frame_equal(df, expected)
898+
859899
def test_loc_coercion(self):
860900

861901
# 12411

0 commit comments

Comments
 (0)