Skip to content

Commit 0e4c58e

Browse files
REF: Add Manager.column_setitem to set values into a single column (without intermediate series)
1 parent 1be9d38 commit 0e4c58e

File tree

6 files changed: 45 additions and 30 deletions

pandas/core/frame.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3924,16 +3924,16 @@ def _set_value(
39243924
Sets whether or not index/col interpreted as indexers
39253925
"""
39263926
try:
3927-
if takeable:
3928-
series = self._ixs(col, axis=1)
3929-
loc = index
3930-
else:
3931-
series = self._get_item_cache(col)
3932-
loc = self.index.get_loc(index)
3933-
39343927
# setitem_inplace will do validation that may raise TypeError,
39353928
# ValueError, or LossySetitemError
3936-
series._mgr.setitem_inplace(loc, value)
3929+
# breakpoint()
3930+
if takeable:
3931+
self._mgr.column_setitem(col, index, value)
3932+
else:
3933+
icol = self.columns.get_loc(col)
3934+
index = self.index.get_loc(index)
3935+
self._mgr.column_setitem(icol, index, value)
3936+
self._clear_item_cache()
39373937

39383938
except (KeyError, TypeError, ValueError, LossySetitemError):
39393939
# set using a non-recursive method & reset the cache

pandas/core/indexing.py

Lines changed: 8 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@
5252
from pandas.core.indexers import (
5353
check_array_indexer,
5454
is_empty_indexer,
55-
is_exact_shape_match,
5655
is_list_like_indexer,
5756
is_scalar_indexer,
5857
length_of_indexer,
@@ -1936,42 +1935,31 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19361935
"""
19371936
pi = plane_indexer
19381937

1939-
ser = self.obj._ixs(loc, axis=1)
1940-
19411938
# perform the equivalent of a setitem on the info axis
19421939
# as we have a null slice or a slice with full bounds
19431940
# which means essentially reassign to the columns of a
19441941
# multi-dim object
19451942
# GH#6149 (null slice), GH#10408 (full bounds)
19461943
if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
1947-
ser = value
1944+
self.obj._iset_item(loc, value)
19481945
elif (
19491946
is_array_like(value)
1950-
and is_exact_shape_match(ser, value)
1947+
and len(value.shape) > 0
1948+
and self.obj.shape[0] == value.shape[0]
19511949
and not is_empty_indexer(pi)
19521950
):
19531951
if is_list_like(pi):
1954-
ser = value[np.argsort(pi)]
1952+
value = value[np.argsort(pi)]
19551953
else:
19561954
# in case of slice
1957-
ser = value[pi]
1955+
value = value[pi]
1956+
self.obj._iset_item(loc, value)
19581957
else:
19591958
# set the item, first attempting to operate inplace, then
19601959
# falling back to casting if necessary; see
19611960
# _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
1962-
1963-
orig_values = ser._values
1964-
ser._mgr = ser._mgr.setitem((pi,), value)
1965-
1966-
if ser._values is orig_values:
1967-
# The setitem happened inplace, so the DataFrame's values
1968-
# were modified inplace.
1969-
return
1970-
self.obj._iset_item(loc, ser)
1971-
return
1972-
1973-
# reset the sliced object if unique
1974-
self.obj._iset_item(loc, ser)
1961+
self.obj._mgr.column_setitem(loc, plane_indexer, value)
1962+
self.obj._clear_item_cache()
19751963

19761964
def _setitem_single_block(self, indexer, value, name: str):
19771965
"""

pandas/core/internals/array_manager.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -869,6 +869,20 @@ def iset(
869869
self.arrays[mgr_idx] = value_arr
870870
return
871871

872+
def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
873+
"""
874+
Set values ("setitem") into a single column (not setting the full column).
875+
876+
This is a method on the ArrayManager level, to avoid creating an
877+
intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
878+
"""
879+
arr = self.arrays[loc]
880+
# create temporary SingleArrayManager without ref to use setitem implementation
881+
mgr = SingleArrayManager([arr], [self._axes[0]])
882+
new_mgr = mgr.setitem((idx,), value)
883+
# update existing ArrayManager in-place
884+
self.arrays[loc] = new_mgr.arrays[0]
885+
872886
def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
873887
"""
874888
Insert item at selected position.

pandas/core/internals/managers.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1185,6 +1185,17 @@ def _iset_single(
11851185
self.blocks = new_blocks
11861186
return
11871187

1188+
def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
1189+
"""
1190+
Set values ("setitem") into a single column (not setting the full column).
1191+
1192+
This is a method on the BlockManager level, to avoid creating an
1193+
intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
1194+
"""
1195+
col_mgr = self.iget(loc)
1196+
new_mgr = col_mgr.setitem((idx,), value)
1197+
self.iset(loc, new_mgr._block.values, inplace=True)
1198+
11881199
def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
11891200
"""
11901201
Insert item at selected position.

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1082,7 +1082,7 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager):
10821082
tm.assert_numpy_array_equal(zvals, expected.values)
10831083
assert np.shares_memory(zvals, df["z"]._values)
10841084
if not consolidate:
1085-
assert df["z"]._values is zvals
1085+
assert df["z"]._values.base is zvals.base
10861086

10871087
def test_setitem_duplicate_columns_not_inplace(self):
10881088
# GH#39510

pandas/tests/indexing/test_partial.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ def test_partial_setting(self):
266266
with pytest.raises(IndexError, match=msg):
267267
s.iat[3] = 5.0
268268

269-
def test_partial_setting_frame(self):
269+
def test_partial_setting_frame(self, using_array_manager):
270270
df_orig = DataFrame(
271271
np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
272272
)
@@ -279,6 +279,8 @@ def test_partial_setting_frame(self):
279279
df.iloc[4, 2] = 5.0
280280

281281
msg = "index 2 is out of bounds for axis 0 with size 2"
282+
if using_array_manager:
283+
msg = "list index out of range"
282284
with pytest.raises(IndexError, match=msg):
283285
df.iat[4, 2] = 5.0
284286

0 commit comments

Comments
 (0)