Skip to content

Commit 351b688

Browse files
authored
PERF: DataFrame.__setitem__ (#44796)
1 parent a48f451 commit 351b688

File tree

2 files changed

+59
-14
lines changed

2 files changed

+59
-14
lines changed

pandas/core/internals/blocks.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1457,7 +1457,8 @@ def iget(self, col):
14571457
def set_inplace(self, locs, values) -> None:
14581458
# NB: This is a misnomer, is supposed to be inplace but is not,
14591459
# see GH#33457
1460-
assert locs.tolist() == [0]
1460+
# When an ndarray, we should have locs.tolist() == [0]
1461+
# When a BlockPlacement we should have list(locs) == [0]
14611462
self.values = values
14621463
try:
14631464
# TODO(GH33457) this can be removed

pandas/core/internals/managers.py

Lines changed: 57 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1066,22 +1066,12 @@ def iset(
10661066

10671067
# Note: we exclude DTA/TDA here
10681068
value_is_extension_type = is_1d_only_ea_dtype(value.dtype)
1069-
1070-
# categorical/sparse/datetimetz
1071-
if value_is_extension_type:
1072-
1073-
def value_getitem(placement):
1074-
return value
1075-
1076-
else:
1069+
if not value_is_extension_type:
10771070
if value.ndim == 2:
10781071
value = value.T
10791072
else:
10801073
value = ensure_block_shape(value, ndim=2)
10811074

1082-
def value_getitem(placement):
1083-
return value[placement.indexer]
1084-
10851075
if value.shape[1:] != self.shape[1:]:
10861076
raise AssertionError(
10871077
"Shape of new values must be compatible with manager shape"
@@ -1092,11 +1082,37 @@ def value_getitem(placement):
10921082
# In this case, get_blkno_placements will yield only one tuple,
10931083
# containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))
10941084

1085+
# Check if we can use _iset_single fastpath
1086+
blkno = self.blknos[loc]
1087+
blk = self.blocks[blkno]
1088+
if len(blk._mgr_locs) == 1: # TODO: fastest way to check this?
1089+
return self._iset_single(
1090+
# error: Argument 1 to "_iset_single" of "BlockManager" has
1091+
# incompatible type "Union[int, slice, ndarray[Any, Any]]";
1092+
# expected "int"
1093+
loc, # type:ignore[arg-type]
1094+
value,
1095+
inplace=inplace,
1096+
blkno=blkno,
1097+
blk=blk,
1098+
)
1099+
10951100
# error: Incompatible types in assignment (expression has type
10961101
# "List[Union[int, slice, ndarray]]", variable has type "Union[int,
10971102
# slice, ndarray]")
10981103
loc = [loc] # type: ignore[assignment]
10991104

1105+
# categorical/sparse/datetimetz
1106+
if value_is_extension_type:
1107+
1108+
def value_getitem(placement):
1109+
return value
1110+
1111+
else:
1112+
1113+
def value_getitem(placement):
1114+
return value[placement.indexer]
1115+
11001116
# Accessing public blknos ensures the public versions are initialized
11011117
blknos = self.blknos[loc]
11021118
blklocs = self.blklocs[loc].copy()
@@ -1172,6 +1188,29 @@ def value_getitem(placement):
11721188
# Newly created block's dtype may already be present.
11731189
self._known_consolidated = False
11741190

1191+
def _iset_single(
1192+
self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block
1193+
) -> None:
1194+
"""
1195+
Fastpath for iset when we are only setting a single position and
1196+
the Block currently in that position is itself single-column.
1197+
1198+
In this case we can swap out the entire Block and blklocs and blknos
1199+
are unaffected.
1200+
"""
1201+
# Caller is responsible for verifying value.shape
1202+
1203+
if inplace and blk.should_store(value):
1204+
iloc = self.blklocs[loc]
1205+
blk.set_inplace(slice(iloc, iloc + 1), value)
1206+
return
1207+
1208+
nb = new_block_2d(value, placement=blk._mgr_locs)
1209+
old_blocks = self.blocks
1210+
new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :]
1211+
self.blocks = new_blocks
1212+
return
1213+
11751214
def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
11761215
"""
11771216
Insert item at selected position.
@@ -1197,8 +1236,13 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
11971236
bp = BlockPlacement(slice(loc, loc + 1))
11981237
block = new_block_2d(values=value, placement=bp)
11991238

1200-
self._insert_update_mgr_locs(loc)
1201-
self._insert_update_blklocs_and_blknos(loc)
1239+
if not len(self.blocks):
1240+
# Fastpath
1241+
self._blklocs = np.array([0], dtype=np.intp)
1242+
self._blknos = np.array([0], dtype=np.intp)
1243+
else:
1244+
self._insert_update_mgr_locs(loc)
1245+
self._insert_update_blklocs_and_blknos(loc)
12021246

12031247
self.axes[0] = new_axis
12041248
self.blocks += (block,)

0 commit comments

Comments
 (0)