Skip to content

Commit fc4ff26

Browse files
authored
PERF: only do length-1 checks once in concat (#52784)
* PERF: concat only check length=1 once
* simplify
* mypy fixup
1 parent 6e91a22 commit fc4ff26

File tree

1 file changed

+18
-50
lines changed

1 file changed

+18
-50
lines changed

pandas/core/internals/concat.py

Lines changed: 18 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from typing import (
55
TYPE_CHECKING,
66
Sequence,
7+
cast,
78
)
89
import warnings
910

@@ -220,23 +221,27 @@ def concatenate_managers(
220221
return BlockManager((nb,), axes)
221222

222223
mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)
224+
if len(mgrs_indexers) == 1:
225+
mgr, indexers = mgrs_indexers[0]
226+
# Assertion correct but disabled for perf:
227+
# assert not indexers
228+
if copy:
229+
out = mgr.copy(deep=True)
230+
else:
231+
out = mgr.copy(deep=False)
232+
out.axes = axes
233+
return out
223234

224235
concat_plan = _get_combined_plan([mgr for mgr, _ in mgrs_indexers])
225236

226237
blocks = []
238+
values: ArrayLike
227239

228240
for placement, join_units in concat_plan:
229241
unit = join_units[0]
230242
blk = unit.block
231243

232-
if len(join_units) == 1:
233-
values = blk.values
234-
if copy:
235-
values = values.copy()
236-
else:
237-
values = values.view()
238-
fastpath = True
239-
elif _is_uniform_join_units(join_units):
244+
if _is_uniform_join_units(join_units):
240245
vals = [ju.block.values for ju in join_units]
241246

242247
if not blk.is_extension:
@@ -527,8 +532,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
527532

528533
if upcasted_na is None and self.block.dtype.kind != "V":
529534
# No upcasting is necessary
530-
fill_value = self.block.fill_value
531-
values = self.block.values
535+
return self.block.values
532536
else:
533537
fill_value = upcasted_na
534538

@@ -540,30 +544,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
540544
# we want to avoid filling with np.nan if we are
541545
# using None; we already know that we are all
542546
# nulls
543-
values = self.block.values.ravel(order="K")
544-
if len(values) and values[0] is None:
547+
values = cast(np.ndarray, self.block.values)
548+
if values.size and values[0, 0] is None:
545549
fill_value = None
546550

547551
return make_na_array(empty_dtype, self.block.shape, fill_value)
548552

549-
if not self.block._can_consolidate:
550-
# preserve these for validation in concat_compat
551-
return self.block.values
552-
553-
if self.block.is_bool:
554-
# External code requested filling/upcasting, bool values must
555-
# be upcasted to object to avoid being upcasted to numeric.
556-
values = self.block.astype(np.dtype("object")).values
557-
else:
558-
# No dtype upcasting is done here, it will be performed during
559-
# concatenation itself.
560-
values = self.block.values
561-
562-
# If there's no indexing to be done, we want to signal outside
563-
# code that this array must be copied explicitly. This is done
564-
# by returning a view and checking `retval.base`.
565-
values = values.view()
566-
return values
553+
return self.block.values
567554

568555

569556
def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike:
@@ -580,19 +567,7 @@ def _concatenate_join_units(join_units: list[JoinUnit], copy: bool) -> ArrayLike
580567
for ju in join_units
581568
]
582569

583-
if len(to_concat) == 1:
584-
# Only one block, nothing to concatenate.
585-
concat_values = to_concat[0]
586-
if copy:
587-
if isinstance(concat_values, np.ndarray):
588-
# non-reindexed (=not yet copied) arrays are made into a view
589-
# in JoinUnit.get_reindexed_values
590-
if concat_values.base is not None:
591-
concat_values = concat_values.copy()
592-
else:
593-
concat_values = concat_values.copy()
594-
595-
elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
570+
if any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
596571
# TODO(EA2D): special case not needed if all EAs used HybridBlocks
597572

598573
# error: No overload variant of "__getitem__" of "ExtensionArray" matches
@@ -658,10 +633,6 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> tuple[DtypeObj, DtypeObj
658633
-------
659634
dtype
660635
"""
661-
if len(join_units) == 1:
662-
blk = join_units[0].block
663-
return blk.dtype, blk.dtype
664-
665636
if lib.dtypes_all_equal([ju.block.dtype for ju in join_units]):
666637
empty_dtype = join_units[0].block.dtype
667638
return empty_dtype, empty_dtype
@@ -722,7 +693,4 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
722693
# no blocks that would get missing values (can lead to type upcasts)
723694
# unless we're an extension dtype.
724695
all(not ju.is_na or ju.block.is_extension for ju in join_units)
725-
and
726-
# only use this path when there is something to concatenate
727-
len(join_units) > 1
728696
)

0 commit comments

Comments
 (0)