Skip to content

Commit 28f4942

Browse files
authored
PERF: any/all with axis=1 (#44857)
1 parent 39ccb35 commit 28f4942

File tree

2 files changed

+48
-4
lines changed

2 files changed

+48
-4
lines changed

pandas/core/frame.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5998,14 +5998,15 @@ def dropna(
59985998
raise KeyError(np.array(subset)[check].tolist())
59995999
agg_obj = self.take(indices, axis=agg_axis)
60006000

6001-
count = agg_obj.count(axis=agg_axis)
6002-
60036001
if thresh is not None:
6002+
count = agg_obj.count(axis=agg_axis)
60046003
mask = count >= thresh
60056004
elif how == "any":
6006-
mask = count == len(agg_obj._get_axis(agg_axis))
6005+
# faster equivalent to 'agg_obj.count(agg_axis) == self.shape[agg_axis]'
6006+
mask = notna(agg_obj).all(axis=agg_axis, bool_only=False)
60076007
elif how == "all":
6008-
mask = count > 0
6008+
# faster equivalent to 'agg_obj.count(agg_axis) > 0'
6009+
mask = notna(agg_obj).any(axis=agg_axis, bool_only=False)
60096010
else:
60106011
if how is not None:
60116012
raise ValueError(f"invalid how option: {how}")
@@ -10035,6 +10036,34 @@ def _get_data() -> DataFrame:
1003510036
result = self._constructor_sliced(result, index=labels)
1003610037
return result
1003710038

10039+
def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
10040+
"""
10041+
Special case for _reduce to try to avoid a potentially-expensive transpose.
10042+
10043+
Apply the reduction block-wise along axis=1 and then reduce the resulting
10044+
1D arrays.
10045+
"""
10046+
if name == "all":
10047+
result = np.ones(len(self), dtype=bool)
10048+
ufunc = np.logical_and
10049+
elif name == "any":
10050+
result = np.zeros(len(self), dtype=bool)
10051+
# error: Incompatible types in assignment
10052+
# (expression has type "_UFunc_Nin2_Nout1[Literal['logical_or'],
10053+
# Literal[20], Literal[False]]", variable has type
10054+
# "_UFunc_Nin2_Nout1[Literal['logical_and'], Literal[20],
10055+
# Literal[True]]")
10056+
ufunc = np.logical_or # type: ignore[assignment]
10057+
else:
10058+
raise NotImplementedError(name)
10059+
10060+
for arr in self._mgr.arrays:
10061+
middle = func(arr, axis=0, skipna=skipna)
10062+
result = ufunc(result, middle)
10063+
10064+
res_ser = self._constructor_sliced(result, index=self.index)
10065+
return res_ser
10066+
1003810067
def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
1003910068
"""
1004010069
Count number of distinct elements in specified axis.

pandas/core/generic.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10349,6 +10349,21 @@ def _logical_func(
1034910349
)
1035010350
return res._logical_func(name, func, skipna=skipna, **kwargs)
1035110351

10352+
if (
10353+
self.ndim > 1
10354+
and axis == 1
10355+
and len(self._mgr.arrays) > 1
10356+
# TODO(EA2D): special-case not needed
10357+
and all(x.ndim == 2 for x in self._mgr.arrays)
10358+
and bool_only is not None
10359+
and not kwargs
10360+
):
10361+
# Fastpath avoiding potentially expensive transpose
10362+
obj = self
10363+
if bool_only:
10364+
obj = self._get_bool_data()
10365+
return obj._reduce_axis1(name, func, skipna=skipna)
10366+
1035210367
return self._reduce(
1035310368
func,
1035410369
name=name,

0 commit comments

Comments
 (0)