Skip to content

ENH: Support scalar condition in Series.where and DataFrame.where #53905

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ Other enhancements
- :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
- :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
- :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`)
- :meth:`Series.where`, :meth:`Series.mask`, :meth:`DataFrame.where`, and :meth:`DataFrame.mask` now support a scalar ``cond`` (:issue:`53903`)
- :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
- :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`)
- Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
Expand All @@ -119,6 +120,7 @@ Other enhancements
- Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
- Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)


.. ---------------------------------------------------------------------------
.. _whatsnew_210.notable_bug_fixes:

Expand Down
14 changes: 11 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10102,8 +10102,15 @@ def _where(
else:
if not hasattr(cond, "shape"):
cond = np.asanyarray(cond)
if cond.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
if cond.shape == ():
# Note: DataFrame(True, index=[1,2,3], columns=["a", "b", "c"]) works
# but DataFrame(np.array(True), index=[1,2,3], columns=["a", "b", "c"])
# does not hence we need to unpack scalar
cond = cond.item()
elif cond.shape != self.shape:
raise ValueError(
"Array conditional must be same shape as self or scalar!"
)
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)

# make sure we are boolean
Expand Down Expand Up @@ -10465,7 +10472,8 @@ def mask(
cond = common.apply_if_callable(cond, self)

# see gh-21891
if not hasattr(cond, "__invert__"):
if not hasattr(cond, "shape") or not hasattr(cond, "__invert__"):
# testing __invert__ not enough, e.g. `~True` is `-2`.
cond = np.array(cond)

return self.where(
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,15 @@ def test_where_invalid(self):
with pytest.raises(ValueError, match=msg):
df.where(err2, other1)

with pytest.raises(ValueError, match=msg):
df.mask(True)
with pytest.raises(ValueError, match=msg):
df.mask(0)
def test_where_scalar_cond(self):
    """Check ``DataFrame.where`` with a scalar boolean ``cond``."""
    frame = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])

    # cond=True: the result is expected to equal the input frame.
    kept = frame.where(True)
    tm.assert_frame_equal(kept, frame)

    # cond=False: the result is expected to be all-NaN with the same labels.
    blanked = frame.where(False)
    all_nan = DataFrame(np.nan, index=frame.index, columns=frame.columns)
    tm.assert_frame_equal(blanked, all_nan)

def test_where_set(self, where_frame, float_string_frame):
# where inplace
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/series/indexing/test_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ def test_mask():
rs2 = s2.mask(cond[:3], -s2)
tm.assert_series_equal(rs, rs2)

# test scalar
assert s.mask(True).isna().all()
tm.assert_series_equal(s.mask(False), s)

msg = "Array conditional must be same shape as self"
with pytest.raises(ValueError, match=msg):
s.mask(1)
with pytest.raises(ValueError, match=msg):
s.mask(cond[:3].values, -s)

Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,6 @@ def test_where_error():
cond = s > 0

msg = "Array conditional must be same shape as self"
with pytest.raises(ValueError, match=msg):
s.where(1)
with pytest.raises(ValueError, match=msg):
s.where(cond[:3].values, -s)

Expand Down Expand Up @@ -464,3 +462,17 @@ def test_where_datetimelike_categorical(tz_naive_fixture):
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))

tm.assert_frame_equal(res, pd.DataFrame(dr))


def test_where_scalar_cond():
    """Check ``Series.where`` with a scalar boolean ``cond`` on categorical data."""
    # cond=True: the result is expected to equal the input series.
    cat_ser = Series(pd.Categorical(["a", "b"]))
    kept = cat_ser.where(True)
    tm.assert_series_equal(kept, cat_ser)

    # cond=False: every value is expected to be missing, with the
    # original categories retained.
    cat_ser = Series(pd.Categorical(["a", "b"]))
    blanked = cat_ser.where(False)
    all_missing = Series(pd.Categorical([None, None], categories=["a", "b"]))
    tm.assert_series_equal(blanked, all_missing)