Skip to content

Commit c369d93

Browse files
phofl, lithomas1, and pre-commit-ci[bot]
authored
BUG: ne comparison returns False for NA and other value (#56123)
* BUG: ne comparison returns False for NA and other value
* Fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: Thomas Li <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4e2cb22 commit c369d93

File tree

4 files changed

+44
-18
lines changed

4 files changed

+44
-18
lines changed

doc/source/whatsnew/v2.1.4.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Bug fixes
3030
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
3131
- Fixed bug in :meth:`DataFrame.to_hdf` raising when columns have ``StringDtype`` (:issue:`55088`)
3232
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
33+
- Fixed bug in :meth:`Series.__ne__` returning ``False`` when comparing ``NA`` with a string value for ``dtype="string[pyarrow_numpy]"`` (:issue:`56122`)
3334
- Fixed bug in :meth:`Series.mode` not keeping object dtype when ``infer_string`` is set (:issue:`56183`)
3435
- Fixed bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` when ``pat=None`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`56271`)
3536
- Fixed bug in :meth:`Series.str.translate` losing object dtype when string option is set (:issue:`56152`)

pandas/core/arrays/string_arrow.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from functools import partial
4+
import operator
45
import re
56
from typing import (
67
TYPE_CHECKING,
@@ -663,7 +664,10 @@ def _convert_int_dtype(self, result):
663664

664665
def _cmp_method(self, other, op):
665666
result = super()._cmp_method(other, op)
666-
return result.to_numpy(np.bool_, na_value=False)
667+
if op == operator.ne:
668+
return result.to_numpy(np.bool_, na_value=True)
669+
else:
670+
return result.to_numpy(np.bool_, na_value=False)
667671

668672
def value_counts(self, dropna: bool = True) -> Series:
669673
from pandas import Series

pandas/tests/arithmetic/test_object.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,13 @@
1010

1111
from pandas._config import using_pyarrow_string_dtype
1212

13+
import pandas.util._test_decorators as td
14+
1315
import pandas as pd
1416
from pandas import (
1517
Series,
1618
Timestamp,
19+
option_context,
1720
)
1821
import pandas._testing as tm
1922
from pandas.core import ops
@@ -33,20 +36,24 @@ def test_comparison_object_numeric_nas(self, comparison_op):
3336
expected = func(ser.astype(float), shifted.astype(float))
3437
tm.assert_series_equal(result, expected)
3538

36-
def test_object_comparisons(self):
37-
ser = Series(["a", "b", np.nan, "c", "a"])
39+
@pytest.mark.parametrize(
40+
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
41+
)
42+
def test_object_comparisons(self, infer_string):
43+
with option_context("future.infer_string", infer_string):
44+
ser = Series(["a", "b", np.nan, "c", "a"])
3845

39-
result = ser == "a"
40-
expected = Series([True, False, False, False, True])
41-
tm.assert_series_equal(result, expected)
46+
result = ser == "a"
47+
expected = Series([True, False, False, False, True])
48+
tm.assert_series_equal(result, expected)
4249

43-
result = ser < "a"
44-
expected = Series([False, False, False, False, False])
45-
tm.assert_series_equal(result, expected)
50+
result = ser < "a"
51+
expected = Series([False, False, False, False, False])
52+
tm.assert_series_equal(result, expected)
4653

47-
result = ser != "a"
48-
expected = -(ser == "a")
49-
tm.assert_series_equal(result, expected)
54+
result = ser != "a"
55+
expected = -(ser == "a")
56+
tm.assert_series_equal(result, expected)
5057

5158
@pytest.mark.parametrize("dtype", [None, object])
5259
def test_more_na_comparisons(self, dtype):

pandas/tests/arrays/string_/test_string.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
This module tests the functionality of StringArray and ArrowStringArray.
33
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
44
"""
5+
import operator
6+
57
import numpy as np
68
import pytest
79

@@ -224,7 +226,10 @@ def test_comparison_methods_scalar(comparison_op, dtype):
224226
result = getattr(a, op_name)(other)
225227
if dtype.storage == "pyarrow_numpy":
226228
expected = np.array([getattr(item, op_name)(other) for item in a])
227-
expected[1] = False
229+
if comparison_op == operator.ne:
230+
expected[1] = True
231+
else:
232+
expected[1] = False
228233
tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
229234
else:
230235
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -239,7 +244,10 @@ def test_comparison_methods_scalar_pd_na(comparison_op, dtype):
239244
result = getattr(a, op_name)(pd.NA)
240245

241246
if dtype.storage == "pyarrow_numpy":
242-
expected = np.array([False, False, False])
247+
if operator.ne == comparison_op:
248+
expected = np.array([True, True, True])
249+
else:
250+
expected = np.array([False, False, False])
243251
tm.assert_numpy_array_equal(result, expected)
244252
else:
245253
expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
@@ -265,7 +273,7 @@ def test_comparison_methods_scalar_not_string(comparison_op, dtype):
265273
if dtype.storage == "pyarrow_numpy":
266274
expected_data = {
267275
"__eq__": [False, False, False],
268-
"__ne__": [True, False, True],
276+
"__ne__": [True, True, True],
269277
}[op_name]
270278
expected = np.array(expected_data)
271279
tm.assert_numpy_array_equal(result, expected)
@@ -285,12 +293,18 @@ def test_comparison_methods_array(comparison_op, dtype):
285293
other = [None, None, "c"]
286294
result = getattr(a, op_name)(other)
287295
if dtype.storage == "pyarrow_numpy":
288-
expected = np.array([False, False, False])
289-
expected[-1] = getattr(other[-1], op_name)(a[-1])
296+
if operator.ne == comparison_op:
297+
expected = np.array([True, True, False])
298+
else:
299+
expected = np.array([False, False, False])
300+
expected[-1] = getattr(other[-1], op_name)(a[-1])
290301
tm.assert_numpy_array_equal(result, expected)
291302

292303
result = getattr(a, op_name)(pd.NA)
293-
expected = np.array([False, False, False])
304+
if operator.ne == comparison_op:
305+
expected = np.array([True, True, True])
306+
else:
307+
expected = np.array([False, False, False])
294308
tm.assert_numpy_array_equal(result, expected)
295309

296310
else:

0 commit comments

Comments
 (0)