Skip to content

Commit 12c6de1

Browse files
[ArrowStringArray] fix test_value_counts_na (#41002)
1 parent 427fc57 commit 12c6de1

File tree

2 files changed

+12
-17
lines changed

2 files changed

+12
-17
lines changed

pandas/core/arrays/string_arrow.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -675,13 +675,18 @@ def value_counts(self, dropna: bool = True) -> Series:
675675

676676
vc = self._data.value_counts()
677677

678-
# Index cannot hold ExtensionArrays yet
679-
index = Index(type(self)(vc.field(0)).astype(object))
678+
values = vc.field(0)
679+
counts = vc.field(1)
680+
if dropna and self._data.null_count > 0:
681+
mask = values.is_valid()
682+
values = values.filter(mask)
683+
counts = counts.filter(mask)
684+
680685
# No missing values so we can adhere to the interface and return a numpy array.
681-
counts = np.array(vc.field(1))
686+
counts = np.array(counts)
682687

683-
if dropna and self._data.null_count > 0:
684-
raise NotImplementedError("yo")
688+
# Index cannot hold ExtensionArrays yet
689+
index = Index(type(self)(values)).astype(object)
685690

686691
return Series(counts, index=index).astype("Int64")
687692

pandas/tests/arrays/string_/test_string.py

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -476,12 +476,7 @@ def test_arrow_roundtrip(dtype, dtype_object):
476476
assert result.loc[2, "a"] is pd.NA
477477

478478

479-
def test_value_counts_na(dtype, request):
480-
if dtype == "arrow_string":
481-
reason = "TypeError: boolean value of NA is ambiguous"
482-
mark = pytest.mark.xfail(reason=reason)
483-
request.node.add_marker(mark)
484-
479+
def test_value_counts_na(dtype):
485480
arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
486481
result = arr.value_counts(dropna=False)
487482
expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64")
@@ -492,12 +487,7 @@ def test_value_counts_na(dtype, request):
492487
tm.assert_series_equal(result, expected)
493488

494489

495-
def test_value_counts_with_normalize(dtype, request):
496-
if dtype == "arrow_string":
497-
reason = "TypeError: boolean value of NA is ambiguous"
498-
mark = pytest.mark.xfail(reason=reason)
499-
request.node.add_marker(mark)
500-
490+
def test_value_counts_with_normalize(dtype):
501491
s = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
502492
result = s.value_counts(normalize=True)
503493
expected = pd.Series([2, 1], index=["a", "b"], dtype="Float64") / 3

0 commit comments

Comments
 (0)