Skip to content

Commit a07cb65

Browse files
authored
BUG: indexing empty pyarrow backed object returning corrupt object (#51741)
1 parent 3e0c1da commit a07cb65

File tree

4 files changed

+16
-1
lines changed

4 files changed

+16
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,7 @@ Indexing
12481248
- Bug in :meth:`DataFrame.compare` not recognizing differences when comparing ``NA`` with a value in nullable dtypes (:issue:`48939`)
12491249
- Bug in :meth:`Series.rename` with :class:`MultiIndex` losing extension array dtypes (:issue:`21055`)
12501250
- Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to object (:issue:`49922`)
1251+
- Bug in :meth:`Series.__getitem__` returning a corrupt object when selecting from an empty pyarrow-backed object (:issue:`51734`)
12511252
- Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`)
12521253

12531254
Missing

pandas/_testing/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@
253253
else:
254254
FLOAT_PYARROW_DTYPES_STR_REPR = []
255255
ALL_INT_PYARROW_DTYPES_STR_REPR = []
256+
ALL_PYARROW_DTYPES = []
256257

257258

258259
EMPTY_STRING_PATTERN = re.compile("^$")

pandas/core/arrays/arrow/array.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,12 @@ def _concat_same_type(
10261026
ArrowExtensionArray
10271027
"""
10281028
chunks = [array for ea in to_concat for array in ea._data.iterchunks()]
1029-
arr = pa.chunked_array(chunks)
1029+
if to_concat[0].dtype == "string":
1030+
# StringDtype has no attribute pyarrow_dtype
1031+
pa_dtype = pa.string()
1032+
else:
1033+
pa_dtype = to_concat[0].dtype.pyarrow_dtype
1034+
arr = pa.chunked_array(chunks, type=pa_dtype)
10301035
return cls(arr)
10311036

10321037
def _accumulate(

pandas/tests/extension/test_arrow.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,3 +2329,11 @@ def test_from_sequence_of_strings_boolean():
23292329
strings = ["True", "foo"]
23302330
with pytest.raises(pa.ArrowInvalid, match="Failed to parse"):
23312331
ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa.bool_())
2332+
2333+
2334+
def test_concat_empty_arrow_backed_series(dtype):
2335+
# GH#51734
2336+
ser = pd.Series([], dtype=dtype)
2337+
expected = ser.copy()
2338+
result = pd.concat([ser[np.array([], dtype=np.bool_)]])
2339+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)