Skip to content

Commit 3832e85

Browse files
String dtype: more informative repr (keeping brief __str__) (#61148)
* String dtype: more informative repr (keeping brief __str__)
* fix display in series
* update doctest
* update docstring
* fixup
1 parent 9c5b9ee commit 3832e85

File tree

5 files changed

+25
-19
lines changed

5 files changed

+25
-19
lines changed

pandas/core/arrays/string_.py

+4-7
Original file line number | Diff line number | Diff line change
@@ -123,10 +123,10 @@ class StringDtype(StorageExtensionDtype):
123123
Examples
124124
--------
125125
>>> pd.StringDtype()
126-
string[python]
126+
<StringDtype(storage='python', na_value=<NA>)>
127127
128128
>>> pd.StringDtype(storage="pyarrow")
129-
string[pyarrow]
129+
<StringDtype(na_value=<NA>)>
130130
"""
131131

132132
@property
@@ -198,11 +198,8 @@ def __init__(
198198
self._na_value = na_value
199199

200200
def __repr__(self) -> str:
201-
if self._na_value is libmissing.NA:
202-
return f"{self.name}[{self.storage}]"
203-
else:
204-
# TODO add more informative repr
205-
return self.name
201+
storage = "" if self.storage == "pyarrow" else "storage='python', "
202+
return f"<StringDtype({storage}na_value={self._na_value})>"
206203

207204
def __eq__(self, other: object) -> bool:
208205
# we need to override the base class __eq__ because na_value (NA or NaN)

pandas/core/generic.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -6819,12 +6819,12 @@ def convert_dtypes(
68196819
2 3 z <NA> <NA> 20 200.0
68206820
68216821
>>> dfn.dtypes
6822-
a Int32
6823-
b string[python]
6824-
c boolean
6825-
d string[python]
6826-
e Int64
6827-
f Float64
6822+
a Int32
6823+
b string
6824+
c boolean
6825+
d string
6826+
e Int64
6827+
f Float64
68286828
dtype: object
68296829
68306830
Start with a Series of strings and missing data represented by ``np.nan``.

pandas/io/formats/format.py

-3
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@
6767
ExtensionArray,
6868
TimedeltaArray,
6969
)
70-
from pandas.core.arrays.string_ import StringDtype
7170
from pandas.core.base import PandasObject
7271
import pandas.core.common as com
7372
from pandas.core.indexes.api import (
@@ -1218,8 +1217,6 @@ def _format(x):
12181217
return self.na_rep
12191218
elif isinstance(x, PandasObject):
12201219
return str(x)
1221-
elif isinstance(x, StringDtype):
1222-
return repr(x)
12231220
else:
12241221
# object dtype
12251222
return str(formatter(x))

pandas/tests/arrays/string_/test_string.py

+12
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,18 @@ def test_repr(dtype):
103103
assert repr(df.A.array) == expected
104104

105105

106+
def test_dtype_repr(dtype):
107+
if dtype.storage == "pyarrow":
108+
if dtype.na_value is pd.NA:
109+
assert repr(dtype) == "<StringDtype(na_value=<NA>)>"
110+
else:
111+
assert repr(dtype) == "<StringDtype(na_value=nan)>"
112+
elif dtype.na_value is pd.NA:
113+
assert repr(dtype) == "<StringDtype(storage='python', na_value=<NA>)>"
114+
else:
115+
assert repr(dtype) == "<StringDtype(storage='python', na_value=nan)>"
116+
117+
106118
def test_none_to_nan(cls, dtype):
107119
a = cls._from_sequence(["a", None, "b"], dtype=dtype)
108120
assert a[1] is not None

pandas/tests/io/formats/test_to_string.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -777,9 +777,9 @@ def test_to_string_string_dtype(self):
777777
result = df.dtypes.to_string()
778778
expected = dedent(
779779
"""\
780-
x string[pyarrow]
781-
y string[python]
782-
z int64[pyarrow]"""
780+
x string
781+
y string
782+
z int64[pyarrow]"""
783783
)
784784
assert result == expected
785785

0 commit comments

Comments
 (0)