Skip to content

Commit 490914f

Browse files
added pyarrow/numpy dtype literals and allowed str | DtypeObj as input for Series.astype (#756)
* added pyarrow/numpy dtype literals & allowed str as astype input * removed accidental double float * added ObjectDtypeArg and lots of unit tests for literals * removed str overload * re-enabled s.astype(s.dtype) test * refactored astype-tests to use pytest.mark.parametrize * added VoidDtype, fixed some test issues * attempted fix for float96/complex192 * added code for testing that all types are tested * small edit * removed float96, complex192 and fixed integer tests * reverted accidental Series renames * removed windows check for test_astype_int * reordered literals
1 parent 5a9abdd commit 490914f

File tree

3 files changed

+1113
-325
lines changed

3 files changed

+1113
-325
lines changed

pandas-stubs/_typing.pyi

Lines changed: 221 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@ NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | ob
7878
Dtype: TypeAlias = ExtensionDtype | NpDtype
7979
DtypeArg: TypeAlias = Dtype | Mapping[Any, Dtype]
8080
DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"]
81+
82+
# NOTE: we want to catch all the possible dtypes from np.sctypeDict
83+
# timedelta64
84+
# M
85+
# m8
86+
# M8
87+
# object_
88+
# object0
89+
# m
90+
# datetime64
91+
8192
BooleanDtypeArg: TypeAlias = (
8293
# Builtin bool type and its string alias
8394
type[bool] # noqa: Y030
@@ -86,7 +97,11 @@ BooleanDtypeArg: TypeAlias = (
8697
| pd.BooleanDtype
8798
| Literal["boolean"]
8899
# Numpy bool type
100+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_
89101
| type[np.bool_]
102+
| Literal["?", "b1", "bool8", "bool_"]
103+
# PyArrow boolean type and its string alias
104+
| Literal["bool[pyarrow]", "boolean[pyarrow]"]
90105
)
91106
IntDtypeArg: TypeAlias = (
92107
# Builtin integer type and its string alias
@@ -99,31 +114,56 @@ IntDtypeArg: TypeAlias = (
99114
| pd.Int64Dtype
100115
| Literal["Int8", "Int16", "Int32", "Int64"]
101116
# Numpy signed integer types and their string aliases
117+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte
102118
| type[np.byte]
103-
| type[np.int8]
104-
| type[np.int16]
105-
| type[np.int32]
106-
| type[np.int64]
107-
| type[np.intp]
108-
| Literal["byte", "int8", "int16", "int32", "int64", "intp"]
119+
| Literal["b", "i1", "int8", "byte"]
120+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short
121+
| type[np.short]
122+
| Literal["h", "i2", "int16", "short"]
123+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc
124+
| type[np.intc]
125+
| Literal["i", "i4", "int32", "intc"]
126+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_
127+
| type[np.int_]
128+
| Literal["l", "i8", "int64", "int_", "long"]
129+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong
130+
| type[np.longlong]
131+
| Literal["q", "longlong"] # NOTE: int128 not assigned
132+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp
133+
| type[np.intp] # signed pointer (=`intptr_t`, platform dependent)
134+
| Literal["p", "intp", "int0"]
135+
# PyArrow integer types and their string aliases
136+
| Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"]
137+
)
138+
UIntDtypeArg: TypeAlias = (
139+
# Pandas nullable unsigned integer types and their string aliases
140+
pd.UInt8Dtype # noqa: Y030
141+
| pd.UInt16Dtype
142+
| pd.UInt32Dtype
143+
| pd.UInt64Dtype
144+
| Literal["UInt8", "UInt16", "UInt32", "UInt64"]
109145
# Numpy unsigned integer types and their string aliases
146+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte
110147
| type[np.ubyte]
111-
| type[np.uint8]
112-
| type[np.uint16]
113-
| type[np.uint32]
114-
| type[np.uint64]
115-
| type[np.uintp]
116-
| Literal["ubyte", "uint8", "uint16", "uint32", "uint64", "uintp"]
148+
| Literal["B", "u1", "uint8", "ubyte"]
149+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort
150+
| type[np.ushort]
151+
| Literal["H", "u2", "uint16", "ushort"]
152+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintc
153+
| type[np.uintc]
154+
| Literal["I", "u4", "uint32", "uintc"]
155+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint
156+
| type[np.uint]
157+
| Literal["L", "u8", "uint", "ulong", "uint64"]
158+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong
159+
| type[np.ulonglong]
160+
| Literal["Q", "ulonglong"] # NOTE: uint128 not assigned
161+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp
162+
| type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent)
163+
| Literal["P", "uintp", "uint0"]
164+
# PyArrow unsigned integer types and their string aliases
165+
| Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"]
117166
)
118-
StrDtypeArg: TypeAlias = (
119-
# Builtin str type and its string alias
120-
type[str] # noqa: Y030
121-
| Literal["str"]
122-
# Pandas nullable string type and its string alias
123-
| pd.StringDtype
124-
| Literal["string"]
125-
)
126-
BytesDtypeArg: TypeAlias = type[bytes]
127167
FloatDtypeArg: TypeAlias = (
128168
# Builtin float type and its string alias
129169
type[float] # noqa: Y030
@@ -133,19 +173,50 @@ FloatDtypeArg: TypeAlias = (
133173
| pd.Float64Dtype
134174
| Literal["Float32", "Float64"]
135175
# Numpy float types and their string aliases
136-
| type[np.float16]
137-
| type[np.float32]
138-
| type[np.float64]
139-
| Literal["float16", "float32", "float64"]
176+
# NOTE: Alias np.float16 only on Linux x86_64, use np.half instead
177+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half
178+
| type[np.half]
179+
| Literal["e", "f2", "<f2", "float16", "half"]
180+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.single
181+
| type[np.single]
182+
| Literal["f", "f4", "float32", "single"]
183+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.double
184+
| type[np.double]
185+
| Literal["d", "f8", "float64", "double", "float_"]
186+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longdouble
187+
| type[np.longdouble]
188+
| Literal["g", "f16", "float128", "longdouble", "longfloat"]
189+
# PyArrow floating point types and their string aliases
190+
| Literal[
191+
"float[pyarrow]",
192+
"double[pyarrow]",
193+
"float16[pyarrow]",
194+
"float32[pyarrow]",
195+
"float64[pyarrow]",
196+
]
140197
)
141198
ComplexDtypeArg: TypeAlias = (
142199
# Builtin complex type and its string alias
143200
type[complex] # noqa: Y030
144201
| Literal["complex"]
145202
# Numpy complex types and their aliases
146-
| type[np.complex64]
147-
| type[np.complex128]
148-
| Literal["complex64", "complex128"]
203+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.csingle
204+
| type[np.csingle]
205+
| Literal["F", "c8", "complex64", "csingle", "singlecomplex"]
206+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.cdouble
207+
| type[np.cdouble]
208+
| Literal["D", "c16", "complex128", "cdouble", "cfloat", "complex_"]
209+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.clongdouble
210+
# NOTE: Alias np.complex256 only on Linux x86_64, use np.clongdouble instead
211+
| type[np.clongdouble]
212+
| Literal[
213+
"G",
214+
"c32",
215+
"complex256",
216+
"clongdouble",
217+
"clongfloat",
218+
"longcomplex",
219+
]
149220
)
150221
# Refer to https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units
151222
TimedeltaDtypeArg: TypeAlias = Literal[
@@ -163,6 +234,41 @@ TimedeltaDtypeArg: TypeAlias = Literal[
163234
"timedelta64[ps]",
164235
"timedelta64[fs]",
165236
"timedelta64[as]",
237+
# numpy type codes
238+
"m8[Y]",
239+
"m8[M]",
240+
"m8[W]",
241+
"m8[D]",
242+
"m8[h]",
243+
"m8[m]",
244+
"m8[s]",
245+
"m8[ms]",
246+
"m8[us]",
247+
"m8[μs]",
248+
"m8[ns]",
249+
"m8[ps]",
250+
"m8[fs]",
251+
"m8[as]",
252+
# little endian
253+
"<m8[Y]",
254+
"<m8[M]",
255+
"<m8[W]",
256+
"<m8[D]",
257+
"<m8[h]",
258+
"<m8[m]",
259+
"<m8[s]",
260+
"<m8[ms]",
261+
"<m8[us]",
262+
"<m8[μs]",
263+
"<m8[ns]",
264+
"<m8[ps]",
265+
"<m8[fs]",
266+
"<m8[as]",
267+
# PyArrow duration type and its string alias
268+
"duration[s][pyarrow]",
269+
"duration[ms][pyarrow]",
270+
"duration[us][pyarrow]",
271+
"duration[ns][pyarrow]",
166272
]
167273
TimestampDtypeArg: TypeAlias = Literal[
168274
"datetime64[Y]",
@@ -179,24 +285,107 @@ TimestampDtypeArg: TypeAlias = Literal[
179285
"datetime64[ps]",
180286
"datetime64[fs]",
181287
"datetime64[as]",
288+
# numpy type codes
289+
"M8[Y]",
290+
"M8[M]",
291+
"M8[W]",
292+
"M8[D]",
293+
"M8[h]",
294+
"M8[m]",
295+
"M8[s]",
296+
"M8[ms]",
297+
"M8[us]",
298+
"M8[μs]",
299+
"M8[ns]",
300+
"M8[ps]",
301+
"M8[fs]",
302+
"M8[as]",
303+
# little endian
304+
"<M8[Y]",
305+
"<M8[M]",
306+
"<M8[W]",
307+
"<M8[D]",
308+
"<M8[h]",
309+
"<M8[m]",
310+
"<M8[s]",
311+
"<M8[ms]",
312+
"<M8[us]",
313+
"<M8[μs]",
314+
"<M8[ns]",
315+
"<M8[ps]",
316+
"<M8[fs]",
317+
"<M8[as]",
318+
# PyArrow timestamp type and its string alias
319+
"date32[pyarrow]",
320+
"date64[pyarrow]",
321+
"timestamp[s][pyarrow]",
322+
"timestamp[ms][pyarrow]",
323+
"timestamp[us][pyarrow]",
324+
"timestamp[ns][pyarrow]",
182325
]
326+
327+
StrDtypeArg: TypeAlias = (
328+
# Builtin str type and its string alias
329+
type[str] # noqa: Y030
330+
| Literal["str"]
331+
# Pandas nullable string type and its string alias
332+
| pd.StringDtype
333+
| Literal["string"]
334+
# Numpy string type and its string alias
335+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.str_
336+
| type[np.str_]
337+
| Literal["U", "str_", "str0", "unicode", "unicode_"]
338+
# PyArrow string type and its string alias
339+
| Literal["string[pyarrow]"]
340+
)
341+
BytesDtypeArg: TypeAlias = (
342+
# Builtin bytes type and its string alias
343+
type[bytes] # noqa: Y030
344+
| Literal["bytes"]
345+
# Numpy bytes type and its string alias
346+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_
347+
| type[np.bytes_]
348+
| Literal["S", "a", "bytes_", "bytes0", "string_"]
349+
# PyArrow binary type and its string alias
350+
| Literal["binary[pyarrow]"]
351+
)
183352
CategoryDtypeArg: TypeAlias = CategoricalDtype | Literal["category"]
184353

354+
ObjectDtypeArg: TypeAlias = (
355+
# Builtin object type and its string alias
356+
type[object] # noqa: Y030
357+
| Literal["object"]
358+
# Numpy object type and its string alias
359+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.object_
360+
| type[np.object_]
361+
| Literal["O"] # NOTE: "object_" not assigned
362+
)
363+
364+
VoidDtypeArg: TypeAlias = (
365+
# Numpy void type and its string alias
366+
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.void
367+
type[np.void]
368+
| Literal["V", "void", "void0"]
369+
)
370+
371+
# DtypeArg specifies all allowable dtypes in a function's dtype argument
372+
DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype
373+
185374
AstypeArg: TypeAlias = (
186375
BooleanDtypeArg
187376
| IntDtypeArg
377+
| UIntDtypeArg
188378
| StrDtypeArg
189379
| BytesDtypeArg
190380
| FloatDtypeArg
191381
| ComplexDtypeArg
192382
| TimedeltaDtypeArg
193383
| TimestampDtypeArg
194384
| CategoryDtypeArg
195-
| ExtensionDtype
196-
| type[object]
385+
| ObjectDtypeArg
386+
| VoidDtypeArg
387+
| DtypeObj
197388
)
198-
# DtypeArg specifies all allowable dtypes in a function's dtype argument
199-
DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype
200389

201390
# filenames and file-like-objects
202391
AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True)

pandas-stubs/core/series.pyi

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ from pandas._typing import (
127127
ListLikeU,
128128
MaskType,
129129
NaPosition,
130+
ObjectDtypeArg,
130131
QuantileInterpolation,
131132
RandomState,
132133
Renamer,
@@ -138,6 +139,8 @@ from pandas._typing import (
138139
TimedeltaDtypeArg,
139140
TimestampConvention,
140141
TimestampDtypeArg,
142+
UIntDtypeArg,
143+
VoidDtypeArg,
141144
WriteBuffer,
142145
np_ndarray_anyint,
143146
np_ndarray_bool,
@@ -329,7 +332,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
329332
is_copy: _bool | None = ...,
330333
**kwargs,
331334
) -> Series[S1]: ...
332-
def __getattr__(self, name: str) -> S1: ...
335+
def __getattr__(self, name: _str) -> S1: ...
333336
@overload
334337
def __getitem__(
335338
self,
@@ -1152,7 +1155,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
11521155
@overload
11531156
def astype(
11541157
self,
1155-
dtype: IntDtypeArg,
1158+
dtype: IntDtypeArg | UIntDtypeArg,
11561159
copy: _bool = ...,
11571160
errors: IgnoreRaise = ...,
11581161
) -> Series[int]: ...
@@ -1208,7 +1211,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
12081211
@overload
12091212
def astype(
12101213
self,
1211-
dtype: type[object] | ExtensionDtype,
1214+
dtype: ObjectDtypeArg | VoidDtypeArg | ExtensionDtype | DtypeObj,
12121215
copy: _bool = ...,
12131216
errors: IgnoreRaise = ...,
12141217
) -> Series: ...

0 commit comments

Comments
 (0)