1
+ from typing import Tuple , Optional , Dict , Any , Iterable , Sequence
2
+ import enum
3
+
4
@enum.unique
class DlpackDeviceType(enum.IntEnum):
    """
    Integer device type codes, matching the codes used by DLPack.

    Members are plain integers (``IntEnum``), so they compare equal to the
    raw numeric codes.
    """

    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    # Note: codes 5 and 6 are intentionally not defined in this enum.
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10
13
+
14
@enum.unique
class DtypeKind(enum.IntEnum):
    """
    Kind of a column dtype.

    This is the first element of the ``(kind, bit-width, format string,
    endianness)`` tuple that describes a dtype.
    """

    # Numeric kinds.
    INT = 0
    UINT = 1
    FLOAT = 2

    # Non-numeric kinds start at 20.
    BOOL = 20
    STRING = 21  # UTF-8
    DATETIME = 22
    CATEGORICAL = 23
22
+
23
class ColumnNullType(enum.IntEnum):
    """
    Kind of missing-value ("null") representation used by a column.

    This is the first element of the ``(kind, value)`` tuple returned by
    ``describe_null``.

    Defined as an ``IntEnum`` so that members are real ``ColumnNullType``
    instances (matching the ``Tuple[ColumnNullType, Any]`` annotation) while
    still comparing equal to the raw integer codes 0-4.
    """

    NON_NULLABLE = 0  # non-nullable
    USE_NAN = 1  # NaN/NaT
    USE_SENTINEL = 2  # sentinel value
    USE_BITMASK = 3  # bit mask
    USE_BYTEMASK = 4  # byte mask
29
+
1
30
class Buffer :
2
31
"""
3
32
Data in the buffer is guaranteed to be contiguous in memory.
@@ -41,20 +70,11 @@ def __dlpack__(self):
41
70
"""
42
71
raise NotImplementedError ("__dlpack__" )
43
72
44
- def __dlpack_device__ (self ) -> Tuple [enum . IntEnum , int ]:
73
+ def __dlpack_device__ (self ) -> Tuple [DlpackDeviceType , int ]:
45
74
"""
46
75
Device type and device ID for where the data in the buffer resides.
47
76
48
- Uses device type codes matching DLPack. Enum members are::
49
-
50
- - CPU = 1
51
- - CUDA = 2
52
- - CPU_PINNED = 3
53
- - OPENCL = 4
54
- - VULKAN = 7
55
- - METAL = 8
56
- - VPI = 9
57
- - ROCM = 10
77
+ Uses device type codes matching DLPack.
58
78
59
79
Note: must be implemented even if ``__dlpack__`` is not.
60
80
"""
@@ -128,20 +148,10 @@ def offset(self) -> int:
128
148
pass
129
149
130
150
@property
131
- def dtype (self ) -> Tuple [enum . IntEnum , int , str , str ]:
151
+ def dtype (self ) -> Tuple [DtypeKind , int , str , str ]:
132
152
"""
133
153
Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
134
154
135
- Kind :
136
-
137
- - INT = 0
138
- - UINT = 1
139
- - FLOAT = 2
140
- - BOOL = 20
141
- - STRING = 21 # UTF-8
142
- - DATETIME = 22
143
- - CATEGORICAL = 23
144
-
145
155
Bit-width : the number of bits as an integer
146
156
Format string : data type description format string in Apache Arrow C
147
157
Data Interface format.
@@ -170,7 +180,7 @@ def dtype(self) -> Tuple[enum.IntEnum, int, str, str]:
170
180
pass
171
181
172
182
@property
173
- def describe_categorical (self ) -> dict [bool , bool , Optional [dict ]]:
183
+ def describe_categorical (self ) -> Dict [bool , bool , Optional [dict ]]:
174
184
"""
175
185
If the dtype is categorical, there are two options:
176
186
@@ -193,19 +203,11 @@ def describe_categorical(self) -> dict[bool, bool, Optional[dict]]:
193
203
pass
194
204
195
205
@property
196
- def describe_null (self ) -> Tuple [int , Any ]:
206
+ def describe_null (self ) -> Tuple [ColumnNullType , Any ]:
197
207
"""
198
208
Return the missing value (or "null") representation the column dtype
199
209
uses, as a tuple ``(kind, value)``.
200
210
201
- Kind:
202
-
203
- - 0 : non-nullable
204
- - 1 : NaN/NaT
205
- - 2 : sentinel value
206
- - 3 : bit mask
207
- - 4 : byte mask
208
-
209
211
Value : if kind is "sentinel value", the actual value. If kind is a bit
210
212
mask or a byte mask, the value (0 or 1) indicating a missing value. None
211
213
otherwise.
@@ -234,15 +236,15 @@ def num_chunks(self) -> int:
234
236
"""
235
237
pass
236
238
237
- def get_chunks (self , n_chunks : Optional [int ] = None ) -> Iterable [Column ]:
239
def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["Column"]:
    """
    Return an iterator yielding the chunks.

    ``n_chunks`` is optional and defaults to ``None``. See
    `DataFrame.get_chunks` for details on ``n_chunks``.

    This is an interface stub: the body is empty and returns ``None``.
    """
    # The return annotation is a quoted forward reference; it must contain
    # exactly the class name (no surrounding whitespace) so that
    # ``typing.get_type_hints`` can evaluate it.
    pass
244
246
245
- def get_buffers (self ) -> dict [Tuple [Buffer , Any ], Optional [Tuple [Buffer , Any ]], Optional [Tuple [Buffer , Any ]]]:
247
+ def get_buffers (self ) -> Dict [Tuple [Buffer , Any ], Optional [Tuple [Buffer , Any ]], Optional [Tuple [Buffer , Any ]]]:
246
248
"""
247
249
Return a dictionary containing the underlying buffers.
248
250
@@ -367,19 +369,19 @@ def get_columns(self) -> Iterable[Column]:
367
369
"""
368
370
pass
369
371
370
- def select_columns (self , indices : Sequence [int ]) -> DataFrame :
372
def select_columns(self, indices: Sequence[int]) -> "DataFrame":
    """
    Create a new DataFrame by selecting a subset of columns by index.

    ``indices`` is the sequence of integer column indices to select.

    This is an interface stub: the body is empty and returns ``None``.
    """
    # The return annotation is a quoted forward reference; it must contain
    # exactly the class name (no surrounding whitespace) so that
    # ``typing.get_type_hints`` can evaluate it.
    pass
375
377
376
- def select_columns_by_name (self , names : Sequence [str ]) -> DataFrame :
378
def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
    """
    Create a new DataFrame by selecting a subset of columns by name.

    ``names`` is the sequence of column names to select.

    This is an interface stub: the body is empty and returns ``None``.
    """
    # The return annotation is a quoted forward reference; it must contain
    # exactly the class name (no surrounding whitespace) so that
    # ``typing.get_type_hints`` can evaluate it.
    pass
381
383
382
- def get_chunks (self , n_chunks : Optional [int ] = None ) -> Iterable [DataFrame ]:
384
+ def get_chunks (self , n_chunks : Optional [int ] = None ) -> Iterable [" DataFrame" ]:
383
385
"""
384
386
Return an iterator yielding the chunks.
385
387
0 commit comments