|
| 1 | +from typing import Tuple, Optional, Dict, Any, Iterable, Sequence |
| 2 | +import enum |
| 3 | + |
class DlpackDeviceType(enum.IntEnum):
    """
    Device type codes matching DLPack.

    A member of this enum is the first element of the ``(device type,
    device ID)`` tuple returned by ``__dlpack_device__``.
    """

    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    # Codes 5 and 6 are not defined by this enum.
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10
| 13 | + |
class DtypeKind(enum.IntEnum):
    """
    Kind codes for a column dtype.

    A member of this enum is the first element of the dtype description
    tuple ``(kind, bit-width, format string, endianness)``.
    """

    INT = 0
    UINT = 1
    FLOAT = 2
    BOOL = 20
    STRING = 21  # UTF-8
    DATETIME = 22
    CATEGORICAL = 23
| 22 | + |
class ColumnNullType(enum.IntEnum):
    """
    Kind codes for how a column represents missing ("null") values.

    A member of this enum is the first element of the ``(kind, value)``
    tuple returned by ``describe_null``.

    This is an ``IntEnum`` so that its members really are instances of
    ``ColumnNullType``, as the ``describe_null`` return annotation states.
    Members still compare equal to their plain integer codes.
    """

    NON_NULLABLE = 0  # non-nullable
    USE_NAN = 1  # NaN/NaT
    USE_SENTINEL = 2  # sentinel value
    USE_BITMASK = 3  # bit mask
    USE_BYTEMASK = 4  # byte mask
| 29 | + |
1 | 30 | class Buffer:
|
2 | 31 | """
|
3 | 32 | Data in the buffer is guaranteed to be contiguous in memory.
|
@@ -41,20 +70,11 @@ def __dlpack__(self):
|
41 | 70 | """
|
42 | 71 | raise NotImplementedError("__dlpack__")
|
43 | 72 |
|
44 |
| - def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]: |
| 73 | + def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]: |
45 | 74 | """
|
46 | 75 | Device type and device ID for where the data in the buffer resides.
|
47 | 76 |
|
48 |
| - Uses device type codes matching DLPack. Enum members are:: |
49 |
| -
|
50 |
| - - CPU = 1 |
51 |
| - - CUDA = 2 |
52 |
| - - CPU_PINNED = 3 |
53 |
| - - OPENCL = 4 |
54 |
| - - VULKAN = 7 |
55 |
| - - METAL = 8 |
56 |
| - - VPI = 9 |
57 |
| - - ROCM = 10 |
| 77 | + Uses device type codes matching DLPack. |
58 | 78 |
|
59 | 79 | Note: must be implemented even if ``__dlpack__`` is not.
|
60 | 80 | """
|
@@ -128,20 +148,10 @@ def offset(self) -> int:
|
128 | 148 | pass
|
129 | 149 |
|
130 | 150 | @property
|
131 |
| - def dtype(self) -> Tuple[enum.IntEnum, int, str, str]: |
| 151 | + def dtype(self) -> Tuple[DtypeKind, int, str, str]: |
132 | 152 | """
|
133 | 153 | Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
|
134 | 154 |
|
135 |
| - Kind : |
136 |
| -
|
137 |
| - - INT = 0 |
138 |
| - - UINT = 1 |
139 |
| - - FLOAT = 2 |
140 |
| - - BOOL = 20 |
141 |
| - - STRING = 21 # UTF-8 |
142 |
| - - DATETIME = 22 |
143 |
| - - CATEGORICAL = 23 |
144 |
| -
|
145 | 155 | Bit-width : the number of bits as an integer
|
146 | 156 | Format string : data type description format string in Apache Arrow C
|
147 | 157 | Data Interface format.
|
@@ -194,19 +204,11 @@ def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
|
194 | 204 | pass
|
195 | 205 |
|
196 | 206 | @property
|
197 |
| - def describe_null(self) -> Tuple[int, Any]: |
| 207 | + def describe_null(self) -> Tuple[ColumnNullType, Any]: |
198 | 208 | """
|
199 | 209 | Return the missing value (or "null") representation the column dtype
|
200 | 210 | uses, as a tuple ``(kind, value)``.
|
201 | 211 |
|
202 |
| - Kind: |
203 |
| -
|
204 |
| - - 0 : non-nullable |
205 |
| - - 1 : NaN/NaT |
206 |
| - - 2 : sentinel value |
207 |
| - - 3 : bit mask |
208 |
| - - 4 : byte mask |
209 |
| -
|
210 | 212 | Value : if kind is "sentinel value", the actual value. If kind is a bit
|
211 | 213 | mask or a byte mask, the value (0 or 1) indicating a missing value. None
|
212 | 214 | otherwise.
|
@@ -235,15 +237,15 @@ def num_chunks(self) -> int:
|
235 | 237 | """
|
236 | 238 | pass
|
237 | 239 |
|
238 |
| - def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[Column]: |
| 240 | + def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["Column"]: |
239 | 241 | """
|
240 | 242 | Return an iterator yielding the chunks.
|
241 | 243 |
|
242 | 244 | See `DataFrame.get_chunks` for details on ``n_chunks``.
|
243 | 245 | """
|
244 | 246 | pass
|
245 | 247 |
|
246 |
| - def get_buffers(self) -> dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]], Optional[Tuple[Buffer, Any]]]: |
| 248 | + def get_buffers(self) -> Dict[Tuple[Buffer, Any], Optional[Tuple[Buffer, Any]], Optional[Tuple[Buffer, Any]]]: |
247 | 249 | """
|
248 | 250 | Return a dictionary containing the underlying buffers.
|
249 | 251 |
|
@@ -368,19 +370,19 @@ def get_columns(self) -> Iterable[Column]:
|
368 | 370 | """
|
369 | 371 | pass
|
370 | 372 |
|
371 |
| - def select_columns(self, indices: Sequence[int]) -> DataFrame: |
| 373 | + def select_columns(self, indices: Sequence[int]) -> "DataFrame": |
372 | 374 | """
|
373 | 375 | Create a new DataFrame by selecting a subset of columns by index.
|
374 | 376 | """
|
375 | 377 | pass
|
376 | 378 |
|
377 |
| - def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: |
| 379 | + def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame": |
378 | 380 | """
|
379 | 381 | Create a new DataFrame by selecting a subset of columns by name.
|
380 | 382 | """
|
381 | 383 | pass
|
382 | 384 |
|
383 |
| - def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[DataFrame]: |
| 385 | + def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable["DataFrame"]: |
384 | 386 | """
|
385 | 387 | Return an iterator yielding the chunks.
|
386 | 388 |
|
|
0 commit comments