Skip to content

Commit 825b6d7

Browse files
committed
Change API a bit, align formatting with pandas
Signed-off-by: Vasily Litvinov <[email protected]>
1 parent 63ba1e7 commit 825b6d7

File tree

1 file changed

+44
-5
lines changed

1 file changed

+44
-5
lines changed

protocol/dataframe_protocol.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1-
from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict
1+
from abc import (
2+
ABC,
3+
abstractmethod,
4+
)
25
import enum
3-
from abc import ABC, abstractmethod
6+
from typing import (
7+
Any,
8+
Dict,
9+
Iterable,
10+
Optional,
11+
Sequence,
12+
Tuple,
13+
TypedDict,
14+
)
415

516

617
class DlpackDeviceType(enum.IntEnum):
@@ -89,6 +100,16 @@ class ColumnBuffers(TypedDict):
89100
offsets: Optional[Tuple["Buffer", Any]]
90101

91102

103+
class CategoricalDescription(TypedDict):
104+
# whether the ordering of dictionary indices is semantically meaningful
105+
is_ordered: bool
106+
# whether a dictionary-style mapping of categorical values to other objects exists
107+
is_dictionary: bool
108+
# the mapping of categorical values to other objects; Python-level only (e.g. ``{int: str}``).
109+
# None if not a dictionary-style categorical.
110+
mapping: Optional[dict]
111+
112+
92113
class Buffer(ABC):
93114
"""
94115
Data in the buffer is guaranteed to be contiguous in memory.
@@ -191,7 +212,7 @@ class Column(ABC):
191212

192213
@property
193214
@abstractmethod
194-
def size(self) -> Optional[int]:
215+
def size(self) -> int:
195216
"""
196217
Size of the column, in elements.
197218
@@ -246,15 +267,15 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
246267

247268
@property
248269
@abstractmethod
249-
def describe_categorical(self) -> Tuple[bool, bool, Optional[dict]]:
270+
def describe_categorical(self) -> CategoricalDescription:
250271
"""
251272
If the dtype is categorical, there are two options:
252273
- There are only values in the data buffer.
253274
- There is a separate dictionary-style encoding for categorical values.
254275
255276
Raises TypeError if the dtype is not categorical.
256277
257-
Returns the description on how to interpret the data buffer:
278+
Returns a dictionary describing how to interpret the data buffer:
258279
- "is_ordered" : bool, whether the ordering of dictionary indices is
259280
semantically meaningful.
260281
- "is_dictionary" : bool, whether a dictionary-style mapping of
@@ -363,6 +384,24 @@ class DataFrame(ABC):
363384

364385
version = 0 # version of the protocol
365386

387+
@abstractmethod
388+
def __dataframe__(
389+
self, nan_as_null: bool = False, allow_copy: bool = True
390+
) -> "DataFrame":
391+
"""
392+
Construct a new exchange object, potentially changing the parameters.
393+
394+
``nan_as_null`` is a keyword intended for the consumer to tell the
395+
producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
396+
It is intended for cases where the consumer does not support the bit
397+
mask or byte mask that is the producer's native representation.
398+
``allow_copy`` is a keyword that defines whether or not the library is
399+
allowed to make a copy of the data. For example, copying data would be
400+
necessary if a library supports strided buffers, given that this protocol
401+
specifies contiguous buffers.
402+
"""
403+
pass
404+
366405
@property
367406
@abstractmethod
368407
def metadata(self) -> Dict[str, Any]:

0 commit comments

Comments
 (0)