Skip to content

Commit 98bfab4

Browse files
committed
Try to fix typecheck issues
Signed-off-by: Vasily Litvinov <[email protected]>
1 parent efad57a commit 98bfab4

File tree

5 files changed

+21
-12
lines changed

5 files changed

+21
-12
lines changed

pandas/core/exchange/buffer.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
1-
from typing import Tuple
1+
from typing import (
2+
Optional,
3+
Tuple,
4+
)
25

36
import numpy as np
47

@@ -52,7 +55,7 @@ def __dlpack__(self):
5255
"""
5356
raise NotImplementedError("__dlpack__")
5457

55-
def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
58+
def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
5659
"""
5760
Device type and device ID for where the data in the buffer resides.
5861
"""

pandas/core/exchange/column.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from functools import cached_property
22
from typing import (
33
Any,
4+
List,
5+
Optional,
46
Tuple,
57
)
68

@@ -14,6 +16,7 @@
1416
from pandas.core.exchange.buffer import PandasBuffer
1517
from pandas.core.exchange.dataframe_protocol import (
1618
Column,
19+
ColumnBuffers,
1720
ColumnNullType,
1821
DtypeKind,
1922
)
@@ -223,7 +226,7 @@ def get_buffers(self):
223226
if the data buffer does not have an associated offsets
224227
buffer.
225228
"""
226-
buffers = {}
229+
buffers: ColumnBuffers = {}
227230
buffers["data"] = self._get_data_buffer()
228231
try:
229232
buffers["validity"] = self._get_validity_buffer()
@@ -328,7 +331,7 @@ def _get_offsets_buffer(self) -> Tuple[PandasBuffer, Any]:
328331
# For each string, we need to manually determine the next offset
329332
values = self._col.to_numpy()
330333
ptr = 0
331-
offsets = [ptr] + [None] * len(values)
334+
offsets: List[Optional[int]] = [ptr] + [None] * len(values)
332335
for i, v in enumerate(values):
333336
# For missing values (in this case, `np.nan` values)
334337
# we don't increment the pointer

pandas/core/exchange/dataframe_protocol.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ class Column(ABC):
216216

217217
@property
218218
@abstractmethod
219-
def size(self) -> Optional[int]:
219+
def size(self) -> int:
220220
"""
221221
Size of the column, in elements.
222222

pandas/core/exchange/from_dataframe.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import re
33
from typing import (
44
Any,
5+
Dict,
6+
List,
57
Optional,
68
Tuple,
79
Union,
@@ -22,7 +24,7 @@
2224
Endianness,
2325
)
2426

25-
_NP_DTYPES = {
27+
_NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = {
2628
DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
2729
DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
2830
DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
@@ -90,7 +92,7 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
9092
"""
9193
# We need a dict of columns here, with each column being a NumPy array (at
9294
# least for now, deal with non-NumPy dtypes later).
93-
columns = {}
95+
columns: Dict[str, Any] = {}
9496
buffers = [] # hold on to buffers, keeps memory alive
9597
for name in df.column_names():
9698
if not isinstance(name, str):
@@ -210,6 +212,7 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
210212

211213
buffers = col.get_buffers()
212214

215+
assert buffers["offsets"], "String buffers must contain offsets"
213216
# Retrieve the data buffer containing the UTF-8 code units
214217
data_buff, protocol_data_dtype = buffers["data"]
215218
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
@@ -238,13 +241,14 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
238241

239242
null_pos = None
240243
if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
244+
assert buffers["validity"], "Validity buffers cannot be empty for masks"
241245
valid_buff, valid_dtype = buffers["validity"]
242246
null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
243247
if sentinel_val == 0:
244248
null_pos = ~null_pos
245249

246250
# Assemble the strings from the code units
247-
str_list = [None] * col.size
251+
str_list: List[Union[None, float, str]] = [None] * col.size
248252
for i in range(col.size):
249253
# Check for missing values
250254
if null_pos is not None and null_pos[i]:
@@ -448,7 +452,7 @@ def bitmask_to_bool_ndarray(
448452
def set_nulls(
449453
data: Union[np.ndarray, pd.Series],
450454
col: Column,
451-
validity: Tuple[Buffer, Tuple[DtypeKind, int, str, str]],
455+
validity: Optional[Tuple[Buffer, Tuple[DtypeKind, int, str, str]]],
452456
allow_modify_inplace: bool = True,
453457
):
454458
"""

pandas/tests/exchange/test_impl.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ def test_dataframe(data):
7272

7373
df2 = df.__dataframe__()
7474

75-
assert df2._allow_copy is True
7675
assert df2.num_columns() == NCOLS
7776
assert df2.num_rows() == NROWS
7877

@@ -153,7 +152,7 @@ def test_select_columns_error():
153152

154153
df2 = df.__dataframe__()
155154

156-
with pytest.raises(ValueError, match="is not a sequence"):
155+
with pytest.raises(ValueError, match="is not a sequence"): # type: ignore[arg-type]
157156
df2.select_columns(np.array([0, 2]))
158157

159158

@@ -162,7 +161,7 @@ def test_select_columns_by_name_error():
162161

163162
df2 = df.__dataframe__()
164163

165-
with pytest.raises(ValueError, match="is not a sequence"):
164+
with pytest.raises(ValueError, match="is not a sequence"): # type: ignore[arg-type]
166165
df2.select_columns_by_name(np.array(["col33", "col35"]))
167166

168167

0 commit comments

Comments
 (0)