Skip to content

Commit a681598

Browse files
committed
Try to fix typecheck issues
Signed-off-by: Vasily Litvinov <[email protected]>
1 parent: efad57a; commit: a681598

File tree

5 files changed

+33 additions, -35 deletions (lines changed)

5 files changed

+33 additions, -35 deletions (lines changed)

pandas/core/exchange/buffer.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
1-
from typing import Tuple
1+
from typing import (
2+
Optional,
3+
Tuple,
4+
)
25

36
import numpy as np
47

@@ -52,7 +55,7 @@ def __dlpack__(self):
5255
"""
5356
raise NotImplementedError("__dlpack__")
5457

55-
def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
58+
def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
5659
"""
5760
Device type and device ID for where the data in the buffer resides.
5861
"""

pandas/core/exchange/column.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.exchange.buffer import PandasBuffer
1515
from pandas.core.exchange.dataframe_protocol import (
1616
Column,
17+
ColumnBuffers,
1718
ColumnNullType,
1819
DtypeKind,
1920
)
@@ -223,17 +224,21 @@ def get_buffers(self):
223224
if the data buffer does not have an associated offsets
224225
buffer.
225226
"""
226-
buffers = {}
227-
buffers["data"] = self._get_data_buffer()
227+
buffers: ColumnBuffers = {
228+
"data": self._get_data_buffer(),
229+
"validity": None,
230+
"offsets": None,
231+
}
232+
228233
try:
229234
buffers["validity"] = self._get_validity_buffer()
230235
except NoBufferPresent:
231-
buffers["validity"] = None
236+
pass
232237

233238
try:
234239
buffers["offsets"] = self._get_offsets_buffer()
235240
except NoBufferPresent:
236-
buffers["offsets"] = None
241+
pass
237242

238243
return buffers
239244

@@ -328,7 +333,7 @@ def _get_offsets_buffer(self) -> Tuple[PandasBuffer, Any]:
328333
# For each string, we need to manually determine the next offset
329334
values = self._col.to_numpy()
330335
ptr = 0
331-
offsets = [ptr] + [None] * len(values)
336+
offsets = [ptr] + [0] * len(values)
332337
for i, v in enumerate(values):
333338
# For missing values (in this case, `np.nan` values)
334339
# we don't increment the pointer

pandas/core/exchange/dataframe_protocol.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,7 @@ class Column(ABC):
216216

217217
@property
218218
@abstractmethod
219-
def size(self) -> Optional[int]:
219+
def size(self) -> int:
220220
"""
221221
Size of the column, in elements.
222222

pandas/core/exchange/from_dataframe.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
import re
33
from typing import (
44
Any,
5+
Dict,
6+
List,
57
Optional,
68
Tuple,
79
Union,
@@ -22,7 +24,7 @@
2224
Endianness,
2325
)
2426

25-
_NP_DTYPES = {
27+
_NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = {
2628
DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
2729
DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
2830
DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
@@ -90,7 +92,7 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
9092
"""
9193
# We need a dict of columns here, with each column being a NumPy array (at
9294
# least for now, deal with non-NumPy dtypes later).
93-
columns = {}
95+
columns: Dict[str, Any] = {}
9496
buffers = [] # hold on to buffers, keeps memory alive
9597
for name in df.column_names():
9698
if not isinstance(name, str):
@@ -161,12 +163,14 @@ def categorical_column_to_series(col: Column) -> Tuple[pd.Series, Any]:
161163
Tuple of pd.Series holding the data and the memory owner object
162164
that keeps the memory alive.
163165
"""
164-
ordered, is_dict, mapping = col.describe_categorical.values()
166+
categorical = col.describe_categorical
165167

166-
if not is_dict:
168+
if not categorical["is_dictionary"]:
167169
raise NotImplementedError("Non-dictionary categoricals not supported yet")
168170

169-
categories = np.array(tuple(mapping.values()))
171+
mapping = categorical["mapping"]
172+
assert isinstance(mapping, dict), "Categorical mapping must be a dict"
173+
categories = np.array(tuple(mapping[k] for k in sorted(mapping)))
170174
buffers = col.get_buffers()
171175

172176
codes_buff, codes_dtype = buffers["data"]
@@ -176,7 +180,9 @@ def categorical_column_to_series(col: Column) -> Tuple[pd.Series, Any]:
176180
# out-of-bounds sentinel values in `codes`
177181
values = categories[codes % len(categories)]
178182

179-
cat = pd.Categorical(values, categories=categories, ordered=ordered)
183+
cat = pd.Categorical(
184+
values, categories=categories, ordered=categorical["is_ordered"]
185+
)
180186
data = pd.Series(cat)
181187

182188
data = set_nulls(data, col, buffers["validity"])
@@ -210,6 +216,7 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
210216

211217
buffers = col.get_buffers()
212218

219+
assert buffers["offsets"], "String buffers must contain offsets"
213220
# Retrieve the data buffer containing the UTF-8 code units
214221
data_buff, protocol_data_dtype = buffers["data"]
215222
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
@@ -238,13 +245,14 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
238245

239246
null_pos = None
240247
if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
248+
assert buffers["validity"], "Validity buffers cannot be empty for masks"
241249
valid_buff, valid_dtype = buffers["validity"]
242250
null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
243251
if sentinel_val == 0:
244252
null_pos = ~null_pos
245253

246254
# Assemble the strings from the code units
247-
str_list = [None] * col.size
255+
str_list: List[Union[None, float, str]] = [None] * col.size
248256
for i in range(col.size):
249257
# Check for missing values
250258
if null_pos is not None and null_pos[i]:
@@ -448,7 +456,7 @@ def bitmask_to_bool_ndarray(
448456
def set_nulls(
449457
data: Union[np.ndarray, pd.Series],
450458
col: Column,
451-
validity: Tuple[Buffer, Tuple[DtypeKind, int, str, str]],
459+
validity: Optional[Tuple[Buffer, Tuple[DtypeKind, int, str, str]]],
452460
allow_modify_inplace: bool = True,
453461
):
454462
"""
@@ -478,6 +486,7 @@ def set_nulls(
478486
if null_kind == ColumnNullType.USE_SENTINEL:
479487
null_pos = data == sentinel_val
480488
elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
489+
assert validity, "Expected to have a validity buffer for the mask"
481490
valid_buff, valid_dtype = validity
482491
null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
483492
if sentinel_val == 0:

pandas/tests/exchange/test_impl.py

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@ def test_dataframe(data):
7272

7373
df2 = df.__dataframe__()
7474

75-
assert df2._allow_copy is True
7675
assert df2.num_columns() == NCOLS
7776
assert df2.num_rows() == NROWS
7877

@@ -148,24 +147,6 @@ def test_mixed_missing():
148147
assert df2.get_column_by_name(col_name).null_count == 2
149148

150149

151-
def test_select_columns_error():
152-
df = pd.DataFrame(int_data)
153-
154-
df2 = df.__dataframe__()
155-
156-
with pytest.raises(ValueError, match="is not a sequence"):
157-
df2.select_columns(np.array([0, 2]))
158-
159-
160-
def test_select_columns_by_name_error():
161-
df = pd.DataFrame(int_data)
162-
163-
df2 = df.__dataframe__()
164-
165-
with pytest.raises(ValueError, match="is not a sequence"):
166-
df2.select_columns_by_name(np.array(["col33", "col35"]))
167-
168-
169150
def test_string():
170151
test_str_data = string_data["separator data"] + [""]
171152
df = pd.DataFrame({"A": test_str_data})

0 commit comments

Comments
 (0)