11
11
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
12
12
from pandas.core.dtypes.common import (
13
13
is_integer, is_scalar, is_float,
14
+ is_bool_dtype,
14
15
is_float_dtype,
15
16
is_integer_dtype,
16
17
is_object_dtype,
@@ -76,7 +77,7 @@ def construct_from_string(cls, string):
76
77
"'{}'".format(cls, string))
77
78
78
79
79
- def to_integer_array (values, dtype=None):
80
+ def integer_array (values, dtype=None, copy=False ):
80
81
"""
81
82
Infer and return an integer array of the values.
82
83
@@ -85,6 +86,7 @@ def to_integer_array(values, dtype=None):
85
86
values : 1D list-like
86
87
dtype : dtype, optional
87
88
dtype to coerce
89
+ copy : boolean, default False
88
90
89
91
Returns
90
92
-------
@@ -94,7 +96,8 @@ def to_integer_array(values, dtype=None):
94
96
------
95
97
TypeError if incompatible types
96
98
"""
97
- return IntegerArray(values, dtype=dtype, copy=False)
99
+ values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
100
+ return IntegerArray(values, mask)
98
101
99
102
100
103
def safe_cast(values, dtype, copy):
@@ -133,6 +136,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
133
136
-------
134
137
tuple of (values, mask)
135
138
"""
139
+ # if values is an integer numpy array, preserve its dtype
140
+ if dtype is None and hasattr(values, 'dtype'):
141
+ if is_integer_dtype(values.dtype):
142
+ dtype = values.dtype
143
+
136
144
if dtype is not None:
137
145
if not issubclass(type(dtype), _IntegerDtype):
138
146
try:
@@ -174,10 +182,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
174
182
175
183
# infer dtype if needed
176
184
if dtype is None:
177
- if is_integer_dtype(values):
178
- dtype = values.dtype
179
- else:
180
- dtype = np.dtype('int64')
185
+ dtype = np.dtype('int64')
181
186
else:
182
187
dtype = dtype.type
183
188
@@ -197,47 +202,62 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
197
202
198
203
class IntegerArray(ExtensionArray, ExtensionOpsMixin):
199
204
"""
200
- We represent an IntegerArray with 2 numpy arrays
205
+ Array of integer (optionally missing) values.
206
+
207
+ We represent an IntegerArray with 2 numpy arrays:
208
+
201
209
- data: contains a numpy integer array of the appropriate dtype
202
- - mask: a boolean array holding a mask on the data, False is missing
210
+ - mask: a boolean array holding a mask on the data, True is missing
211
+
212
+ To construct an IntegerArray from generic array-like input, use
213
+ the ``integer_array`` function instead.
214
+
215
+ Parameters
216
+ ----------
217
+ values : integer 1D numpy array
218
+ mask : boolean 1D numpy array
219
+ copy : bool, default False
220
+
221
+ Returns
222
+ -------
223
+ IntegerArray
224
+
203
225
"""
204
226
205
227
@cache_readonly
206
228
def dtype(self):
207
229
return _dtypes[str(self._data.dtype)]
208
230
209
- def __init__(self, values, mask=None, dtype=None , copy=False):
210
- """
211
- Parameters
212
- ----------
213
- values : 1D list-like / IntegerArray
214
- mask : 1D list-like, optional
215
- dtype : subclass of _IntegerDtype, optional
216
- copy : bool, default False
231
+ def __init__(self, values, mask, copy=False):
232
+ if not (isinstance(values, np.ndarray)
233
+ and is_integer_dtype(values.dtype)):
234
+ raise TypeError("values should be integer numpy array. Use "
235
+ "the 'integer_array' function instead")
236
+ if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
237
+ raise TypeError("mask should be boolean numpy array. Use "
238
+ "the 'integer_array' function instead")
217
239
218
- Returns
219
- -------
220
- IntegerArray
221
- """
222
- self._data, self._mask = coerce_to_array(
223
- values, dtype=dtype, mask=mask, copy=copy)
240
+ if copy:
241
+ values = values.copy()
242
+ mask = mask.copy()
243
+
244
+ self._data = values
245
+ self._mask = mask
224
246
225
247
@classmethod
226
248
def _from_sequence(cls, scalars, dtype=None, copy=False):
227
- return cls (scalars, dtype=dtype, copy=copy)
249
+ return integer_array (scalars, dtype=dtype, copy=copy)
228
250
229
251
@classmethod
230
252
def _from_factorized(cls, values, original):
231
- return cls (values, dtype=original.dtype)
253
+ return integer_array (values, dtype=original.dtype)
232
254
233
255
def __getitem__(self, item):
234
256
if is_integer(item):
235
257
if self._mask[item]:
236
258
return self.dtype.na_value
237
259
return self._data[item]
238
- return type(self)(self._data[item],
239
- mask=self._mask[item],
240
- dtype=self.dtype)
260
+ return type(self)(self._data[item], self._mask[item])
241
261
242
262
def _coerce_to_ndarray(self):
243
263
"""
@@ -294,7 +314,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
294
314
result[fill_mask] = fill_value
295
315
mask = mask ^ fill_mask
296
316
297
- return type(self)(result, mask=mask, dtype=self.dtype , copy=False)
317
+ return type(self)(result, mask, copy=False)
298
318
299
319
def copy(self, deep=False):
300
320
data, mask = self._data, self._mask
@@ -304,7 +324,7 @@ def copy(self, deep=False):
304
324
else:
305
325
data = data.copy()
306
326
mask = mask.copy()
307
- return type(self)(data, mask, dtype=self.dtype, copy=False)
327
+ return type(self)(data, mask, copy=False)
308
328
309
329
def __setitem__(self, key, value):
310
330
_is_scalar = is_scalar(value)
@@ -356,7 +376,7 @@ def _na_value(self):
356
376
def _concat_same_type(cls, to_concat):
357
377
data = np.concatenate([x._data for x in to_concat])
358
378
mask = np.concatenate([x._mask for x in to_concat])
359
- return cls(data, mask=mask, dtype=to_concat[0].dtype )
379
+ return cls(data, mask)
360
380
361
381
def astype(self, dtype, copy=True):
362
382
"""Cast to a NumPy array or IntegerArray with 'dtype'.
@@ -386,8 +406,7 @@ def astype(self, dtype, copy=True):
386
406
if isinstance(dtype, _IntegerDtype):
387
407
result = self._data.astype(dtype.numpy_dtype,
388
408
casting='same_kind', copy=False)
389
- return type(self)(result, mask=self._mask,
390
- dtype=dtype, copy=False)
409
+ return type(self)(result, mask=self._mask, copy=False)
391
410
392
411
# coerce
393
412
data = self._coerce_to_ndarray()
@@ -523,7 +542,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
523
542
result[mask] = np.nan
524
543
return result
525
544
526
- return type(self)(result, mask=mask, dtype=self.dtype , copy=False)
545
+ return type(self)(result, mask, copy=False)
527
546
528
547
@classmethod
529
548
def _create_arithmetic_method(cls, op):
0 commit comments