
Commit 046dc29

CLN/BUG: fix ndarray assignment may cause unexpected cast
1 parent: 233d51d

10 files changed, +311 -144 lines changed

doc/source/whatsnew/v0.19.1.txt (+3)

@@ -57,5 +57,8 @@ Bug Fixes
 - Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
 - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`)
 
+- Bug in assignment against datetime-like data with ``int`` may incorrectly converted to datetime-like (:issue:`14145`)
+- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
+
 - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
   is not scalar and ``values`` is not specified (:issue:`14380`)

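The two new whatsnew entries are easiest to read next to a concrete example. A minimal sketch of the behavior they describe (the column name, values, and commented dtypes are illustrative of the intended post-fix behavior, not output captured from a run):

    import numpy as np
    import pandas as pd

    # GH 14001: assigning a float64 ndarray into an int64 column should
    # upcast the column instead of keeping (and truncating to) int64
    df = pd.DataFrame({'a': [1, 2, 3]})           # 'a' starts as int64
    df.loc[:, 'a'] = np.array([1.5, 2.5, 3.5])    # float64 values
    print(df['a'].dtype)                          # expected after the fix: float64

    # GH 14145: an int assigned into datetime-like data should no longer be
    # silently reinterpreted as a datetime-like value (e.g. a timestamp near
    # the epoch)
    s = pd.Series(pd.date_range('2016-01-01', periods=3))
    s[1] = 1
    print(s)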

pandas/core/frame.py (+8 -14)

@@ -24,7 +24,7 @@
 import numpy.ma as ma
 
 from pandas.types.cast import (_maybe_upcast,
-                               _infer_dtype_from_scalar,
+                               _cast_scalar_to_array,
                                _possibly_cast_to_datetime,
                                _possibly_infer_to_datetimelike,
                                _possibly_convert_platform,
@@ -332,15 +332,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                 raise_with_traceback(exc)
 
             if arr.ndim == 0 and index is not None and columns is not None:
-                if isinstance(data, compat.string_types) and dtype is None:
-                    dtype = np.object_
-                if dtype is None:
-                    dtype, data = _infer_dtype_from_scalar(data)
-
-                values = np.empty((len(index), len(columns)), dtype=dtype)
-                values.fill(data)
-                mgr = self._init_ndarray(values, index, columns, dtype=dtype,
-                                         copy=False)
+                values = _cast_scalar_to_array((len(index), len(columns)),
+                                               data, dtype=dtype)
+                mgr = self._init_ndarray(values, index, columns,
+                                         dtype=values.dtype, copy=False)
             else:
                 raise PandasError('DataFrame constructor not properly called!')
 
@@ -454,7 +449,7 @@ def _get_axes(N, K, index=index, columns=columns):
         values = _prep_ndarray(values, copy=copy)
 
         if dtype is not None:
-            if values.dtype != dtype:
+            if not is_dtype_equal(values.dtype, dtype):
                 try:
                     values = values.astype(dtype)
                 except Exception as orig:
@@ -2656,9 +2651,8 @@ def reindexer(value):
 
        else:
            # upcast the scalar
-           dtype, value = _infer_dtype_from_scalar(value)
-           value = np.repeat(value, len(self.index)).astype(dtype)
-           value = _possibly_cast_to_datetime(value, dtype)
+           value = _cast_scalar_to_array(len(self.index), value)
+           value = _possibly_cast_to_datetime(value, value.dtype)
 
        # return internal types directly
        if is_extension_type(value):

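The constructor and `_sanitize_column` hunks above fold the old infer-dtype / `np.empty` / `fill` pattern into a single `_cast_scalar_to_array` helper imported from `pandas.types.cast` (its definition lives in one of the changed files not shown in this excerpt). A rough sketch of the helper's contract, modeled on the removed lines rather than on its actual source:

    import numpy as np

    def cast_scalar_to_array(shape, value, dtype=None):
        # Simplified stand-in: allocate an array of ``shape`` filled with
        # ``value``, inferring a dtype from the scalar when none is given.
        if dtype is None:
            # crude inference; the real helper goes through
            # _infer_dtype_from_scalar and understands pandas dtypes
            dtype = np.object_ if isinstance(value, str) else np.asarray(value).dtype
        values = np.empty(shape, dtype=dtype)
        values.fill(value)
        return values

    print(cast_scalar_to_array((2, 3), 5).dtype)      # int64 on most platforms
    print(cast_scalar_to_array((2, 3), 'x').dtype)    # object

Passing `dtype=values.dtype` to `_init_ndarray` matters because the local `dtype` may still be `None`; the inference now happens inside the helper rather than in the constructor.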

pandas/core/internals.py (+94 -82)

@@ -42,7 +42,7 @@
                                  is_null_datelike_scalar)
 import pandas.types.concat as _concat
 
-from pandas.types.generic import ABCSeries
+from pandas.types.generic import ABCSeries, ABCDatetimeIndex
 from pandas.core.common import is_null_slice
 import pandas.core.algorithms as algos
 
@@ -378,7 +378,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
 
         # fillna, but if we cannot coerce, then try again as an ObjectBlock
         try:
-            values, _, value, _ = self._try_coerce_args(self.values, value)
+            values, _, _, _ = self._try_coerce_args(self.values, value)
+            # value may be converted to internal, thus drop
             blocks = self.putmask(mask, value, inplace=inplace)
             blocks = [b.make_block(values=self._try_coerce_result(b.values))
                       for b in blocks]
@@ -666,8 +667,43 @@ def setitem(self, indexer, value, mgr=None):
             if self.is_numeric:
                 value = np.nan
 
-        # coerce args
-        values, _, value, _ = self._try_coerce_args(self.values, value)
+        # coerce if block dtype can store value
+        values = self.values
+        try:
+            values, _, value, _ = self._try_coerce_args(values, value)
+            # can keep its own dtype
+            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
+                                                          value.dtype):
+                dtype = self.dtype
+            else:
+                dtype = 'infer'
+
+        except (TypeError, ValueError):
+            # current dtype cannot store value, coerce to common dtype
+            find_dtype = False
+
+            if hasattr(value, 'dtype'):
+                dtype = value.dtype
+                find_dtype = True
+
+            elif is_scalar(value):
+                if isnull(value):
+                    # NaN promotion is handled in latter path
+                    dtype = False
+                else:
+                    dtype, _ = _infer_dtype_from_scalar(value,
+                                                        pandas_dtype=True)
+                    find_dtype = True
+            else:
+                dtype = 'infer'
+
+            if find_dtype:
+                dtype = _find_common_type([values.dtype, dtype])
+                if not is_dtype_equal(self.dtype, dtype):
+                    b = self.astype(dtype)
+                    return b.setitem(indexer, value, mgr=mgr)
+
+        # value must be storeable at this moment
         arr_value = np.array(value)
 
         # cast the values to a type that can hold nan (if necessary)
@@ -697,87 +733,52 @@ def setitem(self, indexer, value, mgr=None):
             raise ValueError("cannot set using a slice indexer with a "
                              "different length than the value")
 
-        try:
-
-            def _is_scalar_indexer(indexer):
-                # return True if we are all scalar indexers
-
-                if arr_value.ndim == 1:
-                    if not isinstance(indexer, tuple):
-                        indexer = tuple([indexer])
-                    return all([is_scalar(idx) for idx in indexer])
-                return False
-
-            def _is_empty_indexer(indexer):
-                # return a boolean if we have an empty indexer
+        def _is_scalar_indexer(indexer):
+            # return True if we are all scalar indexers
 
-                if arr_value.ndim == 1:
-                    if not isinstance(indexer, tuple):
-                        indexer = tuple([indexer])
-                    return any(isinstance(idx, np.ndarray) and len(idx) == 0
-                               for idx in indexer)
-                return False
-
-            # empty indexers
-            # 8669 (empty)
-            if _is_empty_indexer(indexer):
-                pass
-
-            # setting a single element for each dim and with a rhs that could
-            # be say a list
-            # GH 6043
-            elif _is_scalar_indexer(indexer):
-                values[indexer] = value
-
-            # if we are an exact match (ex-broadcasting),
-            # then use the resultant dtype
-            elif (len(arr_value.shape) and
-                  arr_value.shape[0] == values.shape[0] and
-                  np.prod(arr_value.shape) == np.prod(values.shape)):
-                values[indexer] = value
-                values = values.astype(arr_value.dtype)
-
-            # set
-            else:
-                values[indexer] = value
-
-            # coerce and try to infer the dtypes of the result
-            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
-                                                          value.dtype):
-                dtype = value.dtype
-            elif is_scalar(value):
-                dtype, _ = _infer_dtype_from_scalar(value)
-            else:
-                dtype = 'infer'
-            values = self._try_coerce_and_cast_result(values, dtype)
-            block = self.make_block(transf(values), fastpath=True)
-
-            # may have to soft convert_objects here
-            if block.is_object and not self.is_object:
-                block = block.convert(numeric=False)
-
-            return block
-        except ValueError:
-            raise
-        except TypeError:
+            if arr_value.ndim == 1:
+                if not isinstance(indexer, tuple):
+                    indexer = tuple([indexer])
+                return all([is_scalar(idx) for idx in indexer])
+            return False
 
-            # cast to the passed dtype if possible
-            # otherwise raise the original error
-            try:
-                # e.g. we are uint32 and our value is uint64
-                # this is for compat with older numpies
-                block = self.make_block(transf(values.astype(value.dtype)))
-                return block.setitem(indexer=indexer, value=value, mgr=mgr)
+        def _is_empty_indexer(indexer):
+            # return a boolean if we have an empty indexer
 
-            except:
-                pass
-
-            raise
+            if arr_value.ndim == 1:
+                if not isinstance(indexer, tuple):
+                    indexer = tuple([indexer])
+                return any(isinstance(idx, np.ndarray) and len(idx) == 0
+                           for idx in indexer)
+            return False
 
-        except Exception:
+        # empty indexers
+        # 8669 (empty)
+        if _is_empty_indexer(indexer):
             pass
 
-        return [self]
+        # setting a single element for each dim and with a rhs that could
+        # be say a list
+        # GH 6043
+        elif _is_scalar_indexer(indexer):
+            values[indexer] = value
+
+        # if we are an exact match (ex-broadcasting),
+        # then use the resultant dtype
+        elif (len(arr_value.shape) and
+              arr_value.shape[0] == values.shape[0] and
+              np.prod(arr_value.shape) == np.prod(values.shape)):
+            values[indexer] = value
+            values = values.astype(arr_value.dtype)
+
+        # set
+        else:
+            values[indexer] = value
+
+        # coerce and try to infer the dtypes of the result
+        values = self._try_coerce_and_cast_result(values, dtype)
+        block = self.make_block(transf(values), fastpath=True)
+        return block
 
     def putmask(self, mask, new, align=True, inplace=False, axis=0,
                 transpose=False, mgr=None):
@@ -1241,6 +1242,7 @@ def func(cond, values, other):
 
             values, values_mask, other, other_mask = self._try_coerce_args(
                 values, other)
+
             try:
                 return self._try_coerce_result(expressions.where(
                     cond, values, other, raise_on_error=True))
@@ -1497,6 +1499,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
             new = new[mask]
 
         mask = _safe_reshape(mask, new_values.shape)
+
        new_values[mask] = new
        new_values = self._try_coerce_result(new_values)
        return [self.make_block(values=new_values)]
@@ -1666,7 +1669,7 @@ def fillna(self, value, **kwargs):
 
         # allow filling with integers to be
         # interpreted as seconds
-        if not isinstance(value, np.timedelta64) and is_integer(value):
+        if not isinstance(value, np.timedelta64):
             value = Timedelta(value, unit='s')
         return super(TimeDeltaBlock, self).fillna(value, **kwargs)
 
@@ -1898,6 +1901,15 @@ def _maybe_downcast(self, blocks, downcast=None):
     def _can_hold_element(self, element):
         return True
 
+    def _try_coerce_args(self, values, other):
+        """ provide coercion to our input arguments """
+
+        if isinstance(other, ABCDatetimeIndex):
+            # to store DatetimeTZBlock as object
+            other = other.asobject.values
+
+        return values, False, other, False
+
     def _try_cast(self, element):
         return element
 
@@ -2234,8 +2246,6 @@ def _try_coerce_args(self, values, other):
                                 "naive Block")
             other_mask = isnull(other)
             other = other.asm8.view('i8')
-        elif hasattr(other, 'dtype') and is_integer_dtype(other):
-            other = other.view('i8')
         else:
             try:
                 other = np.asarray(other)
@@ -2411,6 +2421,8 @@ def _try_coerce_args(self, values, other):
                 raise ValueError("incompatible or non tz-aware value")
             other_mask = isnull(other)
             other = other.value
+        else:
+            raise TypeError
 
         return values, values_mask, other, other_mask
 

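The heart of the `Block.setitem` rework is in the third hunk above: instead of performing the assignment inside a broad try/except and guessing the result dtype afterwards, the block first checks whether it can store the incoming value; if it cannot, it promotes itself to a common dtype (`self.astype(dtype)`) and re-dispatches `setitem` on the upcast block. A standalone sketch of that decision, with illustrative names only (pandas itself uses `_try_coerce_args`, `_infer_dtype_from_scalar`, and its own `_find_common_type`, which also understands extension dtypes):

    import numpy as np

    def target_dtype_for_setitem(block_values, value):
        # return the dtype a block should end up with after block[indexer] = value
        value = np.asarray(value)
        if np.can_cast(value.dtype, block_values.dtype):
            # the block can already hold the value: keep its dtype
            return block_values.dtype
        # otherwise upcast to a common dtype and let the upcast block
        # perform the assignment
        return np.result_type(block_values.dtype, value.dtype)

    print(target_dtype_for_setitem(np.zeros(3, dtype='float64'), np.array([1, 2])))    # float64
    print(target_dtype_for_setitem(np.zeros(3, dtype='int64'), np.array([1.5, 2.5])))  # float64

A smaller behavioral note from the same file: `TimeDeltaBlock.fillna` previously wrapped only integer fill values in `Timedelta(value, unit='s')`; after this change every non-`np.timedelta64` fill value takes that path. A 0.19-era usage sketch (later pandas versions restrict integer fills, so treat the exact output as illustrative):

    import numpy as np
    import pandas as pd

    s = pd.Series(pd.to_timedelta([1, np.nan, 3], unit='d'))
    print(s.fillna(1))                        # the integer is read as seconds: NaT -> 0 days 00:00:01
    print(s.fillna(np.timedelta64(1, 's')))   # an explicit timedelta is passed through unchanged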

pandas/core/panel.py (+5 -8)

@@ -9,6 +9,7 @@
 import numpy as np
 
 from pandas.types.cast import (_infer_dtype_from_scalar,
+                               _cast_scalar_to_array,
                                _possibly_cast_item)
 from pandas.types.common import (is_integer, is_list_like,
                                  is_string_like, is_scalar)
@@ -166,11 +167,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
             copy = False
             dtype = None
         elif is_scalar(data) and all(x is not None for x in passed_axes):
-            if dtype is None:
-                dtype, data = _infer_dtype_from_scalar(data)
-            values = np.empty([len(x) for x in passed_axes], dtype=dtype)
-            values.fill(data)
-            mgr = self._init_matrix(values, passed_axes, dtype=dtype,
+            values = _cast_scalar_to_array([len(x) for x in passed_axes],
+                                           data, dtype=dtype)
+            mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
                                     copy=False)
             copy = False
         else:  # pragma: no cover
@@ -570,9 +569,7 @@ def __setitem__(self, key, value):
                                  shape[1:], tuple(map(int, value.shape))))
             mat = np.asarray(value)
         elif is_scalar(value):
-            dtype, value = _infer_dtype_from_scalar(value)
-            mat = np.empty(shape[1:], dtype=dtype)
-            mat.fill(value)
+            mat = _cast_scalar_to_array(shape[1:], value)
         else:
             raise TypeError('Cannot set item of type: %s' % str(type(value)))

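Both panel hunks route the same scalar-broadcast logic through `_cast_scalar_to_array`, mirroring the DataFrame constructor change. A small usage example of that path (the axis labels are made up, and `Panel` was deprecated in later pandas versions, so this reflects the 0.19-era API only):

    import pandas as pd

    # construct a Panel from a single scalar broadcast over all three axes;
    # the dtype is inferred from the scalar unless one is passed explicitly
    p = pd.Panel(4, items=['i1', 'i2'], major_axis=[0, 1, 2], minor_axis=['a', 'b'])
    print(p.shape)          # (2, 3, 2)
    print(p.values.dtype)   # expected: int64, inferred from the scalar 4

    # scalar __setitem__ takes the same broadcast-and-infer path
    p['i3'] = 1.5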
