-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Issues with DatetimeTZ values in where and combine_first (#21469 + #21546) #21660
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
ac2f836
770a22e
04a0538
6e6c619
8c022d9
96a59f6
deda82c
71a4fd4
7c14643
3d072c5
f812e78
f177ec5
09fe18f
c97b951
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1476,14 +1476,16 @@ def where(self, other, cond, align=True, errors='raise', | |
if transpose: | ||
values = values.T | ||
|
||
other = getattr(other, 'values', other) | ||
other = getattr(other, '_values', getattr(other, 'values', other)) | ||
cond = getattr(cond, 'values', cond) | ||
|
||
# If the default broadcasting would go in the wrong direction, then | ||
# explicitly reshape other instead | ||
if getattr(other, 'ndim', 0) >= 1: | ||
if values.ndim - 1 == other.ndim and axis == 1: | ||
other = other.reshape(tuple(other.shape + (1, ))) | ||
elif transpose and values.ndim == self.ndim - 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we really need this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Without lines 1487-1488 The issue is DatetimeTZBlock._try_coerce_args returns However, we don't run _try_coerce_args until the interior of func at 1499. Thus lines 1476-1477 ( An alternative would be changing `DatetimeTZBlock._try_coerce_args to return 1-D arrays for values and values_mask (changing 2875-2877). But if I do that it breaks setitem. |
||
cond = cond.T | ||
|
||
if not hasattr(cond, 'shape'): | ||
raise ValueError("where must have a condition that is ndarray " | ||
|
@@ -2888,8 +2890,8 @@ def _try_coerce_args(self, values, other): | |
elif isinstance(other, self._holder): | ||
if other.tz != self.values.tz: | ||
raise ValueError("incompatible or non tz-aware value") | ||
other = other.asi8 | ||
other_mask = isna(other) | ||
other_mask = _block_shape(isna(other), ndim=self.ndim) | ||
other = _block_shape(other.asi8, ndim=self.ndim) | ||
elif isinstance(other, (np.datetime64, datetime, date)): | ||
other = tslib.Timestamp(other) | ||
tz = getattr(other, 'tz', None) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
is_float_dtype, | ||
is_extension_type, | ||
is_extension_array_dtype, | ||
is_datetimelike, | ||
is_datetime64tz_dtype, | ||
is_timedelta64_dtype, | ||
is_object_dtype, | ||
|
@@ -78,6 +79,7 @@ | |
from pandas._libs import index as libindex, tslib as libts, lib, iNaT | ||
from pandas.core.config import get_option | ||
from pandas.core.strings import StringMethods | ||
from pandas.core.tools.datetimes import to_datetime | ||
|
||
import pandas.plotting._core as gfx | ||
|
||
|
@@ -2303,10 +2305,12 @@ def combine_first(self, other): | |
new_index = self.index.union(other.index) | ||
this = self.reindex(new_index, copy=False) | ||
other = other.reindex(new_index, copy=False) | ||
if is_datetimelike(this) and not is_datetimelike(other): | ||
other = to_datetime(other) | ||
|
||
# TODO: do we need name? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this comment still relevant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment no longer relevant, I'll remove. What do you think of datetime coercion of other? As I noted my (too long) comment above, using to_datetime coerces ints as well as strings. Old code used the Series constructor which only coerced strings but not ints. Alternatively, we could remove the datetimelike coercion entirely. |
||
name = ops.get_op_result_name(self, other) # noqa | ||
rs_vals = com._where_compat(isna(this), other._values, this._values) | ||
return self._constructor(rs_vals, index=new_index).__finalize__(self) | ||
return this.where(notna(this), other) | ||
|
||
def update(self, other): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2936,6 +2936,20 @@ def test_where_callable(self): | |
tm.assert_frame_equal(result, | ||
(df + 2).where((df + 2) > 8, (df + 2) + 10)) | ||
|
||
def test_where_tz_values(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use the tz_naive_fixture here |
||
dts1 = date_range('20150101', '20150105', tz='America/New_York') | ||
df1 = DataFrame({'date': dts1}) | ||
dts2 = date_range('20150103', '20150107', tz='America/New_York') | ||
df2 = DataFrame({'date': dts2}) | ||
mask = DataFrame(True, index=df1.index, columns=df2.columns) | ||
mask.iloc[3:] = False | ||
result = df1.where(mask, df2) | ||
dts3 = DatetimeIndex(['20150101', '20150102', '20150103', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ideally just contruct what you can w/o using temporaries. ideally call this expected |
||
'20150106', '20150107'], | ||
tz='America/New_York') | ||
exp = DataFrame({'date': dts3}) | ||
assert_frame_equal(exp, result) | ||
|
||
def test_mask(self): | ||
df = DataFrame(np.random.randn(5, 3)) | ||
cond = df > 0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -580,12 +580,11 @@ def test_where_series_datetime64(self, fill_val, exp_dtype): | |
values = pd.Series(pd.date_range(fill_val, periods=4)) | ||
if fill_val.tz: | ||
exp = pd.Series([pd.Timestamp('2011-01-01'), | ||
pd.Timestamp('2012-01-02 05:00'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what changed here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previous behavior was was treating |
||
pd.Timestamp('2012-01-02 00:00', tz='US/Eastern'), | ||
pd.Timestamp('2011-01-03'), | ||
pd.Timestamp('2012-01-04 05:00')]) | ||
self._assert_where_conversion(obj, cond, values, exp, | ||
'datetime64[ns]') | ||
pytest.xfail("ToDo: do not coerce to UTC, must be object") | ||
pd.Timestamp('2012-01-04 00:00', | ||
tz='US/Eastern')]) | ||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype) | ||
|
||
exp = pd.Series([pd.Timestamp('2011-01-01'), values[1], | ||
pd.Timestamp('2011-01-03'), values[3]]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -551,6 +551,19 @@ def test_where_datetime_conversion(): | |
assert_series_equal(rs, expected) | ||
|
||
|
||
def test_where_dt_tz_values(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use the tz_naive_fixture here |
||
dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') | ||
df1 = pd.DataFrame({'date': dts1}) | ||
dts2 = pd.date_range('20150103', '20150107', tz='America/New_York') | ||
df2 = pd.DataFrame({'date': dts2}) | ||
result = df1.date.where(df1.date < df1.date[3], df2.date) | ||
exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment as above |
||
'20150106', '20150107'], | ||
tz='America/New_York') | ||
exp = Series(exp_vals, name='date') | ||
assert_series_equal(exp, result) | ||
|
||
|
||
def test_mask(): | ||
# compare with tested results in test_where | ||
s = Series(np.random.randn(5)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,19 @@ def get_result_type(dtype, dtype2): | |
]).dtype | ||
assert result.kind == expected | ||
|
||
def test_combine_first_dt_tz_values(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use the tz_naive_fixture here |
||
dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') | ||
df1 = pd.DataFrame({'date': dts1}) | ||
dts2 = pd.date_range('20160514', '20160518', tz='America/New_York') | ||
df2 = pd.DataFrame({'date': dts2}, index=range(3, 8)) | ||
result = df1.date.combine_first(df2.date) | ||
exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
'20150104', '20150105', '20160516', | ||
'20160517', '20160518'], | ||
tz='America/New_York') | ||
exp = pd.Series(exp_vals, name='date') | ||
assert_series_equal(exp, result) | ||
|
||
def test_concat_empty_series_dtypes(self): | ||
|
||
# booleans | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice!