Skip to content

Commit a3cddfa

Browse files
sinhrks authored and jreback committed
BUG: TypeError in merge with timedelta64 column
closes #13389 Author: sinhrks <[email protected]> Closes #13802 from sinhrks/isnull_dateunit and squashes the following commits: 8dbfde2 [sinhrks] BUG: TypeError in merge with timedelta64 column
1 parent d06355d commit a3cddfa

File tree

6 files changed

+227
-96
lines changed

6 files changed

+227
-96
lines changed

doc/source/whatsnew/v0.19.0.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,10 @@ Bug Fixes
768768
- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`)
769769
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)
770770
- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`)
771+
- Bug in ``isnull`` and ``notnull`` raising ``TypeError`` if the input datetime-like has a unit other than ``ns`` (:issue:`13389`)
772+
- Bug in ``.merge`` which may raise ``TypeError`` if the input datetime-like has a unit other than ``ns`` (:issue:`13389`)
773+
774+
771775

772776
- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
773777
- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)

pandas/tests/types/test_missing.py

Lines changed: 161 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import datetime
66
from pandas.util import testing as tm
77

8+
import pandas as pd
89
from pandas.core import config as cf
910
from pandas.compat import u
1011
from pandas.tslib import iNaT
@@ -45,100 +46,6 @@ def test_notnull():
4546
assert (isinstance(isnull(s), Series))
4647

4748

48-
def test_isnull():
49-
assert not isnull(1.)
50-
assert isnull(None)
51-
assert isnull(np.NaN)
52-
assert not isnull(np.inf)
53-
assert not isnull(-np.inf)
54-
55-
# series
56-
for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
57-
tm.makeObjectSeries(), tm.makeTimeSeries(),
58-
tm.makePeriodSeries()]:
59-
assert (isinstance(isnull(s), Series))
60-
61-
# frame
62-
for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
63-
tm.makeMixedDataFrame()]:
64-
result = isnull(df)
65-
expected = df.apply(isnull)
66-
tm.assert_frame_equal(result, expected)
67-
68-
# panel
69-
for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())
70-
]:
71-
result = isnull(p)
72-
expected = p.apply(isnull)
73-
tm.assert_panel_equal(result, expected)
74-
75-
# panel 4d
76-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
77-
for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
78-
result = isnull(p)
79-
expected = p.apply(isnull)
80-
tm.assert_panel4d_equal(result, expected)
81-
82-
83-
def test_isnull_lists():
84-
result = isnull([[False]])
85-
exp = np.array([[False]])
86-
assert (np.array_equal(result, exp))
87-
88-
result = isnull([[1], [2]])
89-
exp = np.array([[False], [False]])
90-
assert (np.array_equal(result, exp))
91-
92-
# list of strings / unicode
93-
result = isnull(['foo', 'bar'])
94-
assert (not result.any())
95-
96-
result = isnull([u('foo'), u('bar')])
97-
assert (not result.any())
98-
99-
100-
def test_isnull_nat():
101-
result = isnull([NaT])
102-
exp = np.array([True])
103-
assert (np.array_equal(result, exp))
104-
105-
result = isnull(np.array([NaT], dtype=object))
106-
exp = np.array([True])
107-
assert (np.array_equal(result, exp))
108-
109-
110-
def test_isnull_numpy_nat():
111-
arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
112-
np.datetime64('NaT', 's')])
113-
result = isnull(arr)
114-
expected = np.array([True] * 4)
115-
tm.assert_numpy_array_equal(result, expected)
116-
117-
118-
def test_isnull_datetime():
119-
assert (not isnull(datetime.now()))
120-
assert notnull(datetime.now())
121-
122-
idx = date_range('1/1/1990', periods=20)
123-
assert (notnull(idx).all())
124-
125-
idx = np.asarray(idx)
126-
idx[0] = iNaT
127-
idx = DatetimeIndex(idx)
128-
mask = isnull(idx)
129-
assert (mask[0])
130-
assert (not mask[1:].any())
131-
132-
# GH 9129
133-
pidx = idx.to_period(freq='M')
134-
mask = isnull(pidx)
135-
assert (mask[0])
136-
assert (not mask[1:].any())
137-
138-
mask = isnull(pidx[1:])
139-
assert (not mask.any())
140-
141-
14249
class TestIsNull(tm.TestCase):
14350

14451
def test_0d_array(self):
@@ -150,6 +57,166 @@ def test_0d_array(self):
15057
self.assertFalse(isnull(np.array(0.0, dtype=object)))
15158
self.assertFalse(isnull(np.array(0, dtype=object)))
15259

60+
def test_isnull(self):
61+
self.assertFalse(isnull(1.))
62+
self.assertTrue(isnull(None))
63+
self.assertTrue(isnull(np.NaN))
64+
self.assertTrue(float('nan'))
65+
self.assertFalse(isnull(np.inf))
66+
self.assertFalse(isnull(-np.inf))
67+
68+
# series
69+
for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
70+
tm.makeObjectSeries(), tm.makeTimeSeries(),
71+
tm.makePeriodSeries()]:
72+
self.assertIsInstance(isnull(s), Series)
73+
74+
# frame
75+
for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
76+
tm.makeMixedDataFrame()]:
77+
result = isnull(df)
78+
expected = df.apply(isnull)
79+
tm.assert_frame_equal(result, expected)
80+
81+
# panel
82+
for p in [tm.makePanel(), tm.makePeriodPanel(),
83+
tm.add_nans(tm.makePanel())]:
84+
result = isnull(p)
85+
expected = p.apply(isnull)
86+
tm.assert_panel_equal(result, expected)
87+
88+
# panel 4d
89+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
90+
for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
91+
result = isnull(p)
92+
expected = p.apply(isnull)
93+
tm.assert_panel4d_equal(result, expected)
94+
95+
def test_isnull_lists(self):
96+
result = isnull([[False]])
97+
exp = np.array([[False]])
98+
tm.assert_numpy_array_equal(result, exp)
99+
100+
result = isnull([[1], [2]])
101+
exp = np.array([[False], [False]])
102+
tm.assert_numpy_array_equal(result, exp)
103+
104+
# list of strings / unicode
105+
result = isnull(['foo', 'bar'])
106+
exp = np.array([False, False])
107+
tm.assert_numpy_array_equal(result, exp)
108+
109+
result = isnull([u('foo'), u('bar')])
110+
exp = np.array([False, False])
111+
tm.assert_numpy_array_equal(result, exp)
112+
113+
def test_isnull_nat(self):
114+
result = isnull([NaT])
115+
exp = np.array([True])
116+
tm.assert_numpy_array_equal(result, exp)
117+
118+
result = isnull(np.array([NaT], dtype=object))
119+
exp = np.array([True])
120+
tm.assert_numpy_array_equal(result, exp)
121+
122+
def test_isnull_numpy_nat(self):
123+
arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
124+
np.datetime64('NaT', 's')])
125+
result = isnull(arr)
126+
expected = np.array([True] * 4)
127+
tm.assert_numpy_array_equal(result, expected)
128+
129+
def test_isnull_datetime(self):
130+
self.assertFalse(isnull(datetime.now()))
131+
self.assertTrue(notnull(datetime.now()))
132+
133+
idx = date_range('1/1/1990', periods=20)
134+
exp = np.ones(len(idx), dtype=bool)
135+
tm.assert_numpy_array_equal(notnull(idx), exp)
136+
137+
idx = np.asarray(idx)
138+
idx[0] = iNaT
139+
idx = DatetimeIndex(idx)
140+
mask = isnull(idx)
141+
self.assertTrue(mask[0])
142+
exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
143+
self.assert_numpy_array_equal(mask, exp)
144+
145+
# GH 9129
146+
pidx = idx.to_period(freq='M')
147+
mask = isnull(pidx)
148+
self.assertTrue(mask[0])
149+
exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
150+
self.assert_numpy_array_equal(mask, exp)
151+
152+
mask = isnull(pidx[1:])
153+
exp = np.zeros(len(mask), dtype=bool)
154+
self.assert_numpy_array_equal(mask, exp)
155+
156+
def test_datetime_other_units(self):
157+
idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
158+
exp = np.array([False, True, False])
159+
tm.assert_numpy_array_equal(isnull(idx), exp)
160+
tm.assert_numpy_array_equal(notnull(idx), ~exp)
161+
tm.assert_numpy_array_equal(isnull(idx.values), exp)
162+
tm.assert_numpy_array_equal(notnull(idx.values), ~exp)
163+
164+
for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
165+
'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
166+
'datetime64[ns]']:
167+
values = idx.values.astype(dtype)
168+
169+
exp = np.array([False, True, False])
170+
tm.assert_numpy_array_equal(isnull(values), exp)
171+
tm.assert_numpy_array_equal(notnull(values), ~exp)
172+
173+
exp = pd.Series([False, True, False])
174+
s = pd.Series(values)
175+
tm.assert_series_equal(isnull(s), exp)
176+
tm.assert_series_equal(notnull(s), ~exp)
177+
s = pd.Series(values, dtype=object)
178+
tm.assert_series_equal(isnull(s), exp)
179+
tm.assert_series_equal(notnull(s), ~exp)
180+
181+
def test_timedelta_other_units(self):
182+
idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
183+
exp = np.array([False, True, False])
184+
tm.assert_numpy_array_equal(isnull(idx), exp)
185+
tm.assert_numpy_array_equal(notnull(idx), ~exp)
186+
tm.assert_numpy_array_equal(isnull(idx.values), exp)
187+
tm.assert_numpy_array_equal(notnull(idx.values), ~exp)
188+
189+
for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
190+
'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
191+
'timedelta64[ns]']:
192+
values = idx.values.astype(dtype)
193+
194+
exp = np.array([False, True, False])
195+
tm.assert_numpy_array_equal(isnull(values), exp)
196+
tm.assert_numpy_array_equal(notnull(values), ~exp)
197+
198+
exp = pd.Series([False, True, False])
199+
s = pd.Series(values)
200+
tm.assert_series_equal(isnull(s), exp)
201+
tm.assert_series_equal(notnull(s), ~exp)
202+
s = pd.Series(values, dtype=object)
203+
tm.assert_series_equal(isnull(s), exp)
204+
tm.assert_series_equal(notnull(s), ~exp)
205+
206+
def test_period(self):
207+
idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M')
208+
exp = np.array([False, True, False])
209+
tm.assert_numpy_array_equal(isnull(idx), exp)
210+
tm.assert_numpy_array_equal(notnull(idx), ~exp)
211+
212+
exp = pd.Series([False, True, False])
213+
s = pd.Series(idx)
214+
tm.assert_series_equal(isnull(s), exp)
215+
tm.assert_series_equal(notnull(s), ~exp)
216+
s = pd.Series(idx, dtype=object)
217+
tm.assert_series_equal(isnull(s), exp)
218+
tm.assert_series_equal(notnull(s), ~exp)
219+
153220

154221
def test_array_equivalent():
155222
assert array_equivalent(np.array([np.nan, np.nan]),

pandas/tools/tests/test_merge.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,47 @@ def test_join_append_timedeltas(self):
473473
'0r': Series([td, NaT], index=list('AB'))})
474474
assert_frame_equal(result, expected)
475475

476+
def test_other_datetime_unit(self):
477+
# GH 13389
478+
df1 = pd.DataFrame({'entity_id': [101, 102]})
479+
s = pd.Series([None, None], index=[101, 102], name='days')
480+
481+
for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
482+
'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
483+
'datetime64[ns]']:
484+
485+
df2 = s.astype(dtype).to_frame('days')
486+
# coerces to datetime64[ns], thus should not be affected
487+
self.assertEqual(df2['days'].dtype, 'datetime64[ns]')
488+
489+
result = df1.merge(df2, left_on='entity_id', right_index=True)
490+
491+
exp = pd.DataFrame({'entity_id': [101, 102],
492+
'days': np.array(['nat', 'nat'],
493+
dtype='datetime64[ns]')},
494+
columns=['entity_id', 'days'])
495+
tm.assert_frame_equal(result, exp)
496+
497+
def test_other_timedelta_unit(self):
498+
# GH 13389
499+
df1 = pd.DataFrame({'entity_id': [101, 102]})
500+
s = pd.Series([None, None], index=[101, 102], name='days')
501+
502+
for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
503+
'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
504+
'timedelta64[ns]']:
505+
506+
df2 = s.astype(dtype).to_frame('days')
507+
self.assertEqual(df2['days'].dtype, dtype)
508+
509+
result = df1.merge(df2, left_on='entity_id', right_index=True)
510+
511+
exp = pd.DataFrame({'entity_id': [101, 102],
512+
'days': np.array(['nat', 'nat'],
513+
dtype=dtype)},
514+
columns=['entity_id', 'days'])
515+
tm.assert_frame_equal(result, exp)
516+
476517
def test_overlapping_columns_error_message(self):
477518
df = DataFrame({'key': [1, 2, 3],
478519
'v1': [4, 5, 6],

pandas/tseries/tests/test_period.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,6 +1663,24 @@ def test_constructor_datetime64arr(self):
16631663

16641664
self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
16651665

1666+
def test_view(self):
1667+
idx = pd.PeriodIndex([], freq='M')
1668+
1669+
exp = np.array([], dtype=np.int64)
1670+
tm.assert_numpy_array_equal(idx.view('i8'), exp)
1671+
tm.assert_numpy_array_equal(idx.asi8, exp)
1672+
1673+
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
1674+
1675+
exp = np.array([492, -9223372036854775808], dtype=np.int64)
1676+
tm.assert_numpy_array_equal(idx.view('i8'), exp)
1677+
tm.assert_numpy_array_equal(idx.asi8, exp)
1678+
1679+
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
1680+
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
1681+
tm.assert_numpy_array_equal(idx.view('i8'), exp)
1682+
tm.assert_numpy_array_equal(idx.asi8, exp)
1683+
16661684
def test_constructor_empty(self):
16671685
idx = pd.PeriodIndex([], freq='M')
16681686
tm.assertIsInstance(idx, PeriodIndex)

pandas/types/common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ def is_object(x):
230230

231231
def needs_i8_conversion(arr_or_dtype):
232232
return (is_datetime_or_timedelta_dtype(arr_or_dtype) or
233-
is_datetime64tz_dtype(arr_or_dtype))
233+
is_datetime64tz_dtype(arr_or_dtype) or
234+
isinstance(arr_or_dtype, ABCPeriodIndex))
234235

235236

236237
def is_numeric_dtype(arr_or_dtype):

pandas/types/missing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def _isnull_ndarraylike(obj):
140140
vec = lib.isnullobj(values.ravel())
141141
result[...] = vec.reshape(shape)
142142

143-
elif is_datetimelike(obj):
143+
elif needs_i8_conversion(obj):
144144
# this is the NaT pattern
145145
result = values.view('i8') == iNaT
146146
else:

0 commit comments

Comments
 (0)