Skip to content

ENH: PeriodIndex now accepts pd.NaT #13430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ Other API changes
- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`)
- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`)
- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)

.. _whatsnew_0182.deprecations:

Expand Down
37 changes: 32 additions & 5 deletions pandas/src/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ cimport cython
from datetime cimport *
cimport util
cimport lib
from lib cimport is_null_datetimelike
import lib
from pandas import tslib
from tslib import Timedelta, Timestamp, iNaT, NaT
Expand Down Expand Up @@ -458,13 +459,39 @@ def extract_ordinals(ndarray[object] values, freq):

for i in range(n):
p = values[i]
ordinals[i] = p.ordinal
if p.freqstr != freqstr:
msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr)
raise IncompatibleFrequency(msg)

if is_null_datetimelike(p):
ordinals[i] = tslib.iNaT
else:
try:
ordinals[i] = p.ordinal

if p.freqstr != freqstr:
msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr)
raise IncompatibleFrequency(msg)

except AttributeError:
p = Period(p, freq=freq)
ordinals[i] = p.ordinal

return ordinals


def extract_freq(ndarray[object] values):
    """
    Infer a frequency from the elements of ``values``.

    Elements are scanned in order and the ``freq`` attribute of the first
    element that provides a non-None ``freq`` is returned.

    Parameters
    ----------
    values : ndarray[object]
        Object array whose elements may or may not carry a ``freq``
        attribute.

    Returns
    -------
    The ``freq`` attribute of the first element whose ``freq`` is not None.

    Raises
    ------
    ValueError
        If no element provides a usable ``freq`` (this includes empty
        input).
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object p, freq

    for i in range(n):
        p = values[i]
        # An element can expose ``freq`` without having one set (the
        # attribute is None); presumably this applies to datetime-like
        # scalars - verify against callers. Treat it like a missing
        # attribute and keep scanning instead of returning None.
        freq = getattr(p, 'freq', None)
        if freq is not None:
            return freq

    raise ValueError('freq not specified and cannot be inferred')


cpdef resolution(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
Expand Down Expand Up @@ -719,7 +746,7 @@ cdef class Period(object):
converted = other.asfreq(freq)
ordinal = converted.ordinal

elif lib.is_null_datetimelike(value) or value in tslib._nat_strings:
elif is_null_datetimelike(value) or value in tslib._nat_strings:
ordinal = tslib.iNaT
if freq is None:
raise ValueError("If value is NaT, freq cannot be None "
Expand Down
30 changes: 10 additions & 20 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,6 @@ def f(self):
return property(f)


def _get_ordinals(data, freq):
    """
    Return the period ordinals for ``data`` at frequency ``freq``.

    The first element decides the conversion path: if it is a ``Period``
    the whole input is handed to ``period.extract_ordinals``; otherwise
    every element is converted individually through ``Period(x, freq=freq)``.
    """
    if isinstance(data[0], Period):
        return period.extract_ordinals(data, freq)

    def to_ordinal(value):
        # Build a Period at the requested freq and keep only its ordinal.
        return Period(value, freq=freq).ordinal

    return lib.map_infer(data, to_ordinal)


def dt64arr_to_periodarr(data, freq, tz):
if data.dtype != np.dtype('M8[ns]'):
raise ValueError('Wrong dtype: %s' % data.dtype)
Expand Down Expand Up @@ -235,14 +227,9 @@ def _from_arraylike(cls, data, freq, tz):
except (TypeError, ValueError):
data = com._ensure_object(data)

if freq is None and len(data) > 0:
freq = getattr(data[0], 'freq', None)

if freq is None:
raise ValueError('freq not specified and cannot be '
'inferred from first element')

data = _get_ordinals(data, freq)
freq = period.extract_freq(data)
data = period.extract_ordinals(data, freq)
else:
if isinstance(data, PeriodIndex):
if freq is None or freq == data.freq:
Expand All @@ -254,12 +241,15 @@ def _from_arraylike(cls, data, freq, tz):
data = period.period_asfreq_arr(data.values,
base1, base2, 1)
else:
if freq is None and len(data) > 0:
freq = getattr(data[0], 'freq', None)

if freq is None and com.is_object_dtype(data):
# must contain Period instance and thus extract ordinals
freq = period.extract_freq(data)
data = period.extract_ordinals(data, freq)

if freq is None:
raise ValueError('freq not specified and cannot be '
'inferred from first element')
msg = 'freq not specified and cannot be inferred'
raise ValueError(msg)

if data.dtype != np.int64:
if np.issubdtype(data.dtype, np.datetime64):
Expand All @@ -269,7 +259,7 @@ def _from_arraylike(cls, data, freq, tz):
data = com._ensure_int64(data)
except (TypeError, ValueError):
data = com._ensure_object(data)
data = _get_ordinals(data, freq)
data = period.extract_ordinals(data, freq)

return data, freq

Expand Down
78 changes: 78 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1742,6 +1742,84 @@ def test_constructor_datetime64arr(self):

self.assertRaises(ValueError, PeriodIndex, vals, freq='D')

def test_constructor_empty(self):
    # An explicit freq is sufficient to build an empty index.
    empty = pd.PeriodIndex([], freq='M')
    tm.assertIsInstance(empty, PeriodIndex)
    self.assertEqual(len(empty), 0)
    self.assertEqual(empty.freq, 'M')

    # With no elements and no freq the constructor must raise.
    with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
        pd.PeriodIndex([])

def test_constructor_pi_nat(self):
    # NaT between two monthly Periods; list and ndarray input must agree.
    values = [Period('2011-01', freq='M'), pd.NaT,
              Period('2011-01', freq='M')]
    expected = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
    for data in (values, np.array(values)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    # Leading NaT values followed by monthly Periods.
    values = [pd.NaT, pd.NaT, Period('2011-01', freq='M'),
              Period('2011-01', freq='M')]
    expected = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
    for data in (values, np.array(values)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    # NaT mixed with strings, with the freq passed explicitly.
    result = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
    tm.assert_index_equal(result, expected)

    # Only NaT-like values and no freq: the constructor must raise.
    for values in ([pd.NaT, pd.NaT], ['NaT', 'NaT']):
        for data in (values, np.array(values)):
            with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
                PeriodIndex(data)

def test_constructor_incompat_freq(self):
    # Monthly and daily Periods in one input must be rejected, for list
    # and ndarray input alike, with NaT in the middle or in first position.
    msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"

    nat_in_middle = [Period('2011-01', freq='M'), pd.NaT,
                     Period('2011-01', freq='D')]
    # first element is pd.NaT
    nat_in_front = [pd.NaT, Period('2011-01', freq='M'),
                    Period('2011-01', freq='D')]

    for values in (nat_in_middle, nat_in_front):
        for data in (values, np.array(values)):
            with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
                PeriodIndex(data)

def test_constructor_mixed(self):
    # Each case: (mixed input without freq, expected labels, expected freq).
    cases = [
        (['2011-01', pd.NaT, Period('2011-01', freq='M')],
         ['2011-01', 'NaT', '2011-01'], 'M'),
        (['NaT', pd.NaT, Period('2011-01', freq='M')],
         ['NaT', 'NaT', '2011-01'], 'M'),
        ([Period('2011-01-01', freq='D'), pd.NaT, '2012-01-01'],
         ['2011-01-01', 'NaT', '2012-01-01'], 'D'),
    ]

    for data, labels, freq in cases:
        result = PeriodIndex(data)
        expected = PeriodIndex(labels, freq=freq)
        tm.assert_index_equal(result, expected)

def test_constructor_simple_new(self):
idx = period_range('2007-01', name='p', periods=2, freq='M')
result = idx._simple_new(idx, 'p', freq=idx.freq)
Expand Down