Skip to content

Fix IntervalDtype Bugs and Inconsistencies #18997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ Other API Changes
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)

.. _whatsnew_0230.deprecations:

Expand Down
34 changes: 14 additions & 20 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype):

THIS IS NOT A REAL NUMPY DTYPE
"""
name = 'interval'
type = IntervalDtypeType
kind = None
str = '|O08'
Expand Down Expand Up @@ -653,8 +654,8 @@ def __new__(cls, subtype=None):
u.subtype = None
return u
elif (isinstance(subtype, compat.string_types) and
subtype == 'interval'):
subtype = ''
subtype.lower() == 'interval'):
subtype = None
else:
if isinstance(subtype, compat.string_types):
m = cls._match.search(subtype)
Expand All @@ -666,11 +667,6 @@ def __new__(cls, subtype=None):
except TypeError:
raise ValueError("could not construct IntervalDtype")

if subtype is None:
u = object.__new__(cls)
u.subtype = None
return u

if is_categorical_dtype(subtype) or is_string_dtype(subtype):
# GH 19016
msg = ('category, object, and string subtypes are not supported '
Expand All @@ -692,31 +688,29 @@ def construct_from_string(cls, string):
if it's not possible
"""
if isinstance(string, compat.string_types):
try:
return cls(string)
except ValueError:
pass
raise TypeError("could not construct IntervalDtype")
return cls(string)
msg = "a string needs to be passed, got type {typ}"
raise TypeError(msg.format(typ=type(string)))

def __unicode__(self):
if self.subtype is None:
return "interval"
return "interval[{subtype}]".format(subtype=self.subtype)

@property
def name(self):
return str(self)

def __hash__(self):
# make myself hashable
return hash(str(self))

def __eq__(self, other):
if isinstance(other, compat.string_types):
return other == self.name or other == self.name.title()

return (isinstance(other, IntervalDtype) and
self.subtype == other.subtype)
return other.lower() in (self.name.lower(), str(self).lower())
elif not isinstance(other, IntervalDtype):
return False
elif self.subtype is None or other.subtype is None:
# None should match any subtype
return True
else:
return self.subtype == other.subtype

@classmethod
def is_dtype(cls, dtype):
Expand Down
94 changes: 59 additions & 35 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def test_hash_vs_equality(self):
assert dtype2 == dtype
assert dtype3 == dtype
assert dtype is dtype2
assert dtype2 is dtype
assert dtype2 is dtype3
assert dtype3 is dtype
assert hash(dtype) == hash(dtype2)
assert hash(dtype) == hash(dtype3)
Expand All @@ -451,14 +451,19 @@ def test_hash_vs_equality(self):
assert hash(dtype2) == hash(dtype2)
assert hash(dtype2) == hash(dtype3)

def test_construction(self):
with pytest.raises(ValueError):
IntervalDtype('xx')
@pytest.mark.parametrize('subtype', [
'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
def test_construction(self, subtype):
i = IntervalDtype(subtype)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)

for s in ['interval[int64]', 'Interval[int64]', 'int64']:
i = IntervalDtype(s)
assert i.subtype == np.dtype('int64')
assert is_interval_dtype(i)
@pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
def test_construction_generic(self, subtype):
# generic
i = IntervalDtype(subtype)
assert i.subtype is None
assert is_interval_dtype(i)

@pytest.mark.parametrize('subtype', [
CategoricalDtype(list('abc'), False),
Expand All @@ -471,17 +476,27 @@ def test_construction_not_supported(self, subtype):
with tm.assert_raises_regex(TypeError, msg):
IntervalDtype(subtype)

def test_construction_generic(self):
# generic
i = IntervalDtype('interval')
assert i.subtype == ''
assert is_interval_dtype(i)
assert str(i) == 'interval[]'
def test_construction_errors(self):
msg = 'could not construct IntervalDtype'
with tm.assert_raises_regex(ValueError, msg):
IntervalDtype('xx')

i = IntervalDtype()
assert i.subtype is None
assert is_interval_dtype(i)
assert str(i) == 'interval'
def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)

@pytest.mark.parametrize('string', [
'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
def test_construction_from_string_errors(self, string):
if isinstance(string, string_types):
error, msg = ValueError, 'could not construct IntervalDtype'
else:
error, msg = TypeError, 'a string needs to be passed, got type'

with tm.assert_raises_regex(error, msg):
IntervalDtype.construct_from_string(string)

def test_subclass(self):
a = IntervalDtype('interval[int64]')
Expand All @@ -506,36 +521,45 @@ def test_is_dtype(self):
assert not IntervalDtype.is_dtype(np.int64)
assert not IntervalDtype.is_dtype(np.float64)

def test_identity(self):
assert (IntervalDtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_coerce_to_dtype(self):
assert (_coerce_to_dtype('interval[int64]') ==
IntervalDtype('interval[int64]'))

def test_construction_from_string(self):
result = IntervalDtype('interval[int64]')
assert is_dtype_equal(self.dtype, result)
result = IntervalDtype.construct_from_string('interval[int64]')
assert is_dtype_equal(self.dtype, result)
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('interval[foo]')
with pytest.raises(TypeError):
IntervalDtype.construct_from_string('foo[int64]')

def test_equality(self):
assert is_dtype_equal(self.dtype, 'interval[int64]')
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))

assert not is_dtype_equal(self.dtype, 'int64')
assert not is_dtype_equal(IntervalDtype('int64'),
IntervalDtype('float64'))

@pytest.mark.parametrize('subtype', [
None, 'interval', 'Interval', 'int64', 'uint64', 'float64',
'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')])
def test_equality_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert is_dtype_equal(dtype, 'interval')
assert is_dtype_equal(dtype, IntervalDtype())

@pytest.mark.parametrize('subtype', [
'int64', 'uint64', 'float64', 'complex128', 'datetime64',
'timedelta64', PeriodDtype('Q')])
def test_name_repr(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
expected = 'interval[{subtype}]'.format(subtype=subtype)
assert str(dtype) == expected
assert dtype.name == 'interval'

@pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
def test_name_repr_generic(self, subtype):
# GH 18980
dtype = IntervalDtype(subtype)
assert str(dtype) == 'interval'
assert dtype.name == 'interval'

def test_basic(self):
assert is_interval_dtype(self.dtype)

Expand Down