Skip to content

Commit de7553b

Browse files
committed
Fix IntervalDtype Bugs and Inconsistencies
1 parent 36a71eb commit de7553b

File tree

3 files changed

+90
-53
lines changed

3 files changed

+90
-53
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ Other API Changes
270270
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
271271
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
272272
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
273+
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
273274

274275
.. _whatsnew_0230.deprecations:
275276

@@ -375,7 +376,7 @@ Conversion
375376
- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`)
376377
- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`)
377378
- Fixed bug where comparing :class:`DatetimeIndex` failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`)
378-
-
379+
- Bug in ``IntervalDtype`` when constructing two instances with subtype ``CategoricalDtype`` where the second instance used cached attributes from the first (:issue:`18980`)
379380

380381
Indexing
381382
^^^^^^^^

pandas/core/dtypes/dtypes.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype):
626626
627627
THIS IS NOT A REAL NUMPY DTYPE
628628
"""
629+
name = 'interval'
629630
type = IntervalDtypeType
630631
kind = None
631632
str = '|O08'
@@ -653,8 +654,8 @@ def __new__(cls, subtype=None):
653654
u.subtype = None
654655
return u
655656
elif (isinstance(subtype, compat.string_types) and
656-
subtype == 'interval'):
657-
subtype = ''
657+
subtype in ('interval', 'interval[]')):
658+
subtype = None
658659
else:
659660
if isinstance(subtype, compat.string_types):
660661
m = cls._match.search(subtype)
@@ -678,11 +679,15 @@ def __new__(cls, subtype=None):
678679
raise TypeError(msg)
679680

680681
try:
681-
return cls._cache[str(subtype)]
682+
# GH 18980: need to combine since str and hash individually may not
683+
# be unique, e.g. str(CategoricalDtype) always returns 'category',
684+
# and hash(np.dtype('<m8')) == hash(np.dtype('<m8[ns]'))
685+
key = ''.join([str(subtype), str(hash(subtype))])
686+
return cls._cache[key]
682687
except KeyError:
683688
u = object.__new__(cls)
684689
u.subtype = subtype
685-
cls._cache[str(subtype)] = u
690+
cls._cache[key] = u
686691
return u
687692

688693
@classmethod
@@ -692,31 +697,29 @@ def construct_from_string(cls, string):
692697
if its not possible
693698
"""
694699
if isinstance(string, compat.string_types):
695-
try:
696-
return cls(string)
697-
except ValueError:
698-
pass
699-
raise TypeError("could not construct IntervalDtype")
700+
return cls(string)
701+
msg = "a string needs to be passed, got type {typ}"
702+
raise TypeError(msg.format(typ=type(string)))
700703

701704
def __unicode__(self):
702705
if self.subtype is None:
703706
return "interval"
704707
return "interval[{subtype}]".format(subtype=self.subtype)
705708

706-
@property
707-
def name(self):
708-
return str(self)
709-
710709
def __hash__(self):
711710
# make myself hashable
712711
return hash(str(self))
713712

714713
def __eq__(self, other):
715714
if isinstance(other, compat.string_types):
716-
return other == self.name or other == self.name.title()
717-
718-
return (isinstance(other, IntervalDtype) and
719-
self.subtype == other.subtype)
715+
return other.title() in (self.name.title(), str(self).title())
716+
elif not isinstance(other, IntervalDtype):
717+
return False
718+
elif self.subtype is None or other.subtype is None:
719+
# None should match any subtype
720+
return True
721+
else:
722+
return self.subtype == other.subtype
720723

721724
@classmethod
722725
def is_dtype(cls, dtype):

pandas/tests/dtypes/test_dtypes.py

Lines changed: 68 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def test_hash_vs_equality(self):
433433
assert dtype2 == dtype
434434
assert dtype3 == dtype
435435
assert dtype is dtype2
436-
assert dtype2 is dtype
436+
assert dtype2 is dtype3
437437
assert dtype3 is dtype
438438
assert hash(dtype) == hash(dtype2)
439439
assert hash(dtype) == hash(dtype3)
@@ -451,14 +451,19 @@ def test_hash_vs_equality(self):
451451
assert hash(dtype2) == hash(dtype2)
452452
assert hash(dtype2) == hash(dtype3)
453453

454-
def test_construction(self):
455-
with pytest.raises(ValueError):
456-
IntervalDtype('xx')
454+
@pytest.mark.parametrize('subtype', [
455+
'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
456+
def test_construction(self, subtype):
457+
i = IntervalDtype(subtype)
458+
assert i.subtype == np.dtype('int64')
459+
assert is_interval_dtype(i)
457460

458-
for s in ['interval[int64]', 'Interval[int64]', 'int64']:
459-
i = IntervalDtype(s)
460-
assert i.subtype == np.dtype('int64')
461-
assert is_interval_dtype(i)
461+
@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
462+
def test_construction_generic(self, subtype):
463+
# generic
464+
i = IntervalDtype(subtype)
465+
assert i.subtype is None
466+
assert is_interval_dtype(i)
462467

463468
@pytest.mark.parametrize('subtype', [
464469
CategoricalDtype(list('abc'), False),
@@ -471,17 +476,27 @@ def test_construction_not_supported(self, subtype):
471476
with tm.assert_raises_regex(TypeError, msg):
472477
IntervalDtype(subtype)
473478

474-
def test_construction_generic(self):
475-
# generic
476-
i = IntervalDtype('interval')
477-
assert i.subtype == ''
478-
assert is_interval_dtype(i)
479-
assert str(i) == 'interval[]'
479+
def test_construction_errors(self):
480+
msg = 'could not construct IntervalDtype'
481+
with tm.assert_raises_regex(ValueError, msg):
482+
IntervalDtype('xx')
480483

481-
i = IntervalDtype()
482-
assert i.subtype is None
483-
assert is_interval_dtype(i)
484-
assert str(i) == 'interval'
484+
def test_construction_from_string(self):
485+
result = IntervalDtype('interval[int64]')
486+
assert is_dtype_equal(self.dtype, result)
487+
result = IntervalDtype.construct_from_string('interval[int64]')
488+
assert is_dtype_equal(self.dtype, result)
489+
490+
@pytest.mark.parametrize('string', [
491+
'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
492+
def test_construction_from_string_errors(self, string):
493+
if isinstance(string, string_types):
494+
error, msg = ValueError, 'could not construct IntervalDtype'
495+
else:
496+
error, msg = TypeError, 'a string needs to be passed, got type'
497+
498+
with tm.assert_raises_regex(error, msg):
499+
IntervalDtype.construct_from_string(string)
485500

486501
def test_subclass(self):
487502
a = IntervalDtype('interval[int64]')
@@ -506,36 +521,45 @@ def test_is_dtype(self):
506521
assert not IntervalDtype.is_dtype(np.int64)
507522
assert not IntervalDtype.is_dtype(np.float64)
508523

509-
def test_identity(self):
510-
assert (IntervalDtype('interval[int64]') ==
511-
IntervalDtype('interval[int64]'))
512-
513524
def test_coerce_to_dtype(self):
514525
assert (_coerce_to_dtype('interval[int64]') ==
515526
IntervalDtype('interval[int64]'))
516527

517-
def test_construction_from_string(self):
518-
result = IntervalDtype('interval[int64]')
519-
assert is_dtype_equal(self.dtype, result)
520-
result = IntervalDtype.construct_from_string('interval[int64]')
521-
assert is_dtype_equal(self.dtype, result)
522-
with pytest.raises(TypeError):
523-
IntervalDtype.construct_from_string('foo')
524-
with pytest.raises(TypeError):
525-
IntervalDtype.construct_from_string('interval[foo]')
526-
with pytest.raises(TypeError):
527-
IntervalDtype.construct_from_string('foo[int64]')
528-
529528
def test_equality(self):
530529
assert is_dtype_equal(self.dtype, 'interval[int64]')
531530
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
532-
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
533531
assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))
534532

535533
assert not is_dtype_equal(self.dtype, 'int64')
536534
assert not is_dtype_equal(IntervalDtype('int64'),
537535
IntervalDtype('float64'))
538536

537+
@pytest.mark.parametrize('subtype', [
538+
None, 'interval', 'interval[]', 'int64', 'uint64', 'float64', object,
539+
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
540+
def test_equality_generic(self, subtype):
541+
# GH 18980
542+
dtype = IntervalDtype(subtype)
543+
assert is_dtype_equal(dtype, 'interval')
544+
assert is_dtype_equal(dtype, IntervalDtype())
545+
546+
@pytest.mark.parametrize('subtype', [
547+
'int64', 'uint64', 'float64', 'complex128', np.dtype('O'),
548+
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
549+
def test_name_repr(self, subtype):
550+
# GH 18980
551+
dtype = IntervalDtype(subtype)
552+
expected = 'interval[{subtype}]'.format(subtype=subtype)
553+
assert str(dtype) == expected
554+
assert dtype.name == 'interval'
555+
556+
@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
557+
def test_name_repr_generic(self, subtype):
558+
# GH 18980
559+
dtype = IntervalDtype(subtype)
560+
assert str(dtype) == 'interval'
561+
assert dtype.name == 'interval'
562+
539563
def test_basic(self):
540564
assert is_interval_dtype(self.dtype)
541565

@@ -576,6 +600,15 @@ def test_caching(self):
576600
tm.round_trip_pickle(dtype)
577601
assert len(IntervalDtype._cache) == 0
578602

603+
def test_caching_categoricaldtype(self):
604+
# GH 18980
605+
cdt1 = CategoricalDtype(list('abc'), True)
606+
cdt2 = CategoricalDtype(list('wxyz'), False)
607+
idt1 = IntervalDtype(cdt1)
608+
idt2 = IntervalDtype(cdt2)
609+
assert idt1.subtype is cdt1
610+
assert idt2.subtype is cdt2
611+
579612

580613
class TestCategoricalDtypeParametrized(object):
581614

0 commit comments

Comments
 (0)