Open
Description
You can create a SparseDtype with an ExtensionDtype as its subtype:
In [14]: spdt = pd.SparseDtype(pd.DatetimeTZDtype(tz='Europe/Brussels'))
In [15]: spdt
Out[15]: Sparse[datetime64[ns, Europe/Brussels], NaT]
but I don't think that you can actually create a SparseArray with that?
At least, there seem to be several places in the sparse code that assume the subtype is a numpy dtype, and creating one also fails:
In [16]: pd.SparseArray([pd.Timestamp('2012-01-01', tz="Europe/Brussels"), pd.NaT], dtype=spdt)
Out[16]: ---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/scipy/repos/ipython/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~/scipy/repos/ipython/IPython/lib/pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
~/scipy/repos/ipython/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
~/scipy/pandas/pandas/core/base.py in __repr__(self)
65 Return a string representation for a particular object.
66 """
---> 67 return str(self)
68
69
~/scipy/pandas/pandas/core/base.py in __str__(self)
50 Return a string representation for a particular Object
51 """
---> 52 return self.__unicode__()
53
54 def __bytes__(self):
~/scipy/pandas/pandas/core/arrays/sparse.py in __unicode__(self)
1826 def __unicode__(self):
1827 return '{self}\nFill: {fill}\n{index}'.format(
-> 1828 self=printing.pprint_thing(self),
1829 fill=printing.pprint_thing(self.fill_value),
1830 index=printing.pprint_thing(self.sp_index))
~/scipy/pandas/pandas/io/formats/printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
215 result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
216 quote_strings=quote_strings,
--> 217 max_seq_items=max_seq_items)
218 elif isinstance(thing, str) and quote_strings:
219 result = "'{thing}'".format(thing=as_escaped_unicode(thing))
~/scipy/pandas/pandas/io/formats/printing.py in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds)
111 r = [pprint_thing(next(s),
112 _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
--> 113 for i in range(min(nitems, len(seq)))]
114 body = ", ".join(r)
115
~/scipy/pandas/pandas/io/formats/printing.py in <listcomp>(.0)
111 r = [pprint_thing(next(s),
112 _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
--> 113 for i in range(min(nitems, len(seq)))]
114 body = ", ".join(r)
115
~/scipy/pandas/pandas/core/arrays/base.py in __iter__(self)
282 # calls to ``__getitem__``, which may be slower than necessary.
283 for i in range(len(self)):
--> 284 yield self[i]
285
286 # ------------------------------------------------------------------------
~/scipy/pandas/pandas/core/arrays/sparse.py in __getitem__(self, key)
1075
1076 if is_integer(key):
-> 1077 return self._get_val_at(key)
1078 elif isinstance(key, tuple):
1079 data_slice = self.values[key]
~/scipy/pandas/pandas/core/arrays/sparse.py in _get_val_at(self, loc)
1118 return self.fill_value
1119 else:
-> 1120 return libindex.get_value_at(self.sp_values, sp_loc)
1121
1122 def take(self, indices, allow_fill=False, fill_value=None):
TypeError: Argument 'arr' has incorrect type (expected numpy.ndarray, got DatetimeArray)
(although it is actually the repr that fails, not the construction itself)
Is this something we would like to support? (cc @TomAugspurger)