SparseArrays backed by other Extension array/dtype ?

You can create a SparseDtype with an ExtensionDtype as its subtype:

```
In [14]: spdt = pd.SparseDtype(pd.DatetimeTZDtype(tz='Europe/Brussels'))

In [15]: spdt
Out[15]: Sparse[datetime64[ns, Europe/Brussels], NaT]
```

but I don't think that you can actually create a SparseArray with that?

At least, there seem to be several places in the sparse code that assume the subtype is a numpy dtype, and creating one also fails:

```
In [16]: pd.SparseArray([pd.Timestamp('2012-01-01', tz="Europe/Brussels"), pd.NaT], dtype=spdt)
Out[16]: ---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/scipy/repos/ipython/IPython/core/formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

~/scipy/repos/ipython/IPython/lib/pretty.py in pretty(self, obj)
    400                         if cls is not object \
    401                                 and callable(cls.__dict__.get('__repr__')):
--> 402                             return _repr_pprint(obj, self, cycle)
    403 
    404             return _default_pprint(obj, self, cycle)

~/scipy/repos/ipython/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
    695     """A pprint that just redirects to the normal repr function."""
    696     # Find newlines and replace them with p.break_()
--> 697     output = repr(obj)
    698     for idx,output_line in enumerate(output.splitlines()):
    699         if idx:

~/scipy/pandas/pandas/core/base.py in __repr__(self)
     65         Return a string representation for a particular object.
     66         """
---> 67         return str(self)
     68 
     69 

~/scipy/pandas/pandas/core/base.py in __str__(self)
     50         Return a string representation for a particular Object
     51         """
---> 52         return self.__unicode__()
     53 
     54     def __bytes__(self):

~/scipy/pandas/pandas/core/arrays/sparse.py in __unicode__(self)
   1826     def __unicode__(self):
   1827         return '{self}\nFill: {fill}\n{index}'.format(
-> 1828             self=printing.pprint_thing(self),
   1829             fill=printing.pprint_thing(self.fill_value),
   1830             index=printing.pprint_thing(self.sp_index))

~/scipy/pandas/pandas/io/formats/printing.py in pprint_thing(thing, _nest_lvl, escape_chars, default_escapes, quote_strings, max_seq_items)
    215         result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
    216                              quote_strings=quote_strings,
--> 217                              max_seq_items=max_seq_items)
    218     elif isinstance(thing, str) and quote_strings:
    219         result = "'{thing}'".format(thing=as_escaped_unicode(thing))

~/scipy/pandas/pandas/io/formats/printing.py in _pprint_seq(seq, _nest_lvl, max_seq_items, **kwds)
    111     r = [pprint_thing(next(s),
    112                       _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
--> 113          for i in range(min(nitems, len(seq)))]
    114     body = ", ".join(r)
    115 

~/scipy/pandas/pandas/io/formats/printing.py in <listcomp>(.0)
    111     r = [pprint_thing(next(s),
    112                       _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
--> 113          for i in range(min(nitems, len(seq)))]
    114     body = ", ".join(r)
    115 

~/scipy/pandas/pandas/core/arrays/base.py in __iter__(self)
    282         # calls to ``__getitem__``, which may be slower than necessary.
    283         for i in range(len(self)):
--> 284             yield self[i]
    285 
    286     # ------------------------------------------------------------------------

~/scipy/pandas/pandas/core/arrays/sparse.py in __getitem__(self, key)
   1075 
   1076         if is_integer(key):
-> 1077             return self._get_val_at(key)
   1078         elif isinstance(key, tuple):
   1079             data_slice = self.values[key]

~/scipy/pandas/pandas/core/arrays/sparse.py in _get_val_at(self, loc)
   1118             return self.fill_value
   1119         else:
-> 1120             return libindex.get_value_at(self.sp_values, sp_loc)
   1121 
   1122     def take(self, indices, allow_fill=False, fill_value=None):

TypeError: Argument 'arr' has incorrect type (expected numpy.ndarray, got DatetimeArray)
```

(although the construction itself appears to succeed; it is actually the repr that fails)

Is this something we would like to support? (cc @TomAugspurger )

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SparseArrays backed by other Extension array/dtype ? #26407

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

SparseArrays backed by other Extension array/dtype ? #26407

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions