Skip to content

Commit 7861a4a

Browse files
jrebackcldy
authored andcommitted
ENH: add to_xarray conversion method
supersedes pandas-dev#11950 xref pandas-dev#10000 Author: Jeff Reback <[email protected]> Closes pandas-dev#11972 from jreback/xarray and squashes the following commits: 85de0b7 [Jeff Reback] ENH: add to_xarray conversion method
1 parent 74c8344 commit 7861a4a

File tree

9 files changed

+222
-2
lines changed

9 files changed

+222
-2
lines changed

ci/requirements-2.7.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ html5lib=1.0b2
2020
beautiful-soup=4.2.1
2121
statsmodels
2222
jinja2=2.8
23+
xarray

ci/requirements-3.5.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ bottleneck
1717
sqlalchemy
1818
pymysql
1919
psycopg2
20+
xarray
2021

2122
# incompat with conda ATM
2223
# beautiful-soup

doc/source/api.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,7 @@ Serialization / IO / Conversion
684684
Series.to_csv
685685
Series.to_dict
686686
Series.to_frame
687+
Series.to_xarray
687688
Series.to_hdf
688689
Series.to_sql
689690
Series.to_msgpack
@@ -918,6 +919,7 @@ Reshaping, sorting, transposing
918919
DataFrame.unstack
919920
DataFrame.T
920921
DataFrame.to_panel
922+
DataFrame.to_xarray
921923
DataFrame.transpose
922924

923925
Combining / joining / merging
@@ -1216,6 +1218,7 @@ Serialization / IO / Conversion
12161218
Panel.to_json
12171219
Panel.to_sparse
12181220
Panel.to_frame
1221+
Panel.to_xarray
12191222
Panel.to_clipboard
12201223

12211224
.. _api.panel4d:
@@ -1230,6 +1233,13 @@ Constructor
12301233

12311234
Panel4D
12321235

1236+
Serialization / IO / Conversion
1237+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1238+
.. autosummary::
1239+
:toctree: generated/
1240+
1241+
Panel4D.to_xarray
1242+
12331243
Attributes and underlying data
12341244
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12351245
**Axes**

doc/source/install.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Optional Dependencies
244244
* `Cython <http://www.cython.org>`__: Only necessary to build development
245245
version. Version 0.19.1 or higher.
246246
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
247+
* `xarray <http://xarray.readthedocs.org>`__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
247248
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
248249
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
249250

doc/source/whatsnew/v0.18.0.txt

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,6 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t
274274
s
275275
s.dt.round('D')
276276

277-
278277
Formatting of integer in FloatIndex
279278
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
280279

@@ -315,6 +314,35 @@ New Behavior:
315314
s.index
316315
print(s.to_csv(path=None))
317316

317+
.. _whatsnew_0180.enhancements.xarray:
318+
319+
to_xarray
320+
^^^^^^^^^
321+
322+
In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity,
323+
all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which have
324+
a pandas-like interface for > 2 ndim.
325+
326+
See the `full xarray documentation here <http://xarray.pydata.org/en/stable/>`__.
327+
328+
.. code-block:: python
329+
330+
In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4))
331+
332+
In [2]: p.to_xarray()
333+
Out[2]:
334+
<xarray.DataArray (items: 2, major_axis: 3, minor_axis: 4)>
335+
array([[[ 0, 1, 2, 3],
336+
[ 4, 5, 6, 7],
337+
[ 8, 9, 10, 11]],
338+
339+
[[12, 13, 14, 15],
340+
[16, 17, 18, 19],
341+
[20, 21, 22, 23]]])
342+
Coordinates:
343+
* items (items) int64 0 1
344+
* major_axis (major_axis) int64 0 1 2
345+
* minor_axis (minor_axis) int64 0 1 2 3
318346

319347
.. _whatsnew_0180.enhancements.other:
320348

pandas/core/generic.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
10401040
from pandas.io import clipboard
10411041
clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)
10421042

1043+
def to_xarray(self):
    """
    Convert this pandas object into the corresponding xarray object.

    Returns
    -------
    a DataArray for a Series
    a Dataset for a DataFrame
    a DataArray for higher dims

    Notes
    -----
    See the `xarray docs <http://xarray.pydata.org/en/stable/>`__
    """
    # imported lazily so that xarray is only needed when this is called
    import xarray

    ndim = self.ndim
    if ndim == 1:
        return xarray.DataArray.from_series(self)
    if ndim == 2:
        return xarray.Dataset.from_dataframe(self)

    # > 2 dims: pair every axis name with its labels, in axis order
    coords = []
    for axis in self._AXIS_ORDERS:
        coords.append((axis, self._get_axis(axis)))
    return xarray.DataArray(self, coords=coords)
1068+
10431069
# ----------------------------------------------------------------------
10441070
# Fancy Indexing
10451071

pandas/tests/test_generic.py

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from distutils.version import LooseVersion
1010
from pandas import (Index, Series, DataFrame, Panel, isnull,
11-
date_range, period_range)
11+
date_range, period_range, Panel4D)
1212
from pandas.core.index import MultiIndex
1313

1414
import pandas.core.common as com
@@ -18,6 +18,8 @@
1818
from pandas.util.testing import (assert_series_equal,
1919
assert_frame_equal,
2020
assert_panel_equal,
21+
assert_panel4d_equal,
22+
assert_almost_equal,
2123
assert_equal)
2224
import pandas.util.testing as tm
2325

@@ -1063,6 +1065,52 @@ def test_describe_none(self):
10631065
expected = Series([0, 0], index=['count', 'unique'], name='None')
10641066
assert_series_equal(noneSeries.describe(), expected)
10651067

1068+
def test_to_xarray(self):
    """Check Series.to_xarray for empty, flat-indexed and MultiIndexed data."""
    tm._skip_if_no_xarray()
    from xarray import DataArray

    # empty Series: no values, but the named index is still a coordinate
    s = Series([])
    s.index.name = 'foo'
    result = s.to_xarray()
    self.assertEqual(len(result), 0)
    self.assertEqual(len(result.coords), 1)
    assert_almost_equal(list(result.coords.keys()), ['foo'])
    self.assertIsInstance(result, DataArray)

    def testit(index, check_index_type=True):
        # `index` is a factory that is called with the desired length
        s = Series(range(6), index=index(6))
        s.index.name = 'foo'
        result = s.to_xarray()
        # building the repr must not raise
        repr(result)
        self.assertEqual(len(result), 6)
        self.assertEqual(len(result.coords), 1)
        assert_almost_equal(list(result.coords.keys()), ['foo'])
        self.assertIsInstance(result, DataArray)

        # round trip back to a Series
        assert_series_equal(result.to_series(), s,
                            check_index_type=check_index_type)

    for index in [tm.makeFloatIndex, tm.makeIntIndex,
                  tm.makeStringIndex, tm.makeUnicodeIndex,
                  tm.makeDateIndex, tm.makePeriodIndex,
                  tm.makeTimedeltaIndex]:
        testit(index)

    # categorical index: the round trip is compared without the index type
    testit(tm.makeCategoricalIndex, check_index_type=False)

    # MultiIndex: each level is expected to become its own coordinate.
    # No name is set on the default index first, since the whole index is
    # replaced here anyway.
    s = Series(range(6))
    s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)],
                                         names=['one', 'two'])
    result = s.to_xarray()
    self.assertEqual(len(result), 2)
    assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
    self.assertIsInstance(result, DataArray)
    assert_series_equal(result.to_series(), s)
1113+
10661114

10671115
class TestDataFrame(tm.TestCase, Generic):
10681116
_typ = DataFrame
@@ -1783,11 +1831,103 @@ def test_pct_change(self):
17831831

17841832
self.assert_frame_equal(result, expected)
17851833

1834+
def test_to_xarray(self):
    """Check DataFrame.to_xarray across index types and its round trip."""
    tm._skip_if_no_xarray()
    from xarray import Dataset

    # one column for each dtype exercised by the conversion
    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': pd.date_range('20130101',
                                       periods=3,
                                       tz='US/Eastern')}
                   )

    # zero-row frame: the named index dimension exists with length 0
    df.index.name = 'foo'
    empty = df[0:0].to_xarray()
    self.assertEqual(empty.dims['foo'], 0)
    self.assertIsInstance(empty, Dataset)

    index_makers = [tm.makeFloatIndex, tm.makeIntIndex,
                    tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeDateIndex, tm.makePeriodIndex,
                    tm.makeCategoricalIndex, tm.makeTimedeltaIndex]
    for make_index in index_makers:
        df.index = make_index(3)
        df.index.name = 'foo'
        df.columns.name = 'bar'
        converted = df.to_xarray()
        self.assertEqual(converted.dims['foo'], 3)
        self.assertEqual(len(converted.coords), 1)
        self.assertEqual(len(converted.data_vars), 8)
        assert_almost_equal(list(converted.coords.keys()), ['foo'])
        self.assertIsInstance(converted, Dataset)

        # round trip back to a DataFrame, allowing for what is lost:
        # categoricals are not preserved
        # datetimes w/tz are not preserved
        # column names are lost
        expected = df.copy()
        expected['f'] = expected['f'].astype(object)
        expected['h'] = expected['h'].astype('datetime64[ns]')
        expected.columns.name = None
        assert_frame_equal(converted.to_dataframe(),
                           expected,
                           check_index_type=False)

    # not implemented: a MultiIndexed frame is expected to raise
    df.index = pd.MultiIndex.from_product([['a'], range(3)],
                                          names=['one', 'two'])
    self.assertRaises(ValueError, df.to_xarray)
1886+
17861887

17871888
class TestPanel(tm.TestCase, Generic):
    # object type under test and the equality helper used for comparisons
    _typ = Panel
    _comparator = lambda self, x, y: assert_panel_equal(x, y)

    def test_to_xarray(self):
        """Check that Panel.to_xarray gives a 3-d DataArray and round-trips."""
        tm._skip_if_no_xarray()
        from xarray import DataArray

        panel = tm.makePanel()
        converted = panel.to_xarray()

        self.assertIsInstance(converted, DataArray)
        self.assertEqual(len(converted.coords), 3)
        expected_axes = ['items', 'major_axis', 'minor_axis']
        assert_almost_equal(list(converted.coords.keys()), expected_axes)
        self.assertEqual(len(converted.dims), 3)

        # round trip back to a Panel
        assert_panel_equal(converted.to_pandas(), panel)
1908+
1909+
1910+
class TestPanel4D(tm.TestCase, Generic):
    # object type under test and the equality helper used for comparisons
    _typ = Panel4D
    _comparator = lambda self, x, y: assert_panel4d_equal(x, y)

    def test_to_xarray(self):
        """Check that Panel4D.to_xarray gives a 4-d DataArray."""
        tm._skip_if_no_xarray()
        from xarray import DataArray

        p4d = tm.makePanel4D()
        converted = p4d.to_xarray()

        self.assertIsInstance(converted, DataArray)
        self.assertEqual(len(converted.coords), 4)
        expected_axes = ['labels', 'items', 'major_axis', 'minor_axis']
        assert_almost_equal(list(converted.coords.keys()), expected_axes)
        self.assertEqual(len(converted.dims), 4)

        # non-convertible: going back via to_pandas is expected to raise
        self.assertRaises(ValueError, converted.to_pandas)
1930+
17911931

17921932
class TestNDFrame(tm.TestCase):
17931933
# tests that don't fit elsewhere

pandas/util/print_versions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def show_versions(as_json=False):
6868
("numpy", lambda mod: mod.version.version),
6969
("scipy", lambda mod: mod.version.version),
7070
("statsmodels", lambda mod: mod.__version__),
71+
("xarray", lambda mod: mod.__version__),
7172
("IPython", lambda mod: mod.__version__),
7273
("sphinx", lambda mod: mod.__version__),
7374
("patsy", lambda mod: mod.__version__),

pandas/util/testing.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,18 @@ def _skip_if_scipy_0_17():
224224
import nose
225225
raise nose.SkipTest("scipy 0.17")
226226

227+
def _skip_if_no_xarray():
    """
    Skip the calling test unless a recent enough xarray can be imported.

    Raises
    ------
    nose.SkipTest
        If xarray is not installed, or if the installed version is
        older than 0.7.0.
    """
    try:
        import xarray
    except ImportError:
        # nose is only imported on the paths that actually skip
        import nose
        raise nose.SkipTest("xarray not installed")

    v = xarray.__version__
    # wrap both sides so the comparison is explicitly version-aware instead
    # of relying on LooseVersion's reflected comparison with a plain string
    if LooseVersion(v) < LooseVersion('0.7.0'):
        import nose
        raise nose.SkipTest("xarray version is too low: {0}".format(v))
238+
227239
def _skip_if_no_pytz():
228240
try:
229241
import pytz

0 commit comments

Comments
 (0)