Skip to content

Commit 14a7e02

Browse files
committed
ENH: add to_xray conversion method
1 parent 1dc78c7 commit 14a7e02

File tree

8 files changed

+195
-9
lines changed

8 files changed

+195
-9
lines changed

ci/requirements-2.7.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ html5lib=1.0b2
2020
beautiful-soup=4.2.1
2121
statsmodels
2222
jinja2=2.8
23+
xray

ci/requirements-3.5.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ html5lib
1313
lxml
1414
matplotlib
1515
jinja2
16+
xray
1617

1718
# currently causing some warnings
1819
#sqlalchemy

doc/source/install.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ Optional Dependencies
245245
* `Cython <http://www.cython.org>`__: Only necessary to build development
246246
version. Version 0.19.1 or higher.
247247
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
248+
* `xray <http://xray.readthedocs.org>`__: pandas-like handling of data with more than 2 dimensions.
248249
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
249250
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended.
250251
* Besides SQLAlchemy, you also need a database specific driver.

pandas/core/generic.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,29 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
10411041
from pandas.io import clipboard
10421042
clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)
10431043

1044-
#----------------------------------------------------------------------
1044+
def to_xray(self):
    """
    Convert this pandas object into the matching xray object.

    ``xray`` is imported when the method is called, so it only needs
    to be installed if the conversion is actually used.

    Returns
    -------
    a DataArray for a Series
    a Dataset for a DataFrame
    a Dataset for higher dims
    """
    import xray

    ndim = self.ndim
    if ndim == 1:
        return xray.DataArray.from_series(self)
    if ndim == 2:
        return xray.Dataset.from_dataframe(self)

    # > 2 dims: pair every axis name with its labels so that each
    # dimension of the array gets a named coordinate, then convert
    # the resulting array into a Dataset
    axes = [(name, self._get_axis(name)) for name in self._AXIS_ORDERS]
    data_array = xray.DataArray(self, coords=axes)
    return data_array.to_dataset()
1065+
1066+
# ----------------------------------------------------------------------
10451067
# Fancy Indexing
10461068

10471069
@classmethod

pandas/tests/test_frame.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5457,10 +5457,10 @@ def test_repr_column_name_unicode_truncation_bug(self):
54575457
def test_head_tail(self):
54585458
assert_frame_equal(self.frame.head(), self.frame[:5])
54595459
assert_frame_equal(self.frame.tail(), self.frame[-5:])
5460-
5460+
54615461
assert_frame_equal(self.frame.head(0), self.frame[0:0])
54625462
assert_frame_equal(self.frame.tail(0), self.frame[0:0])
5463-
5463+
54645464
assert_frame_equal(self.frame.head(-1), self.frame[:-1])
54655465
assert_frame_equal(self.frame.tail(-1), self.frame[1:])
54665466
assert_frame_equal(self.frame.head(1), self.frame[:1])
@@ -13564,10 +13564,11 @@ def test_round_issue(self):
1356413564

1356513565
decimals = pd.Series([1, 0, 2], index=['A', 'B', 'A'])
1356613566
self.assertRaises(ValueError, df.round, decimals)
13567-
13567+
1356813568
def test_built_in_round(self):
1356913569
if not compat.PY3:
13570-
raise nose.SkipTest('build in round cannot be overriden prior to Python 3')
13570+
raise nose.SkipTest('build in round cannot be '
13571+
'overriden prior to Python 3')
1357113572

1357213573
# GH11763
1357313574
# Here's the test frame we'll be working with
@@ -13578,7 +13579,7 @@ def test_built_in_round(self):
1357813579
expected_rounded = DataFrame(
1357913580
{'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
1358013581
tm.assert_frame_equal(round(df), expected_rounded)
13581-
13582+
1358213583
def test_quantile(self):
1358313584
from numpy import percentile
1358413585

pandas/tests/test_generic.py

Lines changed: 154 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
# -*- coding: utf-8 -*-
22
# pylint: disable-msg=E1101,W0612
33

4+
from distutils.version import LooseVersion
45
from datetime import datetime, timedelta
56
import nose
67
import numpy as np
78
from numpy import nan
89
import pandas as pd
910

10-
from pandas import (Index, Series, DataFrame, Panel,
11-
isnull, notnull, date_range, period_range)
12-
from pandas.core.index import Index, MultiIndex
11+
from pandas import (Index, Series, DataFrame, Panel, Panel4D,
12+
isnull, notnull, date_range, period_range,
13+
MultiIndex)
1314

1415
import pandas.core.common as com
1516

@@ -18,6 +19,7 @@
1819
from pandas.util.testing import (assert_series_equal,
1920
assert_frame_equal,
2021
assert_panel_equal,
22+
assert_panel4d_equal,
2123
assert_almost_equal,
2224
assert_equal,
2325
ensure_clean)
@@ -1032,6 +1034,55 @@ def test_describe_none(self):
10321034
expected = Series([0, 0], index=['count', 'unique'], name='None')
10331035
assert_series_equal(noneSeries.describe(), expected)
10341036

1037+
def test_to_xray(self):
    # Series.to_xray() should give a DataArray whose coordinates are
    # named after the levels of the Series index

    tm._skip_if_no_xray()
    import xray
    from xray import DataArray

    if LooseVersion(xray.__version__) > '0.6.1':
        # empty Series, only checked on xray releases after 0.6.1
        # https://github.com/xray/xray/issues/697
        s = Series([])
        s.index.name = 'foo'
        result = s.to_xray()
        self.assertEqual(len(result), 0)
        self.assertEqual(len(result.coords), 1)
        # keys() may be a view rather than a list (e.g. on Python 3),
        # and a view never compares equal to a list
        self.assertEqual(list(result.coords.keys()), ['foo'])
        self.assertIsInstance(result, DataArray)

    for index in [tm.makeFloatIndex, tm.makeIntIndex,
                  tm.makeStringIndex, tm.makeUnicodeIndex,
                  tm.makeDateIndex, tm.makePeriodIndex,
                  tm.makeTimedeltaIndex]:
        s = Series(range(6), index=index(6))
        s.index.name = 'foo'
        result = s.to_xray()
        # building the repr must not raise
        repr(result)
        self.assertEqual(len(result), 6)
        self.assertEqual(len(result.coords), 1)
        assert_almost_equal(result.coords.keys(), ['foo'])
        self.assertIsInstance(result, DataArray)

        # round trip back to the original Series
        assert_series_equal(result.to_series(), s)

    # fails ATM: the conversion itself works for a CategoricalIndex,
    # but the repr of the result raises
    # https://github.com/xray/xray/issues/700
    for index in [tm.makeCategoricalIndex]:
        s = Series(range(6), index=index(6))
        s.index.name = 'foo'

        result = s.to_xray()
        self.assertRaises(ValueError, lambda: repr(result))

    # a two-level MultiIndex yields one coordinate per level
    s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)],
                                         names=['one', 'two'])
    result = s.to_xray()
    self.assertEqual(len(result), 2)
    assert_almost_equal(result.coords.keys(), ['one', 'two'])
    self.assertIsInstance(result, DataArray)
    assert_series_equal(result.to_series(), s)
1085+
10351086

10361087
class TestDataFrame(tm.TestCase, Generic):
10371088
_typ = DataFrame
@@ -1715,10 +1766,110 @@ def test_pct_change(self):
17151766

17161767
self.assert_frame_equal(result, expected)
17171768

1769+
def test_to_xray(self):
    # DataFrame.to_xray() should give a Dataset with one data variable
    # per column and the frame's index as its only coordinate

    tm._skip_if_no_xray()
    import xray
    from xray import Dataset

    tz_dates = pd.date_range('20130101', periods=3, tz='US/Eastern')
    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': tz_dates})

    xray_version = LooseVersion(xray.__version__)
    if xray_version > '0.6.1':
        # zero-row frame, only checked on xray releases after 0.6.1
        # https://github.com/xray/xray/issues/697
        df.index.name = 'foo'
        empty = df[0:0]
        result = empty.to_xray()
        self.assertEqual(result.dims['foo'], 0)
        self.assertIsInstance(result, Dataset)

    index_makers = (tm.makeFloatIndex, tm.makeIntIndex,
                    tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeDateIndex, tm.makePeriodIndex,
                    tm.makeCategoricalIndex, tm.makeTimedeltaIndex)
    for make_index in index_makers:
        df.index = make_index(3)
        df.index.name = 'foo'
        df.columns.name = 'bar'
        result = df.to_xray()
        self.assertEqual(result.dims['foo'], 3)
        self.assertEqual(len(result.coords), 1)
        self.assertEqual(len(result.data_vars), 8)
        assert_almost_equal(result.coords.keys(), ['foo'])
        self.assertIsInstance(result, Dataset)

        # round trip, allowing for what the conversion drops:
        #  - categoricals are not preserved
        #  - datetimes w/tz are not preserved
        #  - column names are lost
        expected = df.copy()
        expected['f'] = expected['f'].astype(object)
        expected['h'] = expected['h'].astype('datetime64[ns]')
        expected.columns.name = None
        roundtripped = result.to_dataframe()
        assert_frame_equal(roundtripped, expected,
                           check_index_type=False)

    # not implemented: a MultiIndexed frame is rejected
    df.index = pd.MultiIndex.from_product([['a'], range(3)],
                                          names=['one', 'two'])
    self.assertRaises(ValueError, df.to_xray)
1824+
1825+
17181826
class TestPanel(tm.TestCase, Generic):
    # Panel test case built on the shared Generic mixin
    _typ = Panel
    _comparator = lambda self, x, y: assert_panel_equal(x, y)

    def test_to_xray(self):
        # Panel.to_xray() should give a Dataset with one coordinate
        # and one dimension per Panel axis

        tm._skip_if_no_xray()
        from xray import Dataset

        # NOTE(review): an empty, version-gated placeholder
        # (xray > 0.6.1, https://github.com/xray/xray/issues/697)
        # used to sit here; it asserted nothing, so it was dropped.
        # The Series/DataFrame tests use that gate for empty objects;
        # presumably an empty-Panel check was intended - TODO confirm.
        p = tm.makePanel()

        result = p.to_xray()
        self.assertIsInstance(result, Dataset)
        self.assertEqual(len(result.coords), 3)
        assert_almost_equal(result.coords.keys(),
                            ['items', 'major_axis', 'minor_axis'])
        self.assertEqual(len(result.dims), 3)
1848+
1849+
1850+
class TestPanel4D(tm.TestCase, Generic):
    # Panel4D test case built on the shared Generic mixin
    _typ = Panel4D
    _comparator = lambda self, x, y: assert_panel4d_equal(x, y)

    def test_to_xray(self):
        # Panel4D.to_xray() should give a Dataset with one coordinate
        # and one dimension per Panel4D axis

        tm._skip_if_no_xray()
        from xray import Dataset

        # NOTE(review): an empty, version-gated placeholder
        # (xray > 0.6.1, https://github.com/xray/xray/issues/697)
        # used to sit here; it asserted nothing, so it was dropped.
        # The Series/DataFrame tests use that gate for empty objects;
        # presumably an empty-Panel4D check was intended - TODO confirm.
        p = tm.makePanel4D()

        result = p.to_xray()
        self.assertIsInstance(result, Dataset)
        self.assertEqual(len(result.coords), 4)
        assert_almost_equal(result.coords.keys(),
                            ['labels', 'items', 'major_axis', 'minor_axis'])
        self.assertEqual(len(result.dims), 4)
1872+
17221873

17231874
class TestNDFrame(tm.TestCase):
17241875
# tests that don't fit elsewhere

pandas/util/print_versions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def show_versions(as_json=False):
6767
("numpy", lambda mod: mod.version.version),
6868
("scipy", lambda mod: mod.version.version),
6969
("statsmodels", lambda mod: mod.__version__),
70+
("xray", lambda mod: mod.__version__),
7071
("IPython", lambda mod: mod.__version__),
7172
("sphinx", lambda mod: mod.__version__),
7273
("patsy", lambda mod: mod.__version__),

pandas/util/testing.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ def _skip_if_no_scipy():
215215
raise nose.SkipTest('scipy.interpolate missing')
216216

217217

218+
def _skip_if_no_xray():
    # Skip the calling test when the optional ``xray`` package cannot
    # be imported; returns None when the import succeeds.
    try:
        import xray
    except ImportError:
        # nose is imported only on the failure path
        import nose
        raise nose.SkipTest("xray not installed")
224+
225+
218226
def _skip_if_no_pytz():
219227
try:
220228
import pytz

0 commit comments

Comments
 (0)