pandas-dev · jreback · Sep 19, 2017 · Sep 19, 2017
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -4492,7 +4492,7 @@ Several caveats.
 - The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
   error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index.
 - Duplicate column names and non-string columns names are not supported
-- Categorical dtypes are currently not-supported (for ``pyarrow``).
+- Categorical dtypes can be serialized to parquet (with ``pyarrow``, this requires version 0.7.0 or later), but will de-serialize as ``object`` dtype.
 - Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
   on an attempt at serialization.
 

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -2,11 +2,12 @@
 
 import pytest
 import datetime
+from distutils.version import LooseVersion
 from warnings import catch_warnings
 
 import numpy as np
 import pandas as pd
-from pandas.compat import PY3, is_platform_windows
+from pandas.compat import PY3
 from pandas.io.parquet import (to_parquet, read_parquet, get_engine,
                                PyArrowImpl, FastParquetImpl)
 from pandas.util import testing as tm
@@ -42,8 +43,24 @@ def engine(request):
 def pa():
     if not _HAVE_PYARROW:
         pytest.skip("pyarrow is not installed")
-    if is_platform_windows():
-        pytest.skip("pyarrow-parquet not building on windows")
+    return 'pyarrow'
+
+
+@pytest.fixture
+def pa_lt_070():
+    if not _HAVE_PYARROW:
+        pytest.skip("pyarrow is not installed")
+    if LooseVersion(pyarrow.__version__) >= '0.7.0':
+        pytest.skip("pyarrow is >= 0.7.0")
+    return 'pyarrow'
+
+
+@pytest.fixture
+def pa_ge_070():
+    if not _HAVE_PYARROW:
+        pytest.skip("pyarrow is not installed")
+    if LooseVersion(pyarrow.__version__) < '0.7.0':
+        pytest.skip("pyarrow is < 0.7.0")
     return 'pyarrow'
 
 
@@ -302,10 +319,6 @@ def test_unsupported(self, pa):
         df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
         self.check_error_on_write(df, pa, ValueError)
 
-        # categorical
-        df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
-        self.check_error_on_write(df, pa, NotImplementedError)
-
         # timedelta
         df = pd.DataFrame({'a': pd.timedelta_range('1 day',
                                                    periods=3)})
@@ -315,6 +328,23 @@ def test_unsupported(self, pa):
         df = pd.DataFrame({'a': ['a', 1, 2.0]})
         self.check_error_on_write(df, pa, ValueError)
 
+    def test_categorical(self, pa_ge_070):
+        pa = pa_ge_070
+
+        # supported in >= 0.7.0
+        df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
+
+        # de-serialized as object
+        expected = df.assign(a=df.a.astype(object))
+        self.check_round_trip(df, pa, expected)
+
+    def test_categorical_unsupported(self, pa_lt_070):
+        pa = pa_lt_070
+
+        # not supported in pyarrow < 0.7.0; writing raises NotImplementedError
+        df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
+        self.check_error_on_write(df, pa, NotImplementedError)
+
 
 class TestParquetFastParquet(Base):