pandas-dev · jreback · Nov 29, 2020 · Jan 23, 2020 · Sep 4, 2020 · Sep 4, 2020
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -1,5 +1,6 @@
 """ parquet compat """
 
+from distutils.version import LooseVersion
 from typing import Any, Dict, Optional
 from warnings import catch_warnings
 
@@ -116,13 +117,32 @@ def write(
                 **kwargs,
             )
 
-    def read(self, path, columns=None, **kwargs):
+    def read(self, path, columns=None, use_nullable_dtypes=False, **kwargs):
         path, _, _, should_close = get_filepath_or_buffer(path)
 
         kwargs["use_pandas_metadata"] = True
-        result = self.api.parquet.read_table(
-            path, columns=columns, **kwargs
-        ).to_pandas()
+        to_pandas_kwargs = {}
+        if use_nullable_dtypes:
+            if LooseVersion(self.api.__version__) > "0.15.1.dev":
+                import pandas as pd
+
+                mapping = {
+                    self.api.int8(): pd.Int8Dtype(),
+                    self.api.int16(): pd.Int16Dtype(),
+                    self.api.int32(): pd.Int32Dtype(),
+                    self.api.int64(): pd.Int64Dtype(),
+                    self.api.uint8(): pd.UInt8Dtype(),
+                    self.api.uint16(): pd.UInt16Dtype(),
+                    self.api.uint32(): pd.UInt32Dtype(),
+                    self.api.uint64(): pd.UInt64Dtype(),
+                    self.api.bool_(): pd.BooleanDtype(),
+                    self.api.string(): pd.StringDtype(),
+                }
+                to_pandas_kwargs["types_mapper"] = mapping.get
+
+        result = self.api.parquet.read_table(path, columns=columns, **kwargs).to_pandas(
+            **to_pandas_kwargs
+        )
         if should_close:
             path.close()
 
@@ -184,6 +204,12 @@ def write(
             )
 
     def read(self, path, columns=None, **kwargs):
+        use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False)
+        if use_nullable_dtypes:
+            raise ValueError(
+                "The 'use_nullable_dtypes' argument is not supported for the "
+                "fastparquet engine"
+            )
         if is_s3_url(path):
             from pandas.io.s3 import get_file_and_filesystem
 
@@ -263,7 +289,13 @@ def to_parquet(
     )
 
 
-def read_parquet(path, engine: str = "auto", columns=None, **kwargs):
+def read_parquet(
+    path,
+    engine: str = "auto",
+    columns=None,
+    use_nullable_dtypes: bool = False,
+    **kwargs,
+):
     """
     Load a parquet object from the file path, returning a DataFrame.
 
@@ -296,6 +328,11 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs):
         If not None, only these columns will be read from the file.
 
         .. versionadded:: 0.21.1
+    use_nullable_dtypes : bool, default False
+        If True, use dtypes that use ``pd.NA`` as the missing value indicator
+        for the resulting DataFrame. Only supported by the pyarrow engine;
+        passing True with the fastparquet engine raises ``ValueError``.
+        As new ``pd.NA`` dtypes are added in the future, the output will use them.
     **kwargs
         Any additional kwargs are passed to the engine.
 
@@ -305,4 +342,6 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs):
     """
 
     impl = get_engine(engine)
-    return impl.read(path, columns=columns, **kwargs)
+    return impl.read(
+        path, columns=columns, use_nullable_dtypes=use_nullable_dtypes, **kwargs
+    )
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -564,6 +564,31 @@ def test_additional_extension_types(self, pa):
         )
         check_round_trip(df, pa)
 
+    @td.skip_if_no("pyarrow", min_version="0.15.1.dev")
+    def test_use_nullable_dtypes(self, pa):  # opt-in yields pd.NA-backed dtypes
+        import pyarrow.parquet as pq
+
+        table = pyarrow.table(
+            {
+                "a": pyarrow.array([1, 2, 3, None], "int64"),  # int64 with a null
+                "b": pyarrow.array(["a", "b", "c", None]),  # strings with a null
+            }
+        )
+        with tm.ensure_clean() as path:
+            # write with pyarrow directly so "a" is stored as int64 despite the null
+            pq.write_table(table, path)
+            result1 = read_parquet(path)  # default: use_nullable_dtypes=False
+            result2 = read_parquet(path, use_nullable_dtypes=True)
+
+        assert result1["a"].dtype == np.dtype("float64")  # default: float64
+        expected = pd.DataFrame(
+            {
+                "a": pd.array([1, 2, 3, None], dtype="Int64"),
+                "b": pd.array(["a", "b", "c", None], dtype="string"),
+            }
+        )
+        tm.assert_frame_equal(result2, expected)  # opt-in read gives Int64/string
+
 
 class TestParquetFastParquet(Base):
     @td.skip_if_no("fastparquet", min_version="0.3.2")