pandas-dev · ml-evs · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 14, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -253,6 +253,8 @@ Other enhancements
 - Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`)
 - Improve numerical stability for :meth:`Rolling.skew()`, :meth:`Rolling.kurt()`, :meth:`Expanding.skew()` and :meth:`Expanding.kurt()` through implementation of Kahan summation (:issue:`6929`)
 - Improved error reporting for subsetting columns of a :class:`DataFrameGroupBy` with ``axis=1`` (:issue:`37725`)
+- :func:`read_pickle` (and other ``read_*`` functions that handle compressed inputs) can now load from ``.zip`` files created by OS X/macOS that contain ``__MACOSX/`` or ``.DS_STORE`` hidden folders/files (:issue:`37098`)
+
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -616,7 +616,14 @@ def get_handle(
             handle = _BytesZipFile(handle, ioargs.mode, **compression_args)
             if handle.mode == "r":
                 handles.append(handle)
-                zip_names = handle.namelist()
+
+                # Ignore hidden files/folders added by OS X/macOS on .zip creation
+                zip_names = [
+                    _
+                    for _ in handle.namelist()
+                    if not (_.startswith("__MACOSX/") or _.startswith(".DS_STORE"))
+                ]
+
                 if len(zip_names) == 1:
                     handle = handle.open(zip_names.pop())
                 elif len(zip_names) == 0:

diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
@@ -61,6 +61,20 @@ def test_zip_error_multiple_files(parser_and_data, compression):
             parser.read_csv(path, compression=compression)
 
 
+@pytest.mark.parametrize("compression", ["zip", "infer"])
+def test_zip_no_error_hidden_files(parser_and_data, compression, python_parser_only):
+    _, data, expected = parser_and_data
+    # NOTE(review): `expected` is never compared; this only checks that nothing raises.
+    with tm.ensure_clean("combined_zip.zip") as path:
+        inner_file_names = ["test_file", "__MACOSX/dummy", ".DS_STORE"]
+        # One regular member plus two entries named like macOS archive cruft.
+        with zipfile.ZipFile(path, mode="w") as tmp:
+            for file_name in inner_file_names:
+                tmp.writestr(file_name, data)
+        # Reading must succeed although the archive holds three members.
+        python_parser_only.read_csv(path, compression=compression)
+
+
 def test_zip_error_no_files(parser_and_data):
     parser, _, _ = parser_and_data
 

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
@@ -393,6 +393,33 @@ def test_read_infer(self, ext, get_random_path):
 
             tm.assert_frame_equal(df, df2)
 
+    @pytest.mark.parametrize("cruft", ["__MACOSX/", ".DS_STORE"])
+    def test_load_zip_with_hidden_folders(self, cruft, get_random_path):
+        # GH 37098: a .zip carrying macOS-style hidden entries must still be loadable
+        base = get_random_path
+        path1 = f"{base}.raw"
+        path2 = f"{base}.zip"
+        dummy = f"{base}.dummy"
+        compression = "zip"
+        # p1: raw pickle; p2: zip built from p1; dummy_path: file injected as cruft
+        with tm.ensure_clean(path1) as p1, tm.ensure_clean(
+            path2
+        ) as p2, tm.ensure_clean(dummy) as dummy_path:
+            # pickle a frame uncompressed, then zip it with self.compress_file
+            df = tm.makeDataFrame()
+            df.to_pickle(p1, compression=None)
+            self.compress_file(p1, p2, compression=compression)
+            # NOTE: member name is cruft + dummy, so never a bare ".DS_STORE"
+            # append a member named `{cruft}{dummy}` and verify it is in the archive
+            with zipfile.ZipFile(p2, "a", compression=zipfile.ZIP_DEFLATED) as f:
+                f.write(dummy_path, f"{cruft}{dummy}")
+            with zipfile.ZipFile(p2, "r") as f:
+                assert f"{cruft}{dummy}" in f.namelist()
+
+            # the extra member must be skipped on read, otherwise read_pickle fails
+            df2 = pd.read_pickle(p2)
+            tm.assert_frame_equal(df, df2)
+
 
 # ---------------------
 # test pickle compression