pandas-dev · TomAugspurger · Aug 19, 2019 · Aug 19, 2019
diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
@@ -103,7 +103,6 @@ MultiIndex
 
 I/O
 ^^^
-
 - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
 - Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`)
 -
@@ -160,6 +159,14 @@ Other
 -
 -
 
+I/O and LZMA
+~~~~~~~~~~~~
+
+Some users may unknowingly have an incomplete Python installation, which lacks the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`).
+Pandas will now warn, rather than raising an ``ImportError``, if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``.
+A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python.
+For example, on macOS installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python.
+
 .. _whatsnew_0.251.contributors:
 
 Contributors

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -2,7 +2,6 @@
 # See LICENSE for the license
 import bz2
 import gzip
-import lzma
 import os
 import sys
 import time
@@ -59,9 +58,12 @@ from pandas.core.arrays import Categorical
 from pandas.core.dtypes.concat import union_categoricals
 import pandas.io.common as icom
 
+from pandas.compat import _import_lzma, _get_lzma_file
 from pandas.errors import (ParserError, DtypeWarning,
                            EmptyDataError, ParserWarning)
 
+lzma = _import_lzma()
+
 # Import CParserError as alias of ParserError for backwards compatibility.
 # Ultimately, we want to remove this import. See gh-12665 and gh-14479.
 CParserError = ParserError
@@ -645,9 +647,9 @@ cdef class TextReader:
                                      'zip file %s', str(zip_names))
             elif self.compression == 'xz':
                 if isinstance(source, str):
-                    source = lzma.LZMAFile(source, 'rb')
+                    source = _get_lzma_file(lzma)(source, 'rb')
                 else:
-                    source = lzma.LZMAFile(filename=source)
+                    source = _get_lzma_file(lzma)(filename=source)
             else:
                 raise ValueError('Unrecognized compression type: %s' %
                                  self.compression)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -10,6 +10,7 @@
 import platform
 import struct
 import sys
+import warnings
 
 PY35 = sys.version_info[:2] == (3, 5)
 PY36 = sys.version_info >= (3, 6)
@@ -65,3 +66,32 @@ def is_platform_mac():
 
 def is_platform_32bit():
     return struct.calcsize("P") * 8 < 64
+
+
+def _import_lzma():
+    """Return the ``lzma`` module; if the import fails, warn and return None.
+    """
+    try:
+        import lzma
+
+        return lzma
+    except ImportError:
+        msg = (
+            "Could not import the lzma module. "
+            "Your installed Python is incomplete. "
+            "Attempting to use lzma compression will result in a RuntimeError."
+        )
+        warnings.warn(msg)  # no explicit return: the caller receives None
+
+
+def _get_lzma_file(lzma):
+    """Return the ``LZMAFile`` attribute of ``lzma`` (e.g. from _import_lzma).
+    Raises RuntimeError when ``lzma`` is None, i.e. the module is unavailable.
+    """
+    if lzma is None:
+        raise RuntimeError(
+            "lzma module not available. "
+            "A Python re-install with the proper "
+            "dependencies might be required to solve this issue."
+        )
+    return lzma.LZMAFile
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -6,7 +6,6 @@
 import gzip
 from http.client import HTTPException  # noqa
 from io import BytesIO
-import lzma
 import mmap
 import os
 import pathlib
@@ -22,6 +21,7 @@
 from urllib.request import pathname2url, urlopen
 import zipfile
 
+from pandas.compat import _get_lzma_file, _import_lzma
 from pandas.errors import (  # noqa
     AbstractMethodError,
     DtypeWarning,
@@ -32,6 +32,8 @@
 
 from pandas.core.dtypes.common import is_file_like
 
+lzma = _import_lzma()
+
 # gh-12665: Alias for now and remove later.
 CParserError = ParserError
 
@@ -382,7 +384,7 @@ def _get_handle(
 
         # XZ Compression
         elif compression == "xz":
-            f = lzma.LZMAFile(path_or_buf, mode)
+            f = _get_lzma_file(lzma)(path_or_buf, mode)
 
         # Unrecognized Compression
         else:

diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
@@ -1,5 +1,7 @@
 import contextlib
 import os
+import subprocess
+import textwrap
 import warnings
 
 import pytest
@@ -125,3 +127,33 @@ def test_compression_warning(compression_only):
         with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
             with f:
                 df.to_csv(f, compression=compression_only)
+
+
+def test_with_missing_lzma():
+    """Check in a subprocess that ``import pandas`` works with lzma blocked."""
+    # https://github.com/pandas-dev/pandas/issues/27575
+    code = textwrap.dedent(
+        """\
+        import sys
+        sys.modules['lzma'] = None
+        import pandas
+        """
+    )
+    subprocess.check_output(["python", "-c", code])  # raises on non-zero exit
+
+
+def test_with_missing_lzma_runtime():
+    """Check in a subprocess that writing with xz compression raises
+    RuntimeError once the lzma module has been made unimportable."""
+    code = textwrap.dedent(
+        """
+        import sys
+        import pytest
+        sys.modules['lzma'] = None
+        import pandas
+        df = pandas.DataFrame()
+        with pytest.raises(RuntimeError, match='lzma module'):
+            df.to_csv('foo.csv', compression='xz')
+        """
+    )
+    subprocess.check_output(["python", "-c", code])  # raises on non-zero exit
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
@@ -13,7 +13,6 @@
 import bz2
 import glob
 import gzip
-import lzma
 import os
 import pickle
 import shutil
@@ -22,14 +21,16 @@
 
 import pytest
 
-from pandas.compat import is_platform_little_endian
+from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian
 
 import pandas as pd
 from pandas import Index
 import pandas.util.testing as tm
 
 from pandas.tseries.offsets import Day, MonthEnd
 
+lzma = _import_lzma()
+
 
 @pytest.fixture(scope="module")
 def current_pickle_data():
@@ -270,7 +271,7 @@ def compress_file(self, src_path, dest_path, compression):
             with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
                 f.write(src_path, os.path.basename(src_path))
         elif compression == "xz":
-            f = lzma.LZMAFile(dest_path, "w")
+            f = _get_lzma_file(lzma)(dest_path, "w")
         else:
             msg = "Unrecognized compression type: {}".format(compression)
             raise ValueError(msg)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -5,7 +5,6 @@
 from functools import wraps
 import gzip
 import http.client
-import lzma
 import os
 import re
 from shutil import rmtree
@@ -26,7 +25,7 @@
 )
 
 import pandas._libs.testing as _testing
-from pandas.compat import raise_with_traceback
+from pandas.compat import _get_lzma_file, _import_lzma, raise_with_traceback
 
 from pandas.core.dtypes.common import (
     is_bool,
@@ -70,6 +69,8 @@
 from pandas.io.common import urlopen
 from pandas.io.formats.printing import pprint_thing
 
+lzma = _import_lzma()
+
 N = 30
 K = 4
 _RAISE_NETWORK_ERROR_DEFAULT = False
@@ -211,7 +212,7 @@ def decompress_file(path, compression):
     elif compression == "bz2":
         f = bz2.BZ2File(path, "rb")
     elif compression == "xz":
-        f = lzma.LZMAFile(path, "rb")
+        f = _get_lzma_file(lzma)(path, "rb")
     elif compression == "zip":
         zip_file = zipfile.ZipFile(path)
         zip_names = zip_file.namelist()
@@ -264,9 +265,7 @@ def write_to_compressed(compression, path, data, dest="test"):
 
         compress_method = bz2.BZ2File
     elif compression == "xz":
-        import lzma
-
-        compress_method = lzma.LZMAFile
+        compress_method = _get_lzma_file(lzma)
     else:
         msg = "Unrecognized compression type: {}".format(compression)
         raise ValueError(msg)