Layout-Parser · lolipopshock · Sep 9, 2021 · Sep 8, 2021 · Sep 8, 2021 · Sep 9, 2021
diff --git a/src/layoutparser/__init__.py b/src/layoutparser/__init__.py
@@ -1,27 +1,63 @@
 __version__ = "0.2.0"
 
-from .elements import (
-    Interval, Rectangle, Quadrilateral, 
-    TextBlock, Layout
-)
+import sys
 
-from .visualization import (
-    draw_box, draw_text
+from .file_utils import (
+    _LazyModule,
+    is_detectron2_available,
+    is_paddle_available,
+    is_pytesseract_available,
+    is_gcv_available,
 )
 
-from .ocr import (
-    GCVFeatureType, GCVAgent, 
-    TesseractFeatureType, TesseractAgent
-)
+_import_structure = {
+    "elements": [
+        "Interval", 
+        "Rectangle", 
+        "Quadrilateral", 
+        "TextBlock", 
+        "Layout"
+    ],
+    "visualization": [
+        "draw_box", 
+        "draw_text"
+    ],
+    "io": [
+        "load_json", 
+        "load_dict", 
+        "load_csv", 
+        "load_dataframe"
+    ],
+    "file_utils":[
+        "is_torch_available",
+        "is_torch_cuda_available",
+        "is_detectron2_available",
+        "is_paddle_available",
+        "is_pytesseract_available",
+        "is_gcv_available",
+        "requires_backends"
+    ]
+}
 
-from .models import (
-    Detectron2LayoutModel,
-    PaddleDetectionLayoutModel
-)
+if is_detectron2_available():
+    _import_structure["models.detectron2"] = ["Detectron2LayoutModel"]
+
+if is_paddle_available():
+    _import_structure["models.paddledetection"] = ["PaddleDetectionLayoutModel"]
 
-from .io import (
-    load_json,
-    load_dict,
-    load_csv,
-    load_dataframe
-)
+if is_pytesseract_available():
+    _import_structure["ocr.tesseract_agent"] = [
+        "TesseractAgent",
+        "TesseractFeatureType",
+    ]
+
+if is_gcv_available():
+    _import_structure["ocr.gcv_agent"] = ["GCVAgent", "GCVFeatureType"]
+
+sys.modules[__name__] = _LazyModule(
+    __name__,
+    globals()["__file__"],
+    _import_structure,
+    module_spec=__spec__,
+    extra_objects={"__version__": __version__},
+)
diff --git a/src/layoutparser/file_utils.py b/src/layoutparser/file_utils.py
@@ -0,0 +1,201 @@
+# Some code is adapted from
+# https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py
+
+from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
+import sys
+import os
+import logging
+import importlib.util
+from types import ModuleType
+
+logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+
+# The package importlib_metadata is in a different place, depending on the python version.
+if sys.version_info < (3, 8):
+    import importlib_metadata
+else:
+    import importlib.metadata as importlib_metadata
+
+###########################################
+############ Layout Model Deps ############
+###########################################
+
+_torch_available = importlib.util.find_spec("torch") is not None
+try:
+    _torch_version = importlib_metadata.version("torch")
+    logger.debug(f"PyTorch version {_torch_version} available.")
+except importlib_metadata.PackageNotFoundError:
+    _torch_available = False
+
+_detectron2_available = importlib.util.find_spec("detectron2") is not None
+try:
+    _detectron2_version = importlib_metadata.version("detectron2")
+    logger.debug(f"Detectron2 version {_detectron2_version} available")
+except importlib_metadata.PackageNotFoundError:
+    _detectron2_available = False
+
+_paddle_available = importlib.util.find_spec("paddle") is not None
+try:
+    # The name of the paddlepaddle library:
+    # Install name: pip install paddlepaddle
+    # Import name: import paddle
+    _paddle_version = importlib_metadata.version("paddlepaddle") 
+    logger.debug(f"Paddle version {_paddle_version} available.")
+except importlib_metadata.PackageNotFoundError:
+    _paddle_available = False
+
+###########################################
+############## OCR Tool Deps ##############
+###########################################
+
+_pytesseract_available = importlib.util.find_spec("pytesseract") is not None
+try:
+    _pytesseract_version = importlib_metadata.version("pytesseract")
+    logger.debug(f"Pytesseract version {_pytesseract_version} available.")
+except importlib_metadata.PackageNotFoundError:
+    _pytesseract_available = False
+
+try:
+    # find_spec() imports the parent packages of a dotted name and raises
+    # ModuleNotFoundError when `google`/`google.cloud` is not installed.
+    _gcv_available = importlib.util.find_spec("google.cloud.vision") is not None
+    _gcv_version = importlib_metadata.version("google-cloud-vision")  # install name
+    logger.debug(f"Google Cloud Vision Utils version {_gcv_version} available.")
+except (ModuleNotFoundError, importlib_metadata.PackageNotFoundError):
+    _gcv_available = False
+
+
+def is_torch_available():
+    return _torch_available
+
+
+def is_torch_cuda_available():
+    """Return whether PyTorch is installed and reports an available CUDA device."""
+    if not is_torch_available():
+        return False
+    # Imported only after torch was detected as installed.
+    import torch
+    return torch.cuda.is_available()
+
+
+def is_paddle_available():
+    return _paddle_available
+
+
+def is_detectron2_available():
+    return _detectron2_available
+
+
+def is_pytesseract_available():
+    return _pytesseract_available
+
+
+def is_gcv_available():
+    return _gcv_available
+
+
+PYTORCH_IMPORT_ERROR = """
+{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
+installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
+"""
+
+DETECTRON2_IMPORT_ERROR = """
+{0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the
+installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones
+that match your environment. Typically the following would work for MacOS or Linux CPU machines:
+    pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2' 
+"""
+
+PADDLE_IMPORT_ERROR = """
+{0} requires the PaddlePaddle library but it was not found in your environment. Checkout the instructions on the
+installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
+"""
+
+PYTESSERACT_IMPORT_ERROR = """
+{0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
+`pip install pytesseract`
+"""
+
+GCV_IMPORT_ERROR = """
+{0} requires the Google Cloud Vision Python utils but it was not found in your environment. You can install it with pip:
+`pip install google-cloud-vision==1`
+"""
+
+# Maps each backend key accepted by `requires_backends` to a pair of
+# (availability check, error-message template formatted with the caller name).
+BACKENDS_MAPPING = {
+    "torch": (is_torch_available, PYTORCH_IMPORT_ERROR),
+    "detectron2": (is_detectron2_available, DETECTRON2_IMPORT_ERROR),
+    "paddle": (is_paddle_available, PADDLE_IMPORT_ERROR),
+    "pytesseract": (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR),
+    "google-cloud-vision": (is_gcv_available, GCV_IMPORT_ERROR),
+}
+
+
+def requires_backends(obj, backends):
+    """Raise ImportError for `obj`, reporting only the `backends` not installed."""
+    if not isinstance(backends, (list, tuple)):
+        backends = [backends]
+    name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__
+    checks = (BACKENDS_MAPPING[backend] for backend in backends)
+    failed = [msg.format(name) for available, msg in checks if not available()]
+    if failed:
+        raise ImportError("".join(failed))
+
+
+class _LazyModule(ModuleType):
+    """
+    Module class that surfaces all objects but only performs associated imports when the objects are requested.
+
+    Adapted from HuggingFace:
+    https://github.com/huggingface/transformers/blob/c37573806ab3526dd805c49cbe2489ad4d68a9d7/src/transformers/file_utils.py#L1990
+    """
+
+    def __init__(
+        self, name, module_file, import_structure, module_spec=None, extra_objects=None
+    ):
+        """Index `import_structure` (submodule -> exported names); nothing is imported here."""
+        super().__init__(name)
+        self._modules = set(import_structure.keys())
+        self._class_to_module = {
+            exported: submodule
+            for submodule, exports in import_structure.items()
+            for exported in exports
+        }
+        # Needed for autocompletion in an IDE
+        exported_names = [n for names in import_structure.values() for n in names]
+        self.__all__ = list(import_structure.keys()) + exported_names
+        self.__file__ = module_file
+        self.__spec__ = module_spec
+        self.__path__ = [os.path.dirname(module_file)]
+        self._objects = {} if extra_objects is None else extra_objects
+        self._name = name
+        self._import_structure = import_structure
+        # Following PEP 366, the __package__ variable should be set.
+        self.__package__ = self.__name__
+
+    def __dir__(self):
+        """Return the regular attributes plus the lazy names not already among them."""
+        listed = super().__dir__()
+        return listed + [attr for attr in self.__all__ if attr not in listed]
+
+    def __getattr__(self, name: str) -> Any:
+        """Resolve `name` on first access, importing its submodule if needed."""
+        if name in self._objects:
+            return self._objects[name]
+        if name in self._modules:
+            value = self._get_module(name)
+        elif name in self._class_to_module:
+            value = getattr(self._get_module(self._class_to_module[name]), name)
+        else:
+            raise AttributeError(f"module {self.__name__} has no attribute {name}")
+        # Cache on the instance so later lookups bypass __getattr__.
+        setattr(self, name, value)
+        return value
+
+    def _get_module(self, module_name: str):
+        return importlib.import_module("." + module_name, self.__name__)
+
+    def __reduce__(self):
+        """Rebuild from name, file and import structure when unpickled."""
+        return (self.__class__, (self._name, self.__file__, self._import_structure))
diff --git a/src/layoutparser/models/base_layoutmodel.py b/src/layoutparser/models/base_layoutmodel.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
-import os
-import importlib
+
+from ..file_utils import requires_backends
 
 
 class BaseLayoutModel(ABC):
@@ -23,28 +23,7 @@ def DEPENDENCIES(self):
         """DEPENDENCIES lists all necessary dependencies for the class."""
         pass
 
-    @property
-    @abstractmethod
-    def MODULES(self):
-        """MODULES instructs how to import these necessary libraries."""
-        pass
-
-    @classmethod
-    def _import_module(cls):
-        for m in cls.MODULES:
-            if importlib.util.find_spec(m["module_path"]):
-                setattr(
-                    cls, m["import_name"], importlib.import_module(m["module_path"])
-                )
-            else:
-                raise ModuleNotFoundError(
-                    f"\n "
-                    f"\nPlease install the following libraries to support the class {cls.__name__}:"
-                    f"\n    pip install {' '.join(cls.DEPENDENCIES)}"
-                    f"\n "
-                )
-
     def __new__(cls, *args, **kwargs):
 
-        cls._import_module()
+        requires_backends(cls, cls.DEPENDENCIES)
         return super().__new__(cls)
diff --git a/src/layoutparser/models/detectron2/layoutmodel.py b/src/layoutparser/models/detectron2/layoutmodel.py
@@ -1,10 +1,15 @@
 from PIL import Image
 import numpy as np
-import torch
 
 from .catalog import PathManager, LABEL_MAP_CATALOG
 from ..base_layoutmodel import BaseLayoutModel
 from ...elements import Rectangle, TextBlock, Layout
+from ...file_utils import is_torch_cuda_available, is_detectron2_available
+
+if is_detectron2_available():
+    import detectron2.engine
+    import detectron2.config
+
 
 __all__ = ["Detectron2LayoutModel"]
 
@@ -42,13 +47,6 @@ class Detectron2LayoutModel(BaseLayoutModel):
     """
 
     DEPENDENCIES = ["detectron2"]
-    MODULES = [
-        {
-            "import_name": "_engine",
-            "module_path": "detectron2.engine",
-        },
-        {"import_name": "_config", "module_path": "detectron2.config"},
-    ]
     DETECTOR_NAME = "detectron2"
 
     def __init__(
@@ -70,7 +68,7 @@ def __init__(
         if enforce_cpu:
             extra_config.extend(["MODEL.DEVICE", "cpu"])
 
-        cfg = self._config.get_cfg()
+        cfg = detectron2.config.get_cfg()
         config_path = self._reconstruct_path_with_detector_name(config_path)
         config_path = PathManager.get_local_path(config_path)
         cfg.merge_from_file(config_path)
@@ -79,7 +77,10 @@ def __init__(
         if model_path is not None:
             model_path = self._reconstruct_path_with_detector_name(model_path)
             cfg.MODEL.WEIGHTS = model_path
-        cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if not enforce_cpu:
+            cfg.MODEL.DEVICE = "cuda" if is_torch_cuda_available() else "cpu"
+
         self.cfg = cfg
 
         self.label_map = label_map
@@ -135,7 +136,7 @@ def gather_output(self, outputs):
         return layout
 
     def _create_model(self):
-        self.model = self._engine.DefaultPredictor(self.cfg)
+        self.model = detectron2.engine.DefaultPredictor(self.cfg)
 
     def detect(self, image):
         """Detect the layout of a given image.