Skip to content

[feat] Dynamic import based on the available dependencies #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 56 additions & 20 deletions src/layoutparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,63 @@
__version__ = "0.2.0"

from .elements import (
Interval, Rectangle, Quadrilateral,
TextBlock, Layout
)
import sys

from .visualization import (
draw_box, draw_text
from .file_utils import (
_LazyModule,
is_detectron2_available,
is_paddle_available,
is_pytesseract_available,
is_gcv_available,
)

from .ocr import (
GCVFeatureType, GCVAgent,
TesseractFeatureType, TesseractAgent
)
_import_structure = {
"elements": [
"Interval",
"Rectangle",
"Quadrilateral",
"TextBlock",
"Layout"
],
"visualization": [
"draw_box",
"draw_text"
],
"io": [
"load_json",
"load_dict",
"load_csv",
"load_dataframe"
],
"file_utils":[
"is_torch_available",
"is_torch_cuda_available",
"is_detectron2_available",
"is_paddle_available",
"is_pytesseract_available",
"is_gcv_available",
"requires_backends"
]
}

from .models import (
Detectron2LayoutModel,
PaddleDetectionLayoutModel
)
if is_detectron2_available():
_import_structure["models.detectron2"] = ["Detectron2LayoutModel"]

if is_paddle_available():
_import_structure["models.paddledetection"] = ["PaddleDetectionLayoutModel"]

from .io import (
load_json,
load_dict,
load_csv,
load_dataframe
)
if is_pytesseract_available():
_import_structure["ocr.tesseract_agent"] = [
"TesseractAgent",
"TesseractFeatureType",
]

if is_gcv_available():
_import_structure["ocr.gcv_agent"] = ["GCVAgent", "GCVFeatureType"]

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
extra_objects={"__version__": __version__},
)
201 changes: 201 additions & 0 deletions src/layoutparser/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# Some code is adapted from
# https://github.com/huggingface/transformers/blob/master/src/transformers/file_utils.py

from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
import sys
import os
import logging
import importlib.util
from types import ModuleType

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# The package importlib_metadata is in a different place, depending on the python version.
if sys.version_info < (3, 8):
    import importlib_metadata
else:
    import importlib.metadata as importlib_metadata

# Every optional dependency is probed in two steps without importing it:
#   1. ``find_spec`` checks that the *import name* can be located.
#   2. ``importlib_metadata.version`` checks that the *distribution* is
#      installed; when it is not, the dependency is marked unavailable.
# The ``_<dep>_version`` names are only bound when the distribution is found.

###########################################
############ Layout Model Deps ############
###########################################

_torch_available = importlib.util.find_spec("torch") is not None
try:
    _torch_version = importlib_metadata.version("torch")
    logger.debug("PyTorch version %s available.", _torch_version)
except importlib_metadata.PackageNotFoundError:
    _torch_available = False

_detectron2_available = importlib.util.find_spec("detectron2") is not None
try:
    _detectron2_version = importlib_metadata.version("detectron2")
    logger.debug("Detectron2 version %s available", _detectron2_version)
except importlib_metadata.PackageNotFoundError:
    _detectron2_available = False

_paddle_available = importlib.util.find_spec("paddle") is not None
try:
    # The name of the paddlepaddle library:
    # Install name: pip install paddlepaddle
    # Import name: import paddle
    _paddle_version = importlib_metadata.version("paddlepaddle")
    logger.debug("Paddle version %s available.", _paddle_version)
except importlib_metadata.PackageNotFoundError:
    _paddle_available = False

###########################################
############## OCR Tool Deps ##############
###########################################

_pytesseract_available = importlib.util.find_spec("pytesseract") is not None
try:
    _pytesseract_version = importlib_metadata.version("pytesseract")
    logger.debug("Pytesseract version %s available.", _pytesseract_version)
except importlib_metadata.PackageNotFoundError:
    _pytesseract_available = False

try:
    _gcv_available = importlib.util.find_spec("google.cloud.vision") is not None
except ImportError:
    # For a dotted name, find_spec imports the parent packages first and
    # raises ModuleNotFoundError (an ImportError) when ``google`` or
    # ``google.cloud`` is missing. That simply means the dependency is absent;
    # it must not break importing this module.
    _gcv_available = False
try:
    _gcv_version = importlib_metadata.version(
        "google-cloud-vision"
    )  # This is slightly different
    logger.debug("Google Cloud Vision Utils version %s available.", _gcv_version)
except importlib_metadata.PackageNotFoundError:
    _gcv_available = False


def is_torch_available() -> bool:
    """Return the module-level flag recording whether PyTorch was detected."""
    return _torch_available


def is_torch_cuda_available():
    """Report whether PyTorch is available and can see a CUDA device.

    Returns ``False`` without importing ``torch`` when PyTorch itself is not
    available; otherwise returns whatever ``torch.cuda.is_available()`` says.
    """
    if not is_torch_available():
        return False

    # Imported lazily so that this module can be loaded without PyTorch.
    import torch

    return torch.cuda.is_available()


def is_paddle_available() -> bool:
    """Return the module-level flag recording whether PaddlePaddle was detected."""
    return _paddle_available


def is_detectron2_available() -> bool:
    """Return the module-level flag recording whether detectron2 was detected."""
    return _detectron2_available


def is_pytesseract_available() -> bool:
    """Return the module-level flag recording whether pytesseract was detected."""
    return _pytesseract_available


def is_gcv_available() -> bool:
    """Return the module-level flag recording whether Google Cloud Vision was detected."""
    return _gcv_available


# Error-message templates, one per optional backend. Each template has a single
# ``{0}`` placeholder that is filled via ``str.format`` with the name of the
# object that needs the backend.

PYTORCH_IMPORT_ERROR = """
{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
"""

# NOTE(review): the pip target below was restored from a garbled
# "[email protected]" token; the pinned tag (v0.4) should be confirmed against
# the project's installation instructions.
DETECTRON2_IMPORT_ERROR = """
{0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones
that match your environment. Typically the following would work for MacOS or Linux CPU machines:
pip install 'git+https://github.com/facebookresearch/detectron2.git@v0.4#egg=detectron2'
"""

PADDLE_IMPORT_ERROR = """
{0} requires the PaddlePaddle library but it was not found in your environment. Checkout the instructions on the
installation page: https://github.com/PaddlePaddle/Paddle and follow the ones that match your environment.
"""

PYTESSERACT_IMPORT_ERROR = """
{0} requires the PyTesseract library but it was not found in your environment. You can install it with pip:
`pip install pytesseract`
"""

GCV_IMPORT_ERROR = """
{0} requires the Google Cloud Vision Python utils but it was not found in your environment. You can install it with pip:
`pip install google-cloud-vision==1`
"""

# Backend name -> (availability check, error-message template).
# The check is a zero-argument callable; the template is formatted with the
# name of the object that requires the backend.
BACKENDS_MAPPING = {
    "torch": (is_torch_available, PYTORCH_IMPORT_ERROR),
    "detectron2": (is_detectron2_available, DETECTRON2_IMPORT_ERROR),
    "paddle": (is_paddle_available, PADDLE_IMPORT_ERROR),
    "pytesseract": (is_pytesseract_available, PYTESSERACT_IMPORT_ERROR),
    "google-cloud-vision": (is_gcv_available, GCV_IMPORT_ERROR),
}


def requires_backends(obj, backends):
    """Raise ``ImportError`` if any of the requested backends is unavailable.

    Args:
        obj: The object (class, function or instance) that needs the backends.
            Its ``__name__`` (or its class name when it has none) is inserted
            into the error message.
        backends: A backend name, or a list/tuple of backend names. Every name
            must be a key of ``BACKENDS_MAPPING``.

    Raises:
        ImportError: When at least one backend's availability check returns a
            falsy value. The message contains the installation hints of the
            missing backends only, in the order they were requested.
        KeyError: When a backend name is not present in ``BACKENDS_MAPPING``.
    """
    if not isinstance(backends, (list, tuple)):
        backends = [backends]

    name = obj.__name__ if hasattr(obj, "__name__") else obj.__class__.__name__

    # Collect hints only for the backends that are actually missing, so the
    # user is not told to install something that is already present.
    missing_hints = [
        message.format(name)
        for is_available, message in (BACKENDS_MAPPING[backend] for backend in backends)
        if not is_available()
    ]
    if missing_hints:
        raise ImportError("".join(missing_hints))


class _LazyModule(ModuleType):
    """
    Module class that surfaces all objects but only performs associated imports when the objects are requested.

    ``import_structure`` maps a submodule name (relative to this package) to
    the names that submodule provides. A submodule is imported the first time
    it, or one of its listed names, is accessed as an attribute; the result is
    then stored on the module so later lookups bypass ``__getattr__``.
    """

    # Adapted from HuggingFace
    # https://github.com/huggingface/transformers/blob/c37573806ab3526dd805c49cbe2489ad4d68a9d7/src/transformers/file_utils.py#L1990

    def __init__(
        self, name, module_file, import_structure, module_spec=None, extra_objects=None
    ):
        """Set up the lazy module.

        Args:
            name: Fully qualified name of the package this module stands in for.
            module_file: Path of the package's ``__init__`` file; its directory
                becomes the single entry of ``__path__``.
            import_structure: Mapping of submodule name to the iterable of
                object names exported by that submodule.
            module_spec: Value stored as ``__spec__``.
            extra_objects: Optional mapping of attribute name to ready-made
                object, returned as-is on attribute access.
        """
        super().__init__(name)
        self._modules = set(import_structure.keys())
        # Reverse index: exported object name -> submodule that defines it.
        self._class_to_module = {
            value: key for key, values in import_structure.items() for value in values
        }
        # Needed for autocompletion in an IDE
        self.__all__ = list(import_structure.keys()) + [
            value for values in import_structure.values() for value in values
        ]
        self.__file__ = module_file
        self.__spec__ = module_spec
        self.__path__ = [os.path.dirname(module_file)]
        self._objects = {} if extra_objects is None else extra_objects
        self._name = name
        self._import_structure = import_structure

        # Following [PEP 366](https://www.python.org/dev/peps/pep-0366/)
        # The __package__ variable should be set
        # https://docs.python.org/3/reference/import.html#__package__
        self.__package__ = self.__name__

    # Needed for autocompletion in an IDE
    def __dir__(self):
        """Return the regular module attributes plus every lazily importable name.

        Names from ``__all__`` that were already accessed are stored on the
        module and therefore already listed by ``ModuleType.__dir__``; they are
        not appended a second time.
        """
        names = list(super().__dir__())
        seen = set(names)
        for attr in self.__all__:
            if attr not in seen:
                seen.add(attr)
                names.append(attr)
        return names

    def __getattr__(self, name: str) -> Any:
        """Resolve ``name`` lazily; only called when normal lookup fails.

        Raises:
            AttributeError: If ``name`` is neither an extra object, a known
                submodule, nor an exported object name.
        """
        if name in self._objects:
            return self._objects[name]
        if name in self._modules:
            value = self._get_module(name)
        elif name in self._class_to_module:
            module = self._get_module(self._class_to_module[name])
            value = getattr(module, name)
        else:
            raise AttributeError(f"module {self.__name__} has no attribute {name}")

        # Cache the resolved value so the next access is a plain attribute lookup.
        setattr(self, name, value)
        return value

    def _get_module(self, module_name: str):
        """Import and return the submodule ``module_name`` relative to this package."""
        return importlib.import_module("." + module_name, self.__name__)

    def __reduce__(self):
        """Support pickling by re-creating the module from its constructor arguments.

        Only the name, file and import structure are passed on; the module
        spec and extra objects are not.
        """
        return (self.__class__, (self._name, self.__file__, self._import_structure))
27 changes: 3 additions & 24 deletions src/layoutparser/models/base_layoutmodel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
import os
import importlib

from ..file_utils import requires_backends


class BaseLayoutModel(ABC):
Expand All @@ -23,28 +23,7 @@ def DEPENDENCIES(self):
"""DEPENDENCIES lists all necessary dependencies for the class."""
pass

@property
@abstractmethod
def MODULES(self):
"""MODULES instructs how to import these necessary libraries."""
pass

@classmethod
def _import_module(cls):
for m in cls.MODULES:
if importlib.util.find_spec(m["module_path"]):
setattr(
cls, m["import_name"], importlib.import_module(m["module_path"])
)
else:
raise ModuleNotFoundError(
f"\n "
f"\nPlease install the following libraries to support the class {cls.__name__}:"
f"\n pip install {' '.join(cls.DEPENDENCIES)}"
f"\n "
)

def __new__(cls, *args, **kwargs):

cls._import_module()
requires_backends(cls, cls.DEPENDENCIES)
return super().__new__(cls)
23 changes: 12 additions & 11 deletions src/layoutparser/models/detectron2/layoutmodel.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from PIL import Image
import numpy as np
import torch

from .catalog import PathManager, LABEL_MAP_CATALOG
from ..base_layoutmodel import BaseLayoutModel
from ...elements import Rectangle, TextBlock, Layout
from ...file_utils import is_torch_cuda_available, is_detectron2_available

if is_detectron2_available():
import detectron2.engine
import detectron2.config


__all__ = ["Detectron2LayoutModel"]

Expand Down Expand Up @@ -42,13 +47,6 @@ class Detectron2LayoutModel(BaseLayoutModel):
"""

DEPENDENCIES = ["detectron2"]
MODULES = [
{
"import_name": "_engine",
"module_path": "detectron2.engine",
},
{"import_name": "_config", "module_path": "detectron2.config"},
]
DETECTOR_NAME = "detectron2"

def __init__(
Expand All @@ -70,7 +68,7 @@ def __init__(
if enforce_cpu:
extra_config.extend(["MODEL.DEVICE", "cpu"])

cfg = self._config.get_cfg()
cfg = detectron2.config.get_cfg()
config_path = self._reconstruct_path_with_detector_name(config_path)
config_path = PathManager.get_local_path(config_path)
cfg.merge_from_file(config_path)
Expand All @@ -79,7 +77,10 @@ def __init__(
if model_path is not None:
model_path = self._reconstruct_path_with_detector_name(model_path)
cfg.MODEL.WEIGHTS = model_path
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if not enforce_cpu:
cfg.MODEL.DEVICE = "cuda" if is_torch_cuda_available() else "cpu"

self.cfg = cfg

self.label_map = label_map
Expand Down Expand Up @@ -135,7 +136,7 @@ def gather_output(self, outputs):
return layout

def _create_model(self):
self.model = self._engine.DefaultPredictor(self.cfg)
self.model = detectron2.engine.DefaultPredictor(self.cfg)

def detect(self, image):
"""Detect the layout of a given image.
Expand Down
Loading