OpenVINO(TM) Toolkit support #1235

Open
wants to merge 11 commits into master from avbelova:feature/openvino-backend

30 changes: 30 additions & 0 deletions README.md
@@ -114,12 +114,42 @@ reader = easyocr.Reader(['ch_sim','en'], gpu=False)

For more information, read the [tutorial](https://www.jaided.ai/easyocr/tutorial) and [API Documentation](https://www.jaided.ai/easyocr/documentation).

#### Run with [OpenVINO(TM) Toolkit](https://github.com/openvinotoolkit/openvino) backend
To run EasyOCR with the OpenVINO(TM) Toolkit on Intel(R) CPU, Intel(R) Processor Graphics, or Intel(R) Discrete Graphics, pass the inference device name as a `Reader` input argument. Choose between `ov_cpu`, `ov_gpu.<Intel GPU#>`, and `ov_auto` (`ov_` plus the device/plugin name as it is called in [OpenVINO](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html)).

The following example sets up model inference with OpenVINO on an Intel(R) CPU:

```python
reader = easyocr.Reader(['ch_sim','en'], 'ov_cpu')
```
Example of running inference with OpenVINO on Intel(R) Processor Graphics:

```python
reader = easyocr.Reader(['ch_sim','en'], 'ov_gpu')
```
Running inference with OpenVINO on Intel(R) Discrete Graphics:

```python
reader = easyocr.Reader(['ch_sim','en'], 'ov_gpu.1')
```
To accelerate OCR further, you can run the CRAFT detection model in INT8 format: simply append `_int8` to the device name. The CRAFT model is quantized with the [NNCF](https://github.com/openvinotoolkit/nncf) framework; the quantization script is [here](https://github.com/avbelova/EasyOCR/blob/feature/openvino-backend/easyocr/quantize_craft_default.py).
```python
reader = easyocr.Reader(['ch_sim','en'], 'ov_cpu_int8')
```
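If the target hardware is not known in advance, OpenVINO's AUTO plugin can pick the device. A minimal sketch, assuming the sample image `chinese.jpg` from the command-line example below and EasyOCR's standard `readtext` call:

```python
import easyocr

# Let OpenVINO's AUTO plugin select the best available Intel device
reader = easyocr.Reader(['ch_sim', 'en'], 'ov_auto')
result = reader.readtext('chinese.jpg')
print(result)
```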
#### Run on command line

```shell
$ easyocr -l ch_sim en -f chinese.jpg --detail=1 --gpu=True
```

#### Run on command line with [OpenVINO(TM) Toolkit](https://github.com/openvinotoolkit/openvino) backend

Choose the target device (`--gpu`) from `ov_cpu`, `ov_gpu`, `ov_gpu.0`, `ov_gpu.1`, `ov_gpu.2`, `ov_gpu.3`, or `ov_auto`.

```shell
$ easyocr -l ch_sim en -f chinese.jpg --detail=1 --gpu=ov_cpu
```

## Train/use your own model

For recognition model, [Read here](https://github.com/JaidedAI/EasyOCR/blob/master/custom_model.md).
5 changes: 2 additions & 3 deletions easyocr/cli.py
@@ -14,10 +14,9 @@ def parse_args():
)
parser.add_argument(
"--gpu",
type=bool,
choices=[True, False],
choices=[True, False, 'ov_cpu', 'ov_gpu', 'ov_gpu.0', 'ov_gpu.1', 'ov_gpu.2', 'ov_gpu.3', 'ov_auto'],
default=True,
help="Using GPU (default: True)",
help="Using GPU (default: True) or OpenVINO(TM) Toolkit on Intel(R) CPU, Intel(R) Processor Graphics or Intel(R) Discrete Graphics. Choose from True, False, ov_cpu, ov_gpu, ov_gpu.0, ov_gpu.1, ov_gpu.2, ov_gpu.3, ov_auto" ,
)
parser.add_argument(
"--model_storage_directory",
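
Dropping `type=bool` above avoids argparse's pitfall that `bool("False")` evaluates to `True`, and the widened `choices` list lets an OpenVINO device string pass through unchanged, presumably to be handed to `easyocr.Reader(..., gpu=args.gpu)` by the CLI. A minimal standalone sketch of that behaviour (not the project's actual CLI; the choices shown are an illustrative subset):

```python
import argparse

# Sketch mirroring the patched --gpu option; names and values here are illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--gpu",
    choices=[True, False, 'ov_cpu', 'ov_gpu', 'ov_gpu.0', 'ov_gpu.1', 'ov_auto'],
    default=True,
)

args = parser.parse_args(["--gpu", "ov_cpu"])
print(args.gpu)  # 'ov_cpu' -- the raw string, ready to pass on as the gpu argument
```
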
5 changes: 5 additions & 0 deletions easyocr/config.py
@@ -23,6 +23,11 @@
'filename': 'pretrained_ic15_res50.pt',
'url': 'https://github.com/JaidedAI/EasyOCR/releases/download/v1.6.0/pretrained_ic15_res50.zip',
'md5sum': 'a8e90144c131c2467d1eb7886c2e93a6'
},
'craft_ov_int8': {
'filename': 'CRAFT-detector_int8.xml',
'url': 'https://github.com/avbelova/EasyOCR/releases/download/v1.0.0/CRAFT-detector_int8.zip',
'md5sum': '3789b097bab80ac15c8440d32c35b9a2'
}
}

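
The new `craft_ov_int8` entry follows the same filename/url/md5sum layout as the existing detector entries, so it can be fetched with the helpers already present in `easyocr/utils.py`. A hedged sketch of that flow; the `detection_models` dict name and the storage path are assumptions for illustration:

```python
import os

# Assumed names: detection_models (easyocr/config.py) and the default model directory.
from easyocr.config import detection_models
from easyocr.utils import download_and_unzip, calculate_md5

model_storage_directory = os.path.expanduser('~/.EasyOCR/model')
entry = detection_models['craft_ov_int8']
target = os.path.join(model_storage_directory, entry['filename'])

# Download and unpack the INT8 IR if it is missing or its checksum does not match.
if not os.path.isfile(target) or calculate_md5(target) != entry['md5sum']:
    download_and_unzip(entry['url'], entry['filename'], model_storage_directory)
```
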
37 changes: 32 additions & 5 deletions easyocr/detection.py
@@ -10,6 +10,10 @@
from .imgproc import resize_aspect_ratio, normalizeMeanVariance
from .craft import CRAFT

import openvino as ov
import re
import os

def copyStateDict(state_dict):
if list(state_dict.keys())[0].startswith("module"):
start_idx = 1
@@ -39,11 +43,18 @@ def test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold,
x = [np.transpose(normalizeMeanVariance(n_img), (2, 0, 1))
for n_img in img_resized_list]
x = torch.from_numpy(np.array(x))
x = x.to(device)
if 'ov_' in device:
x=x.to('cpu')
else:
x = x.to(device)

# forward pass
with torch.no_grad():
y, feature = net(x)
if 'ov_' in device:
res=net.infer_new_request({0: x})
y=torch.tensor(res[0])
else:
with torch.no_grad():
y, feature = net(x)

boxes_list, polys_list = [], []
for out in y:
@@ -81,12 +92,28 @@ def get_detector(trained_model, device='cpu', quantize=True, cudnn_benchmark=Fal
torch.quantization.quantize_dynamic(net, dtype=torch.qint8, inplace=True)
except:
pass
net.eval()
elif 'ov_' in device:
if 'int8' in device:
ov_device=re.sub('ov_','',device).upper()[:-5]
net_ov=trained_model
else:
ov_device=re.sub('ov_','',device).upper()
net.load_state_dict(copyStateDict(torch.load(trained_model, map_location='cpu')))
dummy_inp = torch.rand(1, 3, 608, 800)
net_ov = ov.convert_model(net, example_input=dummy_inp)
core = ov.Core()
if 'GPU' in ov_device:
cache_dir=os.path.expanduser('~/.EasyOCR/cache')
core.set_property({'CACHE_DIR': cache_dir})
net=core.compile_model(net_ov, ov_device)
print("Text detection model is running with OpenVINO on Intel", ov_device)
else:
net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
net = torch.nn.DataParallel(net).to(device)
cudnn.benchmark = cudnn_benchmark

net.eval()
net.eval()

return net

def get_textbox(detector, image, canvas_size, mag_ratio, text_threshold, link_threshold, low_text, poly, device, optimal_num_chars=None, **kwargs):
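
The OpenVINO branch in `get_detector()` converts the PyTorch CRAFT module to OpenVINO IR and compiles it for the selected device, and `test_net()` then calls the compiled model through `infer_new_request`. A self-contained sketch of that convert, compile and infer pattern, with a toy module standing in for CRAFT (the patch passes the torch tensor to `infer_new_request` directly; the sketch converts to NumPy first):

```python
import torch
import openvino as ov

class TinyNet(torch.nn.Module):
    """Toy stand-in for the CRAFT detector."""
    def forward(self, x):
        return torch.relu(x)

net = TinyNet().eval()
dummy_inp = torch.rand(1, 3, 608, 800)             # same dummy shape as in get_detector()
ov_model = ov.convert_model(net, example_input=dummy_inp)

core = ov.Core()
compiled = core.compile_model(ov_model, "CPU")      # other names such as "GPU.1" or "AUTO" where available

x = torch.rand(1, 3, 608, 800)
res = compiled.infer_new_request({0: x.numpy()})    # inputs keyed by index, as in test_net()
y = torch.tensor(res[0])                            # first output back as a torch tensor
print(y.shape)
```
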
6 changes: 4 additions & 2 deletions easyocr/easyocr.py
@@ -85,9 +85,11 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
self.recognition_models = recognition_models

# check and download detection model
self.support_detection_network = ['craft', 'dbnet18']
self.support_detection_network = ['craft', 'dbnet18', 'craft_ov_int8']
self.quantize=quantize,
self.cudnn_benchmark=cudnn_benchmark
if 'ov_' in self.device and 'int8' in self.device:
detect_network='craft_ov_int8'
if detector:
detector_path = self.getDetectorPath(detect_network)

@@ -235,7 +237,7 @@ def getDetectorPath(self, detect_network):
def getDetectorPath(self, detect_network):
if detect_network in self.support_detection_network:
self.detect_network = detect_network
if self.detect_network == 'craft':
if self.detect_network == 'craft' or self.detect_network == 'craft_ov_int8':
from .detection import get_detector, get_textbox
elif self.detect_network in ['dbnet18']:
from .detection_db import get_detector, get_textbox
56 changes: 56 additions & 0 deletions easyocr/quantize_craft_default.py
@@ -0,0 +1,56 @@
import os
import torch
from torch.utils.data import Dataset
import numpy as np
import openvino as ov
import nncf
import cv2
from easyocr.imgproc import resize_aspect_ratio, normalizeMeanVariance
import argparse

class ICDAR2015Dataset(Dataset):
    def __init__(self, image_dir):
        self.image_dir = image_dir
        self.image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('jpg', 'png'))])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Load and preprocess an image
        image_path = os.path.join(self.image_dir, self.image_files[idx])
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image, _, _ = resize_aspect_ratio(image, 2560, interpolation=cv2.INTER_LINEAR)
        image = np.transpose(normalizeMeanVariance(image), (2, 0, 1))
        return image

def transform_fn(data_item):
    image = data_item
    return image

def quantize_craft(model_path, image_dir):
    core = ov.Core()
    model = core.read_model(model_path)

    # Prepare a calibration dataset
    dataset = ICDAR2015Dataset(image_dir)
    val_data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
    calibration_dataset = nncf.Dataset(val_data_loader, transform_fn)

    # Run quantization and save the INT8 model
    ov_quantized_model = nncf.quantize(model, calibration_dataset)
    int8_model_path = "./INT8/" + model_path.replace(".xml", "_int8.xml")
    ov.save_model(ov_quantized_model, int8_model_path, compress_to_fp16=False)
    print("INT8 model is saved to", int8_model_path)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model_path", help="Path to the model")
    parser.add_argument("image_dir", help="Path to the directory with images")
    args = parser.parse_args()
    quantize_craft(args.model_path, args.image_dir)


if __name__ == "__main__":
    main()
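
Assuming an FP32 CRAFT model has already been exported to OpenVINO IR (for example via `ov.convert_model` followed by `ov.save_model`), the script would be invoked roughly as follows; the IR filename and image directory are placeholders, and the `./INT8/` output directory may need to exist beforehand:

```shell
$ python easyocr/quantize_craft_default.py CRAFT-detector.xml ./icdar2015_test_images
```
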
45 changes: 39 additions & 6 deletions easyocr/recognition.py
@@ -9,6 +9,9 @@
import importlib
from .utils import CTCLabelConverter
import math
import openvino as ov
import os
import re

def custom_mean(x):
return x.prod()**(2.0/np.sqrt(len(x)))
@@ -96,9 +99,21 @@ def __call__(self, batch):
image_tensors = torch.cat([t.unsqueeze(0) for t in resized_images], 0)
return image_tensors

def copyStateDict(state_dict):
new_state_dict = OrderedDict()
for key, value in state_dict.items():
new_key = key[7:]
new_state_dict[new_key] = value
return new_state_dict

def recognizer_predict(model, converter, test_loader, batch_max_length,\
ignore_idx, char_group_idx, decoder = 'greedy', beamWidth= 5, device = 'cpu'):
model.eval()
ov_device=''
if 'ov_' not in device:
model.eval()
else:
ov_device=device
device='cpu'
result = []
with torch.no_grad():
for image_tensors in test_loader:
@@ -108,7 +123,12 @@ def recognizer_predict(model, converter, test_loader, batch_max_length,\
length_for_pred = torch.IntTensor([batch_max_length] * batch_size).to(device)
text_for_pred = torch.LongTensor(batch_size, batch_max_length + 1).fill_(0).to(device)

preds = model(image, text_for_pred)
if ov_device!='':
res = model.infer_new_request({0: image})
preds = next(iter(res.values()))
preds=torch.tensor(preds)
else:
preds = model(image, text_for_pred)

# Select max probabilty (greedy decoding) then decode index to character
preds_size = torch.IntTensor([preds.size(1)] * batch_size)
@@ -167,16 +187,29 @@ def get_recognizer(recog_network, network_params, character,\

if device == 'cpu':
state_dict = torch.load(model_path, map_location=device)
new_state_dict = OrderedDict()
for key, value in state_dict.items():
new_key = key[7:]
new_state_dict[new_key] = value
new_state_dict = copyStateDict(state_dict)
model.load_state_dict(new_state_dict)
if quantize:
try:
torch.quantization.quantize_dynamic(model, dtype=torch.qint8, inplace=True)
except:
pass
elif 'ov_' in device:
state_dict = torch.load(model_path, map_location="cpu")
new_state_dict = copyStateDict(state_dict)
model.load_state_dict(new_state_dict)
if 'int8' in device:
ov_device=re.sub('ov_','',device).upper()[:-5]
else:
ov_device = re.sub('ov_','',device).upper()
core = ov.Core()
if 'GPU' in ov_device:
cache_dir = os.path.expanduser('~/.EasyOCR/cache')
core.set_property({'CACHE_DIR': cache_dir})
dummy_inp = torch.zeros(1, 1, 64, 320),torch.zeros(1,33)
model_ov = ov.convert_model(model,example_input=dummy_inp)
model = core.compile_model(model_ov, ov_device)
print('Text recognition model is running with OpenVINO on Intel ', ov_device)
else:
model = torch.nn.DataParallel(model).to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
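
Both `get_detector()` and `get_recognizer()` derive the OpenVINO device name from the user-facing string by stripping the `ov_` prefix, upper-casing, and trimming a trailing `_INT8` marker (the `[:-5]` slice). A small sketch of that mapping; `to_ov_device` is an illustrative helper, not a function in the patch:

```python
import re

def to_ov_device(device: str) -> str:
    # Mirror the parsing in get_detector()/get_recognizer(): drop 'ov_', upper-case,
    # and trim the 5-character '_INT8' suffix when an INT8 model was requested.
    name = re.sub('ov_', '', device).upper()
    return name[:-5] if 'int8' in device else name

print(to_ov_device('ov_cpu'))       # CPU
print(to_ov_device('ov_gpu.1'))     # GPU.1
print(to_ov_device('ov_auto'))      # AUTO
print(to_ov_device('ov_cpu_int8'))  # CPU
```
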
2 changes: 1 addition & 1 deletion easyocr/utils.py
@@ -627,7 +627,7 @@ def download_and_unzip(url, filename, model_storage_directory, verbose=True):
reporthook = printProgressBar(prefix='Progress:', suffix='Complete', length=50) if verbose else None
urlretrieve(url, zip_path, reporthook=reporthook)
with ZipFile(zip_path, 'r') as zipObj:
zipObj.extract(filename, model_storage_directory)
zipObj.extractall(model_storage_directory)
os.remove(zip_path)

def calculate_md5(fname):
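
The switch from `extract(filename, ...)` to `extractall(...)` matters for the OpenVINO models: an IR archive ships a matching `.xml`/`.bin` pair (note the `.bin` file added below), and both must land in the model directory. A hedged illustration with placeholder file names:

```python
import os
from zipfile import ZipFile

model_dir = os.path.expanduser('~/.EasyOCR/model')
with ZipFile('CRAFT-detector_int8.zip', 'r') as zipObj:
    # e.g. ['CRAFT-detector_int8.xml', 'CRAFT-detector_int8.bin'] -- assumed contents
    print(zipObj.namelist())
    zipObj.extractall(model_dir)   # unpack every member, not just the .xml
```
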
Binary file added openvino_models/CRAFT-detector_int8.bin
Binary file not shown.