import os
import re
from collections import OrderedDict

import cv2
import numpy as np
import openvino as ov
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from PIL import Image

from .craft import CRAFT
from .craft_utils import getDetBoxes, adjustResultCoordinates
from .imgproc import resize_aspect_ratio, normalizeMeanVariance


def copyStateDict(state_dict):
    # Strip the "module." prefix that torch.nn.DataParallel prepends to parameter
    # names, so checkpoints saved from multi-GPU training load into a bare model.
    if list(state_dict.keys())[0].startswith("module"):
        start_idx = 1
    else:
        start_idx = 0
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = ".".join(k.split(".")[start_idx:])
        new_state_dict[name] = v
    return new_state_dict
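
# Minimal usage sketch (checkpoint path is illustrative, not defined in this module):
# keys such as "module.<param>" become "<param>" so they load into a bare CRAFT net.
#
#   raw = torch.load("craft_mlt_25k.pth", map_location="cpu")
#   net = CRAFT()
#   net.load_state_dict(copyStateDict(raw))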


def test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold, low_text, poly, device, estimate_num_chars=False):
    if isinstance(image, np.ndarray) and len(image.shape) == 4:  # image is batch of np arrays
        image_arrs = image
    else:  # image is single numpy array
        image_arrs = [image]

    img_resized_list = []
    # resize
    for img in image_arrs:
        img_resized, target_ratio, size_heatmap = resize_aspect_ratio(img, canvas_size,
                                                                      interpolation=cv2.INTER_LINEAR,
                                                                      mag_ratio=mag_ratio)
        img_resized_list.append(img_resized)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing: normalize and convert HWC -> CHW
    x = [np.transpose(normalizeMeanVariance(n_img), (2, 0, 1))
         for n_img in img_resized_list]
    x = torch.from_numpy(np.array(x))
    if 'ov_' in device:
        # OpenVINO compiled models consume host (CPU) tensors
        x = x.to('cpu')
    else:
        x = x.to(device)

    # forward pass
    if 'ov_' in device:
        res = net.infer_new_request({0: x})
        y = torch.tensor(res[0])
    else:
        with torch.no_grad():
            y, feature = net(x)

    boxes_list, polys_list = [], []
    for out in y:
        # make score and link map
        score_text = out[:, :, 0].cpu().data.numpy()
        score_link = out[:, :, 1].cpu().data.numpy()

        # Post-processing
        boxes, polys, mapper = getDetBoxes(
            score_text, score_link, text_threshold, link_threshold, low_text, poly, estimate_num_chars)

        # coordinate adjustment back to original image scale
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        if estimate_num_chars:
            boxes = list(boxes)
            polys = list(polys)
        for k in range(len(polys)):
            if estimate_num_chars:
                boxes[k] = (boxes[k], mapper[k])
            if polys[k] is None:
                polys[k] = boxes[k]
        boxes_list.append(boxes)
        polys_list.append(polys)

    return boxes_list, polys_list
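
# Sketch of a direct call (threshold values are illustrative; EasyOCR's Reader
# supplies its own defaults): test_net accepts a single RGB numpy image or a
# batch and returns per-image lists of boxes and polygons in original coordinates.
#
#   boxes_list, polys_list = test_net(canvas_size=2560, mag_ratio=1.0, net=detector,
#                                     image=rgb_image, text_threshold=0.7,
#                                     link_threshold=0.4, low_text=0.4,
#                                     poly=False, device='cpu')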


def get_detector(trained_model, device='cpu', quantize=True, cudnn_benchmark=False):
    net = CRAFT()

    if device == 'cpu':
        net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
        if quantize:
            try:
                torch.quantization.quantize_dynamic(net, dtype=torch.qint8, inplace=True)
            except:
                # fall back to the float model if dynamic quantization is unavailable
                pass
        net.eval()
    elif 'ov_' in device:
        # OpenVINO targets are prefixed with 'ov_', e.g. 'ov_cpu', 'ov_gpu', 'ov_cpu_int8'
        if 'int8' in device:
            # trained_model is expected to be an already-quantized OpenVINO IR
            ov_device = re.sub('ov_', '', device).upper()[:-5]  # drop trailing '_INT8'
            net_ov = trained_model
        else:
            ov_device = re.sub('ov_', '', device).upper()
            net.load_state_dict(copyStateDict(torch.load(trained_model, map_location='cpu')))
            dummy_inp = torch.rand(1, 3, 608, 800)
            net_ov = ov.convert_model(net, example_input=dummy_inp)
        core = ov.Core()
        if 'GPU' in ov_device:
            # cache compiled kernels so GPU plugin start-up is faster on subsequent runs
            cache_dir = os.path.expanduser('~/.EasyOCR/cache')
            core.set_property({'CACHE_DIR': cache_dir})
        net = core.compile_model(net_ov, ov_device)
        print("Text detection model is running with OpenVINO on Intel", ov_device)
    else:
        net.load_state_dict(copyStateDict(torch.load(trained_model, map_location=device)))
        net = torch.nn.DataParallel(net).to(device)
        cudnn.benchmark = cudnn_benchmark
        net.eval()

    return net
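
# Usage sketch (weight paths are illustrative; the device strings follow from the
# parsing above):
#
#   detector = get_detector("craft_mlt_25k.pth", device="cpu")       # quantized PyTorch on CPU
#   detector = get_detector("craft_mlt_25k.pth", device="ov_cpu")    # convert .pth to OpenVINO on the fly
#   detector = get_detector("craft_int8.xml", device="ov_gpu_int8")  # pre-quantized OpenVINO IR on Intel GPU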


def get_textbox(detector, image, canvas_size, mag_ratio, text_threshold, link_threshold, low_text, poly, device, optimal_num_chars=None, **kwargs):
    result = []
    estimate_num_chars = optimal_num_chars is not None
    bboxes_list, polys_list = test_net(canvas_size, mag_ratio, detector,
                                       image, text_threshold,
                                       link_threshold, low_text, poly,
                                       device, estimate_num_chars)
    if estimate_num_chars:
        # prefer regions whose estimated character count is closest to optimal_num_chars
        polys_list = [[p for p, _ in sorted(polys, key=lambda x: abs(optimal_num_chars - x[1]))]
                      for polys in polys_list]

    for polys in polys_list:
        single_img_result = []
        for i, box in enumerate(polys):
            poly = np.array(box).astype(np.int32).reshape((-1))
            single_img_result.append(poly)
        result.append(single_img_result)

    return result
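
# End-to-end sketch tying the pieces together (image path, weight file, and threshold
# values are illustrative; EasyOCR normally drives get_textbox through its Reader class):
#
#   img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)
#   detector = get_detector("craft_mlt_25k.pth", device="cpu")
#   boxes = get_textbox(detector, img, canvas_size=2560, mag_ratio=1.0,
#                       text_threshold=0.7, link_threshold=0.4, low_text=0.4,
#                       poly=False, device="cpu")
#   # boxes[0] holds flattened int32 polygons (x1, y1, ..., x4, y4) for the image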