Spaces:
Sleeping
Sleeping
import os | |
import cv2 | |
import numpy as np | |
from openvino import Core | |
class CodecCTC:
    """Greedy CTC decoder that maps class indices to characters.

    Class index 0 is the CTC blank (stored as ``"[blank]"``); index ``k``
    (``k >= 1``) maps to the ``k``-th item of the character list passed to
    the constructor.
    """

    def __init__(self, characters):
        """Build the index-to-character table.

        Args:
            characters: Iterable of characters (for example a string). Each
                item becomes one class label, in order, after the blank.
        """
        self.chars = ["[blank]"] + list(characters)

    def decode(self, preds, top_k=10):
        """Collapse per-step scores into text and collect top-k candidates.

        Args:
            preds: Scores indexed as ``preds[step][item][class]``. The
                arg-max is taken over the last axis, and the first axis is
                the one walked when collapsing repeats and blanks.
            top_k: Maximum number of candidates kept for each emitted
                character.

        Returns:
            A ``(texts, nbest)`` tuple. ``texts`` holds one decoded string
            per batch item. ``nbest`` is a flat list with one entry per
            emitted character, in emission order across all batch items;
            each entry is a list of ``dict(prob=..., char=...)`` sorted by
            descending probability.
        """
        preds = np.asarray(preds)
        # (step, item) -> (item, step): one row of best labels per batch item.
        best_path = np.argmax(preds, 2).transpose(1, 0)

        texts, nbest = [], []
        for item_idx, labels in enumerate(best_path):
            if labels.shape[0] == 0:
                continue
            char_list = []
            for step_idx, label in enumerate(labels):
                if label == 0:
                    continue
                # Drop a label that merely repeats the previous step.
                if step_idx > 0 and labels[step_idx - 1] == label:
                    continue
                char_list.append(self.chars[label])
                # Rank candidates from the scores of the item being decoded.
                # Indexing a fixed item 0 here would report another
                # sequence's candidates for every item after the first.
                probs = self.softmax(preds[step_idx][item_idx])
                k_idx = np.argsort(-probs)[:top_k]
                nbest.append(
                    [dict(prob=p, char=self.chars[j]) for j, p in zip(k_idx, probs[k_idx])]
                )
            texts.append("".join(char_list))
        return texts, nbest

    def softmax(self, x):
        """Return the softmax of ``x``, normalised along axis 0.

        The maximum of ``x`` is subtracted before exponentiation so large
        scores do not overflow.
        """
        e_x = np.exp(x - np.max(x))
        return e_x / np.sum(e_x, axis=0)
class Recognizer:
    """Text-line recognizer: an OpenVINO model plus a CTC decoder."""

    def __init__(self, model_path, char_list_path):
        """Load and compile the model, then build the decoder.

        Args:
            model_path: Path passed to ``Core.read_model``.
            char_list_path: UTF-8 text file whose characters (newlines
                removed, lines concatenated) form the label set.
        """
        core = Core()
        self.model = core.read_model(model_path)
        self.compiled_model = core.compile_model(self.model, "CPU")
        self.infer_request = self.compiled_model.create_infer_request()
        # The input shape is unpacked as (batch_size, channel, height, width).
        _, _, self.inn_h, self.inn_w = self.model.inputs[0].shape
        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()
        with open(char_list_path, "r", encoding="utf-8") as f:
            char_list = "".join(line.strip("\n") for line in f)
        self.codec = CodecCTC(char_list)

    def __call__(self, inn_img):
        """Recognize the text in one image.

        Args:
            inn_img: Image array accepted by ``cv2.cvtColor`` with
                ``cv2.COLOR_RGBA2GRAY``.

        Returns:
            The ``(result, nbest)`` pair produced by ``CodecCTC.decode``.
        """
        inn_img = self.preprocess(inn_img, height=self.inn_h, width=self.inn_w)
        # Add the leading batch axis: [c,h,w] -> [1,c,h,w].
        inn_img = inn_img[None, :, :, :]
        # NOTE(review): inference is run twice on the same input and only the
        # last output is read. Kept as-is; confirm whether the first pass is
        # an intentional warm-up before removing it.
        for _ in range(2):
            self.infer_request.infer(inputs={self.input_tensor_name: inn_img})
        preds = self.infer_request.get_tensor(self.output_tensor_name).data[:]
        result, nbest = self.codec.decode(preds)
        return result, nbest

    @staticmethod
    def _target_width(src_h, src_w, height, width):
        """Return the resize width that keeps aspect ratio, clamped to [1, width]."""
        ratio = float(src_w) / float(src_h)
        return max(1, min(width, int(height * ratio)))

    def preprocess(self, image, height, width, invert=False):
        """Convert an image to a padded single-channel float32 array.

        The image is converted to grayscale, optionally inverted, resized to
        ``height`` rows keeping its aspect ratio, and padded on the right by
        repeating the edge column until it is ``width`` columns wide.

        Args:
            image: Image array accepted by ``cv2.cvtColor`` with
                ``cv2.COLOR_RGBA2GRAY``.
            height: Target number of rows.
            width: Target number of columns.
            invert: When true, use ``255 - gray`` instead of ``gray``.

        Returns:
            ``np.float32`` array of shape ``(1, height, width)``.
        """
        src: np.ndarray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        src = (255 - src) if invert else src
        # Clamp the scaled width: anything wider than the model input would
        # make the right-pad amount negative (np.pad rejects that), and a
        # zero width is not a valid resize target.
        tw = self._target_width(src.shape[0], src.shape[1], height, width)
        rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)
        # [h,w] -> [c,h,w]
        img = rsz[None, :, :]
        _, h, w = img.shape
        # right edge padding
        pad_img = np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode="edge")
        return pad_img
def main():
    """Recognize every ``.png`` file under the current directory.

    Walks ``"."`` recursively, processes the PNG files in sorted path order,
    and prints the ``(result, nbest)`` pair returned by the recognizer for
    each one. Files that cannot be decoded as images are reported on stderr
    and skipped.
    """
    import sys

    recog = Recognizer("model/model.xml", "model/char_list.txt")
    target_dir = "."
    file_list = sorted(
        os.path.join(dn, fn)
        for dn, _, ff in os.walk(target_dir)
        for fn in ff
        if fn.endswith(".png")
    )
    for fp in file_list:
        # The recognizer works on pixel arrays, not on paths, so the file
        # has to be decoded here before it is handed over.
        image = cv2.imread(fp, cv2.IMREAD_COLOR)
        if image is None:
            print(f"skipping unreadable image: {fp}", file=sys.stderr)
            continue
        # imread returns BGR; the recognizer converts from RGBA to gray.
        print(recog(cv2.cvtColor(image, cv2.COLOR_BGR2RGBA)))


if __name__ == "__main__":
    main()