Spaces:

DaOppaiLoli
/

KanaWrite

Sleeping

App Files Files Community

KanaWrite / ocr.py

penut85420

remove unused images

d0f39fb 9 months ago

raw

history blame

3.59 kB

	import os

	import cv2
	import numpy as np
	from openvino import Core


	class CodecCTC:
	    """Greedy CTC decoder that turns network outputs into text.

	    Index 0 of the label table is the CTC blank; the supplied characters
	    occupy indices 1..N in the order they were given.
	    """

	    def __init__(self, characters):
	        """Build the label table.

	        Args:
	            characters: Iterable of labels (for example a string); element
	                ``k`` is mapped to class index ``k + 1``.
	        """
	        self.chars = ["[blank]"] + list(characters)

	    def decode(self, preds, top_k=10):
	        """Decode raw scores with best-path (greedy) CTC decoding.

	        Args:
	            preds: Scores indexed as ``[time, batch, class]``.
	            top_k: Maximum number of candidates kept per emitted character.

	        Returns:
	            A ``(texts, nbest)`` tuple. ``texts`` holds one decoded string
	            per non-empty sequence. ``nbest`` is a flat list with one entry
	            per emitted character (in emission order across all sequences);
	            each entry is a list of ``{"prob", "char"}`` dicts sorted by
	            descending probability.
	        """
	        texts, nbest = [], []

	        # Best class per (time, batch) cell, rearranged to one row per sequence.
	        best_path = np.argmax(preds, 2).transpose(1, 0)

	        for batch_idx, labels in enumerate(best_path):
	            # A zero-length sequence contributes nothing, not even an empty
	            # string (kept from the original behaviour).
	            if labels.shape[0] == 0:
	                continue

	            char_list = []
	            for t, label in enumerate(labels):
	                # Drop blanks.
	                if label == 0:
	                    continue

	                # Collapse consecutive repeats of the same label.
	                if t > 0 and labels[t - 1] == label:
	                    continue

	                char_list.append(self.chars[label])

	                # n-best candidates for this time step, taken from the
	                # sequence being decoded (not always from batch item 0).
	                probs = self.softmax(preds[t][batch_idx])
	                k_idx = np.argsort(-probs)[:top_k]
	                k_probs = probs[k_idx]
	                nbest.append(
	                    [dict(prob=p, char=self.chars[j]) for j, p in zip(k_idx, k_probs)]
	                )

	            texts.append("".join(char_list))

	        return texts, nbest

	    def softmax(self, x):
	        """Return the softmax of ``x`` along axis 0.

	        The maximum is subtracted before exponentiation so large scores do
	        not overflow.
	        """
	        e_x = np.exp(x - np.max(x))
	        return e_x / np.sum(e_x, axis=0)


	class Recognizer:
	    """Text recognizer that runs an OpenVINO model and decodes it with CTC."""

	    def __init__(self, model_path, char_list_path):
	        """Load the model, compile it for CPU and build the label codec.

	        Args:
	            model_path: Model file handed to ``Core.read_model``.
	            char_list_path: UTF-8 text file; its characters (newlines
	                removed, lines concatenated) become the label table.
	        """
	        core = Core()
	        self.model = core.read_model(model_path)
	        self.compiled_model = core.compile_model(self.model, "CPU")
	        self.infer_request = self.compiled_model.create_infer_request()

	        # Input layout as used below: (batch_size, channel, height, width).
	        _, _, self.inn_h, self.inn_w = self.model.inputs[0].shape
	        self.input_tensor_name = self.model.inputs[0].get_any_name()
	        self.output_tensor_name = self.model.outputs[0].get_any_name()

	        with open(char_list_path, "r", encoding="utf-8") as f:
	            char_list = "".join(line.strip("\n") for line in f)
	        self.codec = CodecCTC(char_list)

	    def __call__(self, inn_img):
	        """Recognize the text in one image.

	        Args:
	            inn_img: Image array that ``cv2.cvtColor`` can convert with
	                ``cv2.COLOR_RGBA2GRAY``.

	        Returns:
	            The ``(texts, nbest)`` pair returned by ``CodecCTC.decode``.
	        """
	        blob = self.preprocess(inn_img, height=self.inn_h, width=self.inn_w)
	        blob = blob[None, :, :, :]  # [c,h,w] -> [n,c,h,w]

	        # NOTE(review): inference is deliberately kept at two passes over the
	        # same input, as in the original; the reason is not visible in this
	        # file (possibly a warm-up) - confirm before reducing it to one.
	        for _ in range(2):
	            self.infer_request.infer(inputs={self.input_tensor_name: blob})

	        # Both passes see the same input, so reading and decoding the output
	        # once after the last pass yields the same result.
	        preds = self.infer_request.get_tensor(self.output_tensor_name).data[:]
	        return self.codec.decode(preds)

	    def preprocess(self, image, height, width, invert=False):
	        """Convert an image into a ``[1, height, width]`` float32 array.

	        The image is converted to grayscale, optionally inverted, resized to
	        ``height`` rows while keeping its aspect ratio, and padded on the
	        right by repeating the edge column until it is ``width`` wide.

	        Args:
	            image: Source image for ``cv2.cvtColor(..., COLOR_RGBA2GRAY)``.
	            height: Target number of rows.
	            width: Target number of columns.
	            invert: When true, use ``255 - gray`` instead of ``gray``.

	        Returns:
	            The padded array of shape ``(1, height, width)``.
	        """
	        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
	        if invert:
	            gray = 255 - gray

	        ratio = float(gray.shape[1]) / float(gray.shape[0])

	        # Clamp the resized width to [1, width]: a result wider than the
	        # model input would make the pad width negative (np.pad raises), and
	        # a width truncated to 0 is rejected by cv2.resize.
	        tw = min(max(int(height * ratio), 1), width)
	        rsz = cv2.resize(gray, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)

	        # [h,w] -> [c,h,w]
	        img = rsz[None, :, :]
	        _, h, w = img.shape

	        # right edge padding
	        pad_img = np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode="edge")

	        return pad_img


	def main():
	    """Recognize every ``.png`` found under the current directory.

	    Files are visited in sorted path order and each result is printed.
	    """
	    recog = Recognizer("model/model.xml", "model/char_list.txt")

	    target_dir = "."
	    file_list = sorted(
	        os.path.join(dn, fn) for dn, _, ff in os.walk(target_dir) for fn in ff
	    )

	    for fp in file_list:
	        if not fp.endswith(".png"):
	            continue

	        # The recognizer expects an image array, not a path: load the file
	        # and convert OpenCV's BGR order to the RGBA layout that
	        # Recognizer.preprocess converts from.
	        bgr = cv2.imread(fp, cv2.IMREAD_COLOR)
	        if bgr is None:
	            print(f"skip unreadable image: {fp}")
	            continue

	        print(recog(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGBA)))


	if __name__ == "__main__":
	    main()