Spaces:

toandev
/

OCR-for-Captcha

Running

App Files Files

OCR-for-Captcha / app.py

toandev

Add image examples and refactor app for improved OCR functionality

bcb8d00 about 2 months ago

raw

history blame

2.26 kB

	import torch
	import onnx
	import onnxruntime as rt
	from torchvision import transforms as T
	from pathlib import Path
	from PIL import Image
	from huggingface_hub import hf_hub_download

	import os
	import gradio as gr

	from utils.tokenizer_base import Tokenizer


	# Download the model from Hugging Face Hub
	cwd = Path(__file__).parent.resolve()
	model_file = os.path.join(cwd, hf_hub_download("toandev/OCR-for-Captcha", "model.onnx"))

	# Define the image size and vocabulary
	img_size = (32, 128)
	vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{\|}~"

	# Initialize the tokenizer
	tokenizer = Tokenizer(vocab)


	def to_numpy(tensor):
	"""Convert tensor to numpy."""
	return (
	tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
	)


	def get_transform(img_size):
	"""Preprocess the input image."""
	transforms = []
	transforms.extend(
	[
	T.Resize(img_size, T.InterpolationMode.BICUBIC),
	T.ToTensor(),
	T.Normalize(0.5, 0.5),
	]
	)
	return T.Compose(transforms)


	def load_model(model_file):
	"""Load the model and return the transform function."""
	transform = get_transform(img_size)

	onnx_model = onnx.load(model_file)
	onnx.checker.check_model(onnx_model)

	s = rt.InferenceSession(model_file)
	return transform, s


	# Load the model
	transform, s = load_model(model_file=model_file)


	def process(img: Image.Image):
	"""Predict the text from the input image."""
	x = transform(img.convert("RGB")).unsqueeze(0)

	ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
	logits = s.run(None, ort_inputs)[0]
	probs = torch.tensor(logits).softmax(-1)
	preds, probs = tokenizer.decode(probs)

	return preds[0]


	iface = gr.Interface(
	process,
	gr.Image(type="pil", label="Input Image"),
	gr.Textbox(label="Predicted Text"),
	title="OCR for CAPTCHA",
	description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
	examples=[
	"examples/1.png",
	"examples/2.jpg",
	"examples/3.jpg",
	"examples/4.png",
	"examples/5.png",
	],
	)

	if __name__ == "__main__":
	iface.launch()