Spaces:

abdur75648
/

UrduOCR-UTRNet

Running

UrduOCR-UTRNet / app.py

Abdur Rahman

Migrating from Kraken to YoloV8 (Finetuned on UrduDoc)

af39285 9 months ago

2.52 kB

	import torch
	import gradio as gr
	from read import text_recognizer
	from model import Model
	from utils import CTCLabelConverter
	from ultralytics import YOLO
	from PIL import ImageDraw

	""" vocab / character number configuration """
	# Read the glyph list, one or more characters per line, and flatten it into a
	# single string. The context manager guarantees the file handle is closed
	# (the previous bare open() leaked it for the lifetime of the process).
	with open("UrduGlyphs.txt", "r", encoding="utf-8") as file:
		content = file.readlines()
	content = ''.join([str(elem).strip('\n') for elem in content])
	# A trailing space is appended so that the space character is part of the
	# string handed to the label converter.
	content = content + " "

	# Model configuration: run on the GPU when one is available, else on the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	converter = CTCLabelConverter(content)
	recognition_model = Model(num_class=len(converter.character), device=device)
	recognition_model = recognition_model.to(device)
	# map_location lets a checkpoint saved on one device load onto the selected one.
	recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
	recognition_model.eval()

	# YOLO weights used for line detection in predict().
	detection_model = YOLO("yolov8m_UrduDoc.pt")

	# Sample images offered in the Gradio UI.
	examples = ["1.jpg", "2.jpg", "3.jpg"]

	# NOTE: this name shadows the builtin input(); it is kept because the
	# gr.Interface call further down refers to it by this name.
	input = gr.Image(type="pil", image_mode="RGB", label="Input Image")

	def predict(input):
		"""Detect text lines in an image and recognize the text of each line.

		Args:
			input: PIL image delivered by the Gradio image component, or None
				when the user submits without providing an image. (The name
				shadows the builtin but is kept so callers are unaffected.)

		Returns:
			A tuple ``(annotated_image, text)``: a copy of the image with one
			rectangle drawn per detected line, and the recognized lines joined
			with newlines in top-to-bottom order. Returns ``(None, "")`` when
			no image was supplied.
		"""
		if input is None:
			return None, ""

		# Line detection: one xyxy box per detected line, ordered by top edge.
		detection_results = detection_model.predict(source=input, conf=0.2, imgsz=1280, save=False, nms=True, device=device)
		bounding_boxes = detection_results[0].boxes.xyxy.cpu().numpy().tolist()
		bounding_boxes.sort(key=lambda box: box[1])

		# Crop the detected lines from the untouched image. Cropping must not see
		# the rectangles drawn below, otherwise the coloured outlines end up
		# inside the crops that are fed to the recognizer.
		cropped_images = [input.crop(box) for box in bounding_boxes]

		# Draw the bounding boxes on a copy so the caller's image is not mutated.
		from numpy import random
		annotated = input.copy()
		draw = ImageDraw.Draw(annotated)
		for box in bounding_boxes:
			# Random outline colour per box; .tolist() yields plain Python ints.
			outline = tuple(random.randint(0, 255, 3).tolist())
			draw.rectangle(box, fill=None, outline=outline, width=5)

		# Recognize the text of every cropped line.
		texts = [
			text_recognizer(img, recognition_model, converter, device)
			for img in cropped_images
		]

		# Return the image with bounding boxes and the joined text.
		return annotated, "\n".join(texts)

	# Output widgets: the annotated image and an editable, copyable text box.
	output_image = gr.Image(
		type="pil",
		image_mode="RGB",
		label="Detected Lines",
	)
	output_text = gr.Textbox(
		label="Recognized Text",
		interactive=True,
		show_copy_button=True,
	)

	# Wire predict() to the widgets and start the web app.
	iface = gr.Interface(
		fn=predict,
		inputs=input,
		outputs=[output_image, output_text],
		title="End-to-End Urdu OCR",
		description="Demo Web App For UTRNet\n(https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition)",
		examples=examples,
		allow_flagging="never",
	)
	iface.launch()