# Sakibrumu's picture
# Update app.py
# 07eed04 verified
import gradio as gr
import cv2
import easyocr
import numpy as np
import os
from PIL import Image
from ultralytics import YOLO
from datetime import datetime
# Detector: YOLO weights loaded once at import time from the app directory.
model = YOLO("/home/user/app/best.pt")

# Class index -> display name for the three classes drawn on detections.
label_map = dict(enumerate(("Analog", "Digital", "Non-LP")))

# OCR: a single EasyOCR reader configured for Bengali ('bn'), reused per call.
reader = easyocr.Reader(['bn'])
def annotate_frame(frame):
    """Detect plates in a BGR frame and draw boxes, labels and OCR text.

    Detection and drawing happen on a 640x640 resized copy of ``frame``; the
    input array itself is not drawn on. Each detected box is cropped out of
    the original ``frame`` and passed to the module-level EasyOCR ``reader``;
    every recognised string is drawn below the box.

    Args:
        frame: BGR image array (e.g. from ``cv2.imread`` or
            ``cv2.VideoCapture.read``). Any size is accepted.

    Returns:
        The annotated 640x640 image converted to RGB channel order.
    """
    input_img = cv2.resize(frame, (640, 640))
    results = model(input_img)[0]
    detections = results.boxes.data.cpu().numpy()

    # Boxes are expressed in the 640x640 detection space. The OCR crop is
    # taken from the original frame, so map coordinates back to its real
    # size (both factors are 1.0 when the caller already resized to 640x640).
    frame_h, frame_w = frame.shape[:2]
    scale_x = frame_w / 640
    scale_y = frame_h / 640

    for det in detections:
        if len(det) < 6:
            continue
        x1, y1, x2, y2, conf, cls = det
        x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box and label; keep the label baseline inside the image when
        # the box touches the top edge.
        cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(input_img, f"{label}: {percent}", (x1, max(y1 - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR crop in original-frame coordinates, clamped so a negative or
        # oversized coordinate cannot wrap around or run past the array.
        crop_x1 = max(int(x1 * scale_x), 0)
        crop_y1 = max(int(y1 * scale_y), 0)
        crop_x2 = min(int(x2 * scale_x), frame_w)
        crop_y2 = min(int(y2 * scale_y), frame_h)
        cropped = frame[crop_y1:crop_y2, crop_x1:crop_x2]
        if cropped.size == 0:
            continue

        # NOTE(review): OpenCV's Hershey fonts cover only ASCII, so Bengali
        # text returned by the reader will likely render as '?' here.
        # Drawing with a Unicode TTF (e.g. via PIL) would be needed to show
        # the actual glyphs.
        for i, item in enumerate(reader.readtext(cropped)):
            text = item[1].strip()
            cv2.putText(input_img, text, (x1, y2 + 20 + i * 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

    return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
def process_input(input_file):
    """Annotate an uploaded image or video and return the Gradio outputs.

    Files ending in ``.mp4``, ``.avi`` or ``.mov`` are treated as video:
    every 5th frame is resized to 640x640, annotated and written to
    ``annotated_output.mp4``. Anything else is read as a single image.

    Args:
        input_file: The uploaded file. May be a path string (what
            ``gr.File(type="filepath")`` passes), an ``os.PathLike``, an
            object exposing the path as ``.name``, or ``None`` when nothing
            was uploaded.

    Returns:
        A 4-tuple ``(video_path, pil_image, text_message, confidence_message)``
        matching the interface's four outputs. Exactly one of ``video_path``
        / ``pil_image`` is set on success; both are ``None`` on failure.
    """
    if input_file is None:
        return None, None, "No file uploaded", ""

    # A plain path has no ``.name`` attribute (and ``Path.name`` is only the
    # basename), so resolve str / PathLike inputs explicitly.
    if isinstance(input_file, (str, os.PathLike)):
        file_path = os.fspath(input_file)
    else:
        file_path = input_file.name
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in ('.mp4', '.avi', '.mov'):
        cap = cv2.VideoCapture(file_path)
        if not cap.isOpened():
            cap.release()
            return None, None, "Could not open video file", ""

        frame_skip = 5
        output_path = "annotated_output.mp4"
        out = None
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                # Some containers report 0 or NaN; fall back to a sane rate.
                fps = 25.0
            # Only one frame in ``frame_skip`` is written, so the writer's
            # rate is reduced by the same factor to keep real-time playback.
            out = cv2.VideoWriter(output_path,
                                  cv2.VideoWriter_fourcc(*'mp4v'),
                                  fps / frame_skip, (640, 640))
            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                selected = frame_id % frame_skip == 0
                frame_id += 1
                if not selected:
                    continue
                frame = cv2.resize(frame, (640, 640))
                annotated = annotate_frame(frame)
                # annotate_frame returns RGB; the writer expects BGR.
                out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        finally:
            # Release handles even if detection/OCR raises mid-video.
            cap.release()
            if out is not None:
                out.release()
        return output_path, None, "Bangla text in video (see frames)", "OCR confidence displayed"

    frame = cv2.imread(file_path)
    if frame is None:
        return None, None, "Invalid image", ""
    frame = cv2.resize(frame, (640, 640))
    annotated = annotate_frame(frame)
    pil_img = Image.fromarray(annotated)
    return None, pil_img, "Bangla text in image", "OCR confidence in image"
# Gradio UI: a single file upload feeds process_input, whose 4-tuple result
# maps positionally onto the four output components below.
interface = gr.Interface(
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using EasyOCR.",
    fn=process_input,
    inputs=gr.File(label="Upload Image or Video", type="filepath"),
    outputs=[
        gr.Video(label="Output Video"),                # annotated video path
        gr.Image(label="Output Image", type="pil"),    # annotated still image
        gr.Textbox(label="Detected Text (Bangla)"),    # status / text message
        gr.Textbox(label="Confidence (%)"),            # confidence message
    ],
)

# Start the web server for the app.
interface.launch()