Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
import cv2
|
4 |
import pytesseract
|
5 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
6 |
|
@@ -13,12 +12,10 @@ classification_model_name = "distilbert-base-uncased"
|
|
13 |
classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
|
14 |
classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
|
15 |
|
16 |
-
# Define function for OCR
|
17 |
def perform_ocr(image):
|
18 |
-
#
|
19 |
-
|
20 |
-
# Perform OCR
|
21 |
-
text = pytesseract.image_to_string(gray_image)
|
22 |
return text
|
23 |
|
24 |
# Define function to process video and predict
|
@@ -36,15 +33,12 @@ def predict_runner_status(video_file):
|
|
36 |
results_detection = model(frame)
|
37 |
|
38 |
# Logic for determining runner status using detected objects
|
39 |
-
# Example: if person detected, extract text and classify
|
40 |
objects = results_detection.pred[0][:, -1].numpy()
|
41 |
if 0 in objects: # 0 corresponds to person class
|
42 |
-
#
|
43 |
person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
|
44 |
person_bbox = person_bbox.astype(int)
|
45 |
person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
|
46 |
-
|
47 |
-
# Perform OCR on the cropped image
|
48 |
text = perform_ocr(person_img)
|
49 |
|
50 |
# Classification using text classification model
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
3 |
import pytesseract
|
4 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
|
|
|
12 |
classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
|
13 |
classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
|
14 |
|
15 |
+
# Define function for text recognition (OCR)
|
16 |
def perform_ocr(image):
    """Extract text from an image with Tesseract OCR.

    Args:
        image: Input forwarded unchanged to ``pytesseract.image_to_string``.
            The visible caller passes a crop sliced out of a video frame.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for ``image``,
        or an empty string when ``image`` is ``None`` or is an array-like
        whose ``size`` attribute equals 0.
    """
    # A crop whose bounding box has zero width or height contains no pixels,
    # so there is nothing to recognise; skip the Tesseract call entirely.
    # ``getattr`` keeps this safe for inputs without an integer ``size``
    # (e.g. file paths, or objects whose ``size`` is a tuple).
    if image is None or getattr(image, "size", None) == 0:
        return ""
    return pytesseract.image_to_string(image)
|
20 |
|
21 |
# Define function to process video and predict
|
|
|
33 |
results_detection = model(frame)
|
34 |
|
35 |
# Logic for determining runner status using detected objects
|
|
|
36 |
objects = results_detection.pred[0][:, -1].numpy()
|
37 |
if 0 in objects: # 0 corresponds to person class
|
38 |
+
# Perform OCR on the detected person
|
39 |
person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
|
40 |
person_bbox = person_bbox.astype(int)
|
41 |
person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
|
|
|
|
|
42 |
text = perform_ocr(person_img)
|
43 |
|
44 |
# Classification using text classification model
|