Update app.py
app.py CHANGED
@@ -1,55 +1,72 @@
 import gradio as gr
-import
-import
-import
-model =
-def
-        frame = video.get_frame(i)
-        image = Image.fromarray(frame)
-        label = classify_frame(image)
-        if "baseball" in label.lower():
-            result = "The runner is out"
+import torch
+import cv2
+import numpy as np
+import pytesseract
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+# Load the object detection model (YOLOv5 small, pretrained on COCO)
+model_name = "flax-community/yolov5s-v1-coco"  # unused; the detector below comes from torch.hub
+model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
+
+# Load the Hugging Face model for text classification
+# (distilbert-base-uncased has no fine-tuned classification head, so its labels are placeholders)
+classification_model_name = "distilbert-base-uncased"
+classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
+classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
+
+# Define function for OCR
+def perform_ocr(image):
+    # Convert image to grayscale
+    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # Perform OCR
+    text = pytesseract.image_to_string(gray_image)
+    return text
+
+# Define function to process the video and predict the runner's status
+def predict_runner_status(video_file):
+    cap = cv2.VideoCapture(video_file.name)
+
+    results = []
+
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret:
             break
 
+        # Object detection
+        results_detection = model(frame)
+
+        # Logic for determining runner status using detected objects
+        # Example: if a person is detected, crop it, run OCR, and classify the text
+        objects = results_detection.pred[0][:, -1].numpy()
+        if 0 in objects:  # class 0 corresponds to "person" in COCO
+            # Crop the region containing the person for OCR
+            person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
+            person_bbox = person_bbox.numpy().astype(int)  # tensor -> integer pixel coordinates
+            person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
+
+            # Perform OCR on the cropped image
+            text = perform_ocr(person_img)
+
+            # Classification using the text classification model
+            inputs_classification = classification_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+            outputs_classification = classification_model(**inputs_classification)
+            predicted_class = torch.argmax(outputs_classification.logits).item()
+            if predicted_class == 1:
+                runner_status = "Out"
+            else:
+                runner_status = "Safe"
+
+            result = {
+                "frame_number": cap.get(cv2.CAP_PROP_POS_FRAMES),
+                "runner_status": runner_status
+            }
+            results.append(result)
+
+    cap.release()
+
+    return results
+
+inputs = gr.inputs.Video(type="file", label="Upload a baseball video")
+outputs = gr.outputs.JSON(label="Runner Status")  # the function returns a list of dicts, not a single label
+interface = gr.Interface(fn=predict_runner_status, inputs=inputs, outputs=outputs, title="Baseball Runner Status Predictor")
+interface.launch(share=True)
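The detection step above takes the first person box regardless of detection confidence. A minimal sketch of confidence-aware filtering, assuming the (x1, y1, x2, y2, confidence, class) row layout that YOLOv5's Detections.pred tensors use; min_conf is an illustrative parameter, not part of the app:

def person_boxes(results_detection, min_conf=0.5):
    # Each row of pred[0] is (x1, y1, x2, y2, confidence, class)
    det = results_detection.pred[0]
    mask = (det[:, 5] == 0) & (det[:, 4] >= min_conf)  # class 0 = "person" in COCO
    # Integer pixel boxes for every person above the confidence threshold
    return det[mask, :4].int().tolist()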
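Since perform_ocr only wraps grayscale conversion and pytesseract, it can be sanity-checked on a single saved frame before running the full video loop. A quick check, assuming a local frame.jpg and the tesseract binary installed:

import cv2
import pytesseract

frame = cv2.imread("frame.jpg")  # BGR array, the same layout cv2.VideoCapture yields
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
print(pytesseract.image_to_string(gray))  # raw recognized text; empty if nothing is legible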
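Note that distilbert-base-uncased ships without a fine-tuned classification head, so predicted_class == 1 does not genuinely mean "Out"; the head's weights are randomly initialized. A sketch of the intended setup, assuming a hypothetical fine-tuned checkpoint your-org/runner-status-distilbert whose label 1 was trained to mean "Out":

from transformers import pipeline

# "your-org/runner-status-distilbert" is a placeholder, not a published checkpoint
classifier = pipeline("text-classification", model="your-org/runner-status-distilbert")
prediction = classifier("OUT AT SECOND")[0]  # e.g. {"label": "OUT", "score": ...}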