MNGames committed on
Commit 23ebda3
1 Parent(s): 948a376

Update app.py

Files changed (1)
  1. app.py +69 -52
app.py CHANGED
@@ -1,55 +1,72 @@
  import gradio as gr
- import tensorflow as tf
- import numpy as np
- from PIL import Image
- import io
- import moviepy.editor as mp
-
- # Load a pre-trained TensorFlow model (replace with your model path)
- model = tf.keras.applications.MobileNetV2(weights="imagenet")
-
- def preprocess_image(image):
-     img = np.array(image)
-     img = tf.image.resize(img, (224, 224))
-     img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
-     return np.expand_dims(img, axis=0)
-
- def classify_frame(frame):
-     processed_frame = preprocess_image(frame)
-     predictions = model.predict(processed_frame)
-     decoded_predictions = tf.keras.applications.mobilenet_v2.decode_predictions(predictions, top=1)[0]
-     return decoded_predictions[0][1]
-
- def process_video(video_file):
-     result = ""
-     if isinstance(video_file, str):  # If the input is a file path
-         video = mp.VideoFileClip(video_file)
-     else:  # If the input is a file-like object
-         video = mp.VideoFileClip(io.BytesIO(video_file.read()))
-     duration = int(video.duration)
-     frame_interval = duration // 10  # Analyze 10 frames evenly spaced throughout the video
-
-     for i in range(0, duration, frame_interval):
-         frame = video.get_frame(i)
-         image = Image.fromarray(frame)
-         label = classify_frame(image)
-
-         if "baseball" in label.lower():
-             result = "The runner is out"
              break

-     if result == "":
-         result = "The runner is safe"
-
-     return result
-
- iface = gr.Interface(
-     fn=process_video,
-     inputs="video",
-     outputs="text",
-     title="Baseball Runner Status",
-     description="Upload a baseball video to determine if the runner is out or safe."
- )
-
- if __name__ == "__main__":
-     iface.launch()
  import gradio as gr
+ import torch
+ import cv2
+ import numpy as np
+ import pytesseract
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ # Load a pre-trained YOLOv5 model from the PyTorch Hub for object detection
+ model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
+
+ # Load the Hugging Face model for text classification
+ # Note: distilbert-base-uncased ships with an untrained classification head;
+ # swap in a fine-tuned checkpoint for meaningful out/safe predictions.
+ classification_model_name = "distilbert-base-uncased"
+ classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_name)
+ classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_name)
+
+ # Define function for OCR
+ def perform_ocr(image):
+     # Convert image to grayscale
+     gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     # Perform OCR
+     text = pytesseract.image_to_string(gray_image)
+     return text
+
+ # Define function to process video and predict
+ def predict_runner_status(video_file):
+     cap = cv2.VideoCapture(video_file.name)
+
+     results = []
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
              break

+         # Object detection
+         results_detection = model(frame)
+
+         # Logic for determining runner status using detected objects
+         # Example: if a person is detected, extract text and classify it
+         objects = results_detection.pred[0][:, -1].cpu().numpy()
+         if 0 in objects:  # class 0 corresponds to "person" in COCO
+             # Get the cropped region containing the person for OCR
+             person_bbox = results_detection.pred[0][np.where(objects == 0)][0][:4]
+             person_bbox = person_bbox.cpu().numpy().astype(int)
+             person_img = frame[person_bbox[1]:person_bbox[3], person_bbox[0]:person_bbox[2]]
+
+             # Perform OCR on the cropped image
+             text = perform_ocr(person_img)
+
+             # Classification using the text classification model
+             inputs_classification = classification_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+             outputs_classification = classification_model(**inputs_classification)
+             predicted_class = torch.argmax(outputs_classification.logits).item()
+             runner_status = "Out" if predicted_class == 1 else "Safe"
+
+             result = {
+                 "frame_number": int(cap.get(cv2.CAP_PROP_POS_FRAMES)),
+                 "runner_status": runner_status
+             }
+             results.append(result)
+
+     cap.release()
+
+     return results
+
+ inputs = gr.inputs.Video(type="file", label="Upload a baseball video")
+ outputs = gr.outputs.JSON(label="Runner Status")
+ interface = gr.Interface(fn=predict_runner_status, inputs=inputs, outputs=outputs, title="Baseball Runner Status Predictor")
+ interface.launch(share=True)
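
For reference, a minimal sketch (not part of the commit, and assuming the standard ultralytics/yolov5 hub API) of the detection output the new code consumes: each row of results.pred[0] is a tensor [x1, y1, x2, y2, confidence, class_id], where class id 0 is "person" in the COCO label set. The crop_first_person helper below is hypothetical and only mirrors the person-cropping step of predict_runner_status:

import torch
import numpy as np

model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

def crop_first_person(frame):
    """Return the first detected person region of a frame, or None."""
    pred = model(frame).pred[0]              # shape: (num_detections, 6)
    classes = pred[:, -1].cpu().numpy()      # last column holds the class id
    person_rows = np.where(classes == 0)[0]  # COCO class 0 == "person"
    if person_rows.size == 0:
        return None
    x1, y1, x2, y2 = pred[person_rows[0], :4].cpu().numpy().astype(int)
    return frame[y1:y2, x1:x2]

Note that pytesseract only wraps the Tesseract engine: the tesseract-ocr system package must also be installed on the host (on a Hugging Face Space, via a packages.txt entry), otherwise perform_ocr will raise TesseractNotFoundError.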