DailySnap / app.py
yasserrmd's picture
Update app.py
9c69830 verified
raw
history blame
4.01 kB
import gradio as gr
from ultralytics import YOLOv10
import cv2
import torch
import os
import spaces
# Choose the inference device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the 'jameslahm/yolov10x' pretrained weights, then move the model onto
# the device selected above.
model = YOLOv10.from_pretrained('jameslahm/yolov10x')
model = model.to(device)
# Lookup table from an activity name to the object labels that indicate it.
# A label may appear under several activities (e.g. "bicycle"). Key order is
# significant: it is the order in which activities are checked and reported.
# NOTE(review): confirm that every label below is a class name the detector
# can actually emit; a label the model never produces will silently never
# trigger its activity.
activity_categories = {
    "Working": [
        "laptop",
        "computer",
        "keyboard",
        "office chair",
    ],
    "Meal Time": [
        "fork",
        "spoon",
        "plate",
        "food",
    ],
    "Exercise": [
        "dumbbell",
        "bicycle",
        "yoga mat",
        "treadmill",
    ],
    "Outdoors": [
        "car",
        "tree",
        "bicycle",
        "road",
    ],
    # Add more categories and objects as needed
}
# Function to map detected objects to categorized activities
def categorize_activity(detected_objects, categories=None):
    """Map the object labels detected in one frame to the activities they suggest.

    Args:
        detected_objects: Collection of label strings detected in a frame.
        categories: Optional mapping of activity name to the list of labels
            that indicate that activity. When omitted, the module-level
            ``activity_categories`` table is used.

    Returns:
        A dict with one key per activity that has at least one indicator
        label present in ``detected_objects``, in the iteration order of the
        category table. Each value is a one-element list whose single item is
        ``detected_objects`` itself (the complete detection list, not just
        the labels that matched); callers read it as ``value[0]``.
    """
    if categories is None:
        categories = activity_categories

    # Every activity key is visited exactly once, so each entry can be built
    # directly; no "already present" check is needed.
    return {
        activity: [detected_objects]
        for activity, indicator_labels in categories.items()
        if any(label in detected_objects for label in indicator_labels)
    }
# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path):
    """Run object detection on every frame of a video and build an activity journal.

    Each frame is passed through the module-level ``model``; the annotated
    frame is written to ``detected_frames/frame_<n>.jpg`` and the detected
    labels are mapped to activities with ``categorize_activity``.

    Args:
        video_path: Path of the video file to analyse. A falsy value (for
            example ``None`` when nothing was uploaded) yields an empty
            journal.

    Returns:
        A flat list grouped by activity. Each group starts with a heading
        string of the form ``"**<activity>:**"`` followed by one
        ``(text, image_path)`` tuple per frame in which that activity was
        detected. The list is empty when no activity was detected or the
        video could not be read.
    """
    if not video_path:
        # Nothing to analyse; avoid handing OpenCV an invalid source.
        return []

    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)  # Create folder to store images

    journal_entries = {}
    frame_count = 0
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames as BGR, which is the channel order
            # Ultralytics assumes for numpy input, so the frame is passed
            # through unconverted. predict() returns a list with one Results
            # object per input image; a single frame gives a single result.
            result = model.predict(source=frame, device=device)[0]

            # plot() returns a new BGR image with the boxes drawn on it (the
            # input frame is not modified), ready for cv2.imwrite.
            annotated_frame = result.plot()
            frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_filename, annotated_frame)

            # Map each detected class index to its class name.
            detected_objects = [
                result.names[int(class_id)] for class_id in result.boxes.cls.tolist()
            ]

            # Current position in the video, converted from ms to seconds.
            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

            # categorize_activity wraps the full detection list in a
            # one-element list, hence objects[0] below.
            activity_summary = categorize_activity(detected_objects)
            for activity, objects in activity_summary.items():
                journal_entries.setdefault(activity, []).append(
                    (f"At {timestamp:.2f} seconds: {', '.join(objects[0])}", frame_filename)
                )

            frame_count += 1
    finally:
        # Release the capture handle even if detection or file output fails.
        cap.release()

    # Flatten into: heading, entries..., heading, entries...
    formatted_journal = []
    for activity, entries in journal_entries.items():
        formatted_journal.append(f"**{activity}:**")
        formatted_journal.extend(entries)
    return formatted_journal
# Gradio callback: turn an uploaded video into gallery items for the journal
def display_journal_with_images(video):
    """Generate the journal for ``video`` and convert it into gallery items.

    ``generate_journal_with_images`` returns a flat list that mixes heading
    strings (``"**<activity>:**"``) with ``(text, image_path)`` tuples. A
    gallery can only show images, so each heading is folded into the captions
    of the entries that follow it instead of being emitted as an item.

    Args:
        video: Path of the uploaded video, as supplied by the video input.

    Returns:
        A list of ``(image_path, caption)`` tuples, the item order that
        ``gr.Gallery`` expects for captioned images.
    """
    journal_with_images = generate_journal_with_images(video)

    display_items = []
    current_activity = ""
    for item in journal_with_images:
        if isinstance(item, str):
            # Heading row: remember the plain activity name (markdown
            # asterisks and the trailing colon removed) for later captions.
            current_activity = item.strip("*:").strip()
            continue

        entry, image_path = item
        caption = f"{current_activity}: {entry}" if current_activity else entry
        # Image first, caption second.
        display_items.append((image_path, caption))
    return display_items
# Build the page: a video upload, a gallery for the resulting journal, and a
# button that runs the pipeline. Components render in the order created.
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video")
    output_gallery = gr.Gallery(label="Generated Daily Journal with Images")
    run_button = gr.Button(value="Generate Journal")

    # On click, feed the uploaded video to the callback and show what it
    # returns in the gallery.
    run_button.click(display_journal_with_images, video_input, output_gallery)

# Start the Gradio server for the assembled interface.
iface.launch()