|
import gradio as gr |
|
from ultralytics import YOLOv10 |
|
from skimage.metrics import structural_similarity as ssim |
|
import cv2 |
|
import torch |
|
import os |
|
import spaces |
|
|
|
|
|
# Prefer GPU inference when CUDA is available; fall back to CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Pre-trained YOLOv10-X checkpoint, moved onto the selected device.
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
|
|
|
|
|
# Maps a high-level activity label to the object names whose presence
# in a frame suggests that activity.
activity_categories = {
    "Working": ["laptop", "computer", "keyboard", "office chair"],
    "Meal Time": ["fork", "spoon", "plate", "food"],
    "Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
    "Outdoors": ["car", "tree", "bicycle", "road"],
}


def categorize_activity(detected_objects):
    """Group detected object names into high-level activity categories.

    Args:
        detected_objects: Iterable of object-name strings (e.g. YOLO class
            labels) detected in a single frame.

    Returns:
        dict mapping each matched activity name to a single-element list
        containing the list of detected objects that triggered the match.
        (The nested-list shape is kept for backward compatibility with
        callers that read ``categorized[activity][0]``.)
    """
    categorized_activities = {}
    for activity, keywords in activity_categories.items():
        # Bug fix: record only the objects relevant to this activity.
        # The original appended the entire detection list to every matched
        # category, so unrelated objects appeared under each activity.
        matched = [obj for obj in detected_objects if obj in keywords]
        if matched:
            categorized_activities[activity] = [matched]
    return categorized_activities
|
|
|
|
|
|
|
def is_frame_different(frame1, frame2, threshold=0.9):
    """Return True when two BGR frames are structurally dissimilar.

    The frames are converted to grayscale and compared with SSIM; a
    similarity score below ``threshold`` indicates the scene has changed
    enough that the new frame should be re-processed.
    """
    gray_a = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray_b = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    similarity = ssim(gray_a, gray_b, full=True)[0]
    return similarity < threshold
|
|
|
|
|
|
|
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
    """Run object detection over a video and build a timestamped journal.

    A frame is processed every ``frame_interval`` frames, or sooner when
    the scene changes significantly (SSIM check against the last processed
    frame). Each processed frame is annotated by the detector, written to
    the ``detected_frames`` folder, and summarized into journal lines.

    Args:
        video_path: Path to the input video file.
        frame_interval: Process at least every Nth frame.

    Returns:
        Tuple ``(journal_entries, saved_images)``: ``journal_entries`` is a
        list of human-readable strings, ``saved_images`` the list of
        annotated-frame file paths, both in processing order.
    """
    cap = cv2.VideoCapture(video_path)
    journal_entries = []
    saved_images = []
    frame_count = 0
    last_processed_frame = None
    output_folder = "detected_frames"
    os.makedirs(output_folder, exist_ok=True)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Process on the fixed interval, or earlier if the scene changed.
            due = frame_count % frame_interval == 0
            changed = (last_processed_frame is not None
                       and is_frame_different(last_processed_frame, frame))
            if due or changed:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = model.predict(source=frame_rgb, device=device)

                annotated_frame = results[0].plot()
                frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                # Channel flip mirrors the RGB input before writing with OpenCV (BGR).
                cv2.imwrite(frame_filename, annotated_frame[:, :, ::-1])
                saved_images.append(frame_filename)

                detected_objects = [model.names[int(box.cls)] for box in results[0].boxes]
                # Position in the stream, in seconds.
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                activity_summary = categorize_activity(detected_objects)

                for activity, objects in activity_summary.items():
                    # Bug fix: include the activity label (it was computed but
                    # unused) and store plain strings — the caller joins the
                    # entries with "\n", which fails on (text, path) tuples.
                    journal_entries.append(
                        f"At {timestamp:.2f} seconds ({activity}): {', '.join(objects[0])}"
                    )

                last_processed_frame = frame

            frame_count += 1
    finally:
        # Release the capture even if detection raises mid-video.
        cap.release()

    # Bug fix: the caller unpacks two values (text entries and image paths);
    # the original returned only a single list of tuples, which crashed the
    # caller's `journal_entries, image_paths = ...` unpacking.
    return journal_entries, saved_images
|
|
|
|
|
def display_journal_with_images(video):
    """Gradio callback: build the journal text and gallery images for a video.

    Returns a tuple of (newline-joined journal text, list of annotated
    frame image paths) matching the Textbox and Gallery outputs.
    """
    entries, image_paths = generate_journal_with_images(video, frame_interval=30)
    # One journal line per row in the textbox.
    journal_text = "\n".join(entries)
    return journal_text, image_paths
|
|
|
|
|
# Gradio UI: a video input, the generated journal text, and a gallery of
# annotated frames, wired to the journal-generation callback.
with gr.Blocks() as iface:
    video_input = gr.Video(label="Upload Video", height=300)
    journal_output = gr.Textbox(label="Generated Daily Journal", lines=10)
    image_gallery = gr.Gallery(label="Annotated Frames")
    run_button = gr.Button("Generate Journal")

    run_button.click(
        fn=display_journal_with_images,
        inputs=video_input,
        outputs=[journal_output, image_gallery],
    )

# Bug fix: launch() was called twice; the duplicate call is removed.
iface.launch()
|
|