# DailySnap / app.py
import gradio as gr
from ultralytics import YOLOv10
from skimage.metrics import structural_similarity as ssim
import cv2
import torch
import os
import spaces
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLOv10.from_pretrained('jameslahm/yolov10x').to(device)
# Define activity categories based on detected objects
activity_categories = {
"Working": ["laptop", "computer", "keyboard", "office chair"],
"Meal Time": ["fork", "spoon", "plate", "food"],
"Exercise": ["dumbbell", "bicycle", "yoga mat", "treadmill"],
"Outdoors": ["car", "tree", "bicycle", "road"],
# Add more categories and objects as needed
}
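# Note: the checkpoint above is COCO-trained, so only COCO class names
# (e.g. "laptop", "keyboard", "fork", "spoon", "bicycle", "car") can appear in
# detections; labels like "dumbbell", "yoga mat", "treadmill", or "food" will
# only ever match after fine-tuning on a dataset that actually contains them.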
# Function to map detected objects to categorized activities
def categorize_activity(detected_objects):
    categorized_activities = {}
    for activity, objects in activity_categories.items():
        # Keep only the detections that actually belong to this activity
        matched = [obj for obj in detected_objects if obj in objects]
        if matched:
            categorized_activities.setdefault(activity, []).extend(matched)
    return categorized_activities
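# Illustrative example (not executed): with the mapping above,
#   categorize_activity(["laptop", "fork", "person"])
# returns {"Working": ["laptop"], "Meal Time": ["fork"]};
# "person" is dropped because it appears in no category.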
# Function to compare frames using SSIM to avoid repeated frames
def is_frame_different(frame1, frame2, threshold=0.9):
gray_frame1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
gray_frame2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
score, _ = ssim(gray_frame1, gray_frame2, full=True)
return score < threshold
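# Rough intuition (values are illustrative, not measured): SSIM is 1.0 for
# identical frames, tends to stay high across consecutive frames of a static
# scene, and drops sharply at a cut, so the 0.9 default flags real changes:
#   is_frame_different(frame, frame)      # False -- SSIM == 1.0
#   is_frame_different(frame, cut_frame)  # True for a hard scene change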
# Function to process the video, detect objects, and generate a categorized journal with images
@spaces.GPU
def generate_journal_with_images(video_path, frame_interval=30):
cap = cv2.VideoCapture(video_path)
journal_entries = []
saved_images = []
frame_count = 0
last_processed_frame = None
output_folder = "detected_frames"
os.makedirs(output_folder, exist_ok=True) # Create folder to store images
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
        # Sample every Nth frame, skipping it if it is visually near-identical
        # to the last processed frame (the SSIM check avoids duplicate entries)
        if frame_count % frame_interval == 0 and (
            last_processed_frame is None or is_frame_different(last_processed_frame, frame)
        ):
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Make predictions using YOLOv10 on the current frame
results = model.predict(source=frame_rgb, device=device)
# Plot bounding boxes and labels on the image
annotated_frame = results[0].plot() # Plot detection results on the frame
# Save the annotated image
frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
cv2.imwrite(frame_filename, annotated_frame[:, :, ::-1]) # Convert back to BGR for saving
saved_images.append(frame_filename)
# Extract labels (class indices) and map them to class names
detected_objects = [model.names[int(box.cls)] for box in results[0].boxes] # Access the first result
# Get current timestamp in the video
timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000 # Convert ms to seconds
# Categorize the detected objects into activities
activity_summary = categorize_activity(detected_objects)
            # Store each recognized activity with its timestamp
            for activity, objects in activity_summary.items():
                journal_entries.append((f"At {timestamp:.2f} seconds ({activity}): {', '.join(objects)}", frame_filename))
last_processed_frame = frame # Update the last processed frame
frame_count += 1
cap.release()
    return journal_entries, saved_images
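# Standalone debugging sketch ("sample.mp4" is a hypothetical path):
#   entries, images = generate_journal_with_images("sample.mp4", frame_interval=60)
#   for text, image_path in entries:
#       print(text, "->", image_path)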
def display_journal_with_images(video):
    journal_entries, image_paths = generate_journal_with_images(video, frame_interval=30)
    # Entries are (text, image_path) tuples; join the text for the textbox
    # and hand the image paths to the gallery
    journal_text = "\n".join(entry for entry, _ in journal_entries)
    return journal_text, image_paths
# Define Gradio Blocks for custom display
with gr.Blocks() as iface:
video_input = gr.Video(label="Upload Video", height=300)
journal_output = gr.Textbox(label="Generated Daily Journal", lines=10)
image_gallery = gr.Gallery(label="Annotated Frames")
run_button = gr.Button("Generate Journal")
run_button.click(fn=display_journal_with_images, inputs=video_input, outputs=[journal_output, image_gallery])
iface.launch()