# Spaces: No application file
import os
import warnings
import zipfile

import chromadb
import cv2
from chromadb.utils.data_loaders import ImageLoader
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
# Location handed to the persistent Chroma client.
path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

# Loader and embedding function attached to the collection below.
image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()

# Collection used by the functions in this module; it is fetched if it
# already exists and created otherwise.
video_collection = client.get_or_create_collection(
    name="video_collection",
    embedding_function=CLIP,
    data_loader=image_loader,
)
def extract_frames(video_folder, output_folder):
    """Sample still frames from every ``.mp4`` file in *video_folder*.

    For each video a subfolder named after the file (extension stripped) is
    created under *output_folder*. The first frame, one frame roughly every
    five seconds, and the frame at index ``frame_count - 1`` are written
    there as ``frame_<second>.jpg``, where ``<second>`` is the frame's
    timestamp truncated to whole seconds.

    Videos that cannot be opened, or whose reported frame rate is not a
    positive finite number, are skipped with a warning rather than failing
    with ``ZeroDivisionError``.

    Args:
        video_folder: Directory scanned (non-recursively) for ``.mp4`` files.
        output_folder: Directory that receives one subfolder per video; it
            is created if it does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)

    for video_filename in os.listdir(video_folder):
        if not video_filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_folder, video_filename)
        video_capture = cv2.VideoCapture(video_path)
        try:
            if not video_capture.isOpened():
                warnings.warn(
                    f"Could not open video, skipping: {video_path}",
                    stacklevel=2,
                )
                continue

            fps = video_capture.get(cv2.CAP_PROP_FPS)
            # The chained comparison is also False for NaN, so every
            # unusable frame rate is rejected before it is divided by.
            if not 0 < fps < float("inf"):
                warnings.warn(
                    f"Invalid frame rate ({fps}), skipping: {video_path}",
                    stacklevel=2,
                )
                continue

            frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            # Number of frames between samples (~5 seconds). Clamped to 1 so
            # that a frame rate below 0.2 fps cannot produce a zero divisor.
            sample_every = max(1, int(fps * 5))

            output_subfolder = os.path.join(
                output_folder, os.path.splitext(video_filename)[0]
            )
            os.makedirs(output_subfolder, exist_ok=True)

            success, image = video_capture.read()
            frame_number = 0
            while success:
                # Frame 0 satisfies the modulo test, so the first frame is
                # always sampled without a separate check.
                is_sample = frame_number % sample_every == 0
                is_last = frame_number == frame_count - 1
                if is_sample or is_last:
                    frame_time = frame_number / fps
                    output_frame_filename = os.path.join(
                        output_subfolder, f'frame_{int(frame_time)}.jpg'
                    )
                    cv2.imwrite(output_frame_filename, image)
                success, image = video_capture.read()
                frame_number += 1
        finally:
            # Release the capture even when skipping or when a read/write
            # raises, so handles are not leaked across videos.
            video_capture.release()
def add_frames_to_chromadb(video_dir, frames_dir):
    """Add the extracted frames of every video in *video_dir* to the collection.

    For each ``.mp4`` file in *video_dir*, the ``.jpg`` files found in
    ``<frames_dir>/<video name without extension>`` are added to the
    module-level ``video_collection``. Each frame gets the id
    ``<frame name without extension>_<video name>`` and a ``video_uri``
    metadata entry holding the path of the video it belongs to.

    Nothing is added when no frames are found.

    Args:
        video_dir: Directory containing the ``.mp4`` files.
        frames_dir: Directory containing one subfolder of frames per video.
    """
    ids = []
    uris = []
    metadatas = []

    # Sorted listings keep the insertion order stable across runs and
    # platforms; os.listdir() makes no ordering guarantee.
    for video_file in sorted(os.listdir(video_dir)):
        if not video_file.endswith('.mp4'):
            continue

        video_title = os.path.splitext(video_file)[0]
        frame_folder = os.path.join(frames_dir, video_title)
        if not os.path.isdir(frame_folder):
            continue

        video_path = os.path.join(video_dir, video_file)
        for frame in sorted(os.listdir(frame_folder)):
            if not frame.endswith('.jpg'):
                continue
            ids.append(f"{os.path.splitext(frame)[0]}_{video_title}")
            uris.append(os.path.join(frame_folder, frame))
            metadatas.append({'video_uri': video_path})

    # Chroma rejects an add() call with an empty id list, so a folder
    # without any extracted frames is treated as a no-op.
    if not ids:
        return

    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
def process_video_files(video_paths):
    """Extract frames for the given videos' folders and index them.

    The parent folder of each path in *video_paths* is processed as a whole:
    frames are extracted from every ``.mp4`` file in that folder into
    ``extracted_frames`` (relative to the current working directory) and
    then added to the module-level ``video_collection``. Other ``.mp4``
    files in the same folder are therefore processed too, not only the
    listed ones.

    Each distinct folder is processed once, even when several paths share
    it. A bare filename is treated as living in the current directory.

    Args:
        video_paths: Iterable of paths to video files.

    Returns:
        The module-level ``video_collection``.
    """
    frames_output_folder = r"extracted_frames"
    os.makedirs(frames_output_folder, exist_ok=True)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    # dirname() of a bare filename is "", which os.listdir() cannot open,
    # so it is mapped to the current directory.
    video_folders = dict.fromkeys(
        os.path.dirname(video_path) or os.curdir for video_path in video_paths
    )

    for video_folder in video_folders:
        extract_frames(video_folder, frames_output_folder)
        add_frames_to_chromadb(video_folder, frames_output_folder)

    return video_collection
# Example usage:
# video_paths = [
#     "/path/to/video1.mp4",
#     "/path/to/video2.mp4",
#     "/path/to/video3.mp4",
# ]
# process_video_files(video_paths)