import os
import time

import cv2
import numpy as np
import onnx
import onnxruntime

# Reinstall onnxruntime at startup (a common Hugging Face Spaces workaround).
# Note this cannot affect the module already imported in this process.
os.system('pip install --upgrade --force-reinstall onnxruntime')

# --- Optical flow visualization utilities ---
# Ref: https://github.com/liruoteng/OpticalFlowToolkit/blob/5cf87b947a0032f58c922bbc22c0afb30b90c418/lib/flowlib.py#L249

# Flow components with magnitude above this threshold are treated as unknown.
UNKNOWN_FLOW_THRESH = 1e7
def make_color_wheel():
    """
    Generate color wheel according to the Middlebury color code
    :return: Color wheel
    """
    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros([ncols, 3])

    col = 0
    # RY
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY))
    col += RY
    # YG
    colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG))
    colorwheel[col:col+YG, 1] = 255
    col += YG
    # GC
    colorwheel[col:col+GC, 1] = 255
    colorwheel[col:col+GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC))
    col += GC
    # CB
    colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB))
    colorwheel[col:col+CB, 2] = 255
    col += CB
    # BM
    colorwheel[col:col+BM, 2] = 255
    colorwheel[col:col+BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM))
    col += BM
    # MR
    colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
    colorwheel[col:col+MR, 0] = 255

    return colorwheel

colorwheel = make_color_wheel()
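# Sanity note: the wheel spans RY+YG+GC+CB+BM+MR = 15+6+4+11+13+6 = 55 hue bins,
# so colorwheel.shape == (55, 3), one RGB row per bin.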
def compute_color(u, v):
    """
    compute optical flow color map
    :param u: optical flow horizontal map
    :param v: optical flow vertical map
    :return: optical flow in color code
    """
    [h, w] = u.shape
    img = np.zeros([h, w, 3])
    nanIdx = np.isnan(u) | np.isnan(v)
    u[nanIdx] = 0
    v[nanIdx] = 0

    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u**2 + v**2)
    a = np.arctan2(-v, -u) / np.pi

    # Map the flow angle to a fractional index into the color wheel.
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1
    f = fk - k0

    for i in range(0, np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0-1] / 255
        col1 = tmp[k1-1] / 255
        # Interpolate between the two nearest wheel entries.
        col = (1 - f) * col0 + f * col1

        idx = rad <= 1
        # Scale saturation by flow magnitude inside the unit radius...
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        notidx = np.logical_not(idx)
        # ...and darken colors outside of it.
        col[notidx] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx)))

    return img
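# Worked example: for flow (u, v) = (1, 0), a = arctan2(0, -1)/pi = 1, so fk
# lands at the last bin of the wheel, in the magenta-to-red (MR) segment;
# rotating the flow vector walks through the remaining hues.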
def flow_to_image(flow):
    """
    Convert flow into Middlebury color code image
    :param flow: optical flow map
    :return: optical flow image in Middlebury color
    """
    u = flow[:, :, 0]
    v = flow[:, :, 1]

    maxu = -999.
    maxv = -999.
    minu = 999.
    minv = 999.

    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknow] = 0
    v[idxUnknow] = 0

    maxu = max(maxu, np.max(u))
    minu = min(minu, np.min(u))
    maxv = max(maxv, np.max(v))
    minv = min(minv, np.min(v))

    # Normalize by the largest flow magnitude so rad <= 1 in compute_color.
    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))
    u = u / (maxrad + np.finfo(float).eps)
    v = v / (maxrad + np.finfo(float).eps)

    img = compute_color(u, v)

    # Black out unknown-flow pixels.
    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)
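# Minimal usage sketch (hypothetical values): colorize a synthetic flow field.
#   _flow = np.dstack([np.full((240, 320), 3.0), np.full((240, 320), -1.5)])
#   _vis = flow_to_image(_flow)  # uint8 RGB: hue encodes direction, saturation magnitude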
class Raft():

    def __init__(self, model_path):
        # Initialize model
        self.initialize_model(model_path)

    def __call__(self, img1, img2):
        return self.estimate_flow(img1, img2)

    def initialize_model(self, model_path):
        self.session = onnxruntime.InferenceSession(model_path,
                                                    providers=['CUDAExecutionProvider',
                                                               'CPUExecutionProvider'])
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def estimate_flow(self, img1, img2):
        input_tensor1 = self.prepare_input(img1)
        input_tensor2 = self.prepare_input(img2)

        outputs = self.inference(input_tensor1, input_tensor2)

        self.flow_map = self.process_output(outputs)

        return self.flow_map

    def prepare_input(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        self.img_height, self.img_width = img.shape[:2]

        img_input = cv2.resize(img, (self.input_width, self.input_height))

        # img_input = img_input/255
        img_input = img_input.transpose(2, 0, 1)
        img_input = img_input[np.newaxis, :, :, :]

        return img_input.astype(np.float32)

    def inference(self, input_tensor1, input_tensor2):
        # start = time.time()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor1,
                                                       self.input_names[1]: input_tensor2})
        # print(time.time() - start)
        return outputs

    def process_output(self, output):
        # The second output holds the final flow estimate: (1, 2, H, W) -> (H, W, 2).
        flow_map = output[1][0].transpose(1, 2, 0)
        return flow_map

    def draw_flow(self):
        # Convert flow to image
        flow_img = flow_to_image(self.flow_map)

        # Convert to BGR
        flow_img = cv2.cvtColor(flow_img, cv2.COLOR_RGB2BGR)

        # Resize the flow image to match the input image shape
        return cv2.resize(flow_img, (self.img_width, self.img_height))

    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]

        self.output_shape = model_outputs[0].shape
        self.output_height = self.output_shape[2]
        self.output_width = self.output_shape[3]
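# Note (inferred from prepare_input/process_output above, not from the model spec):
# the 240x320 model takes two float32 tensors of shape (1, 3, 240, 320) with raw
# 0-255 pixel values, and output[1] is the upsampled flow of shape (1, 2, 240, 320).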
if __name__ == '__main__':
    from imread_from_url import imread_from_url

    # Initialize model
    model_path = 'raft_small_iter10_240x320.onnx'
    flow_estimator = Raft(model_path)

    # Read inference images
    img1 = imread_from_url("https://github.com/princeton-vl/RAFT/blob/master/demo-frames/frame_0016.png?raw=true")
    img2 = imread_from_url("https://github.com/princeton-vl/RAFT/blob/master/demo-frames/frame_0025.png?raw=true")

    # Estimate flow and colorize it
    flow_map = flow_estimator(img1, img2)
    flow_img = flow_estimator.draw_flow()

    combined_img = np.hstack((img1, img2, flow_img))

    # cv2.namedWindow("Estimated flow", cv2.WINDOW_NORMAL)
    # cv2.imshow("Estimated flow", combined_img)
    # cv2.waitKey(0)
# --- Gradio app ---
import gradio as gr
import yt_dlp

def download_youtube_video(youtube_url, output_filename):
    ydl_opts = {
        # Prefer an mp4 video + m4a audio pair, fall back to best mp4, then best overall.
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': output_filename,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
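# Usage sketch (hypothetical URL):
#   download_youtube_video("https://www.youtube.com/watch?v=...", "clip.mp4")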
def process_video(youtube_url, start_time, flow_frame_offset):
    model_path = 'models/raft_small_iter10_240x320.onnx'
    flow_estimator = Raft(model_path)

    output_filename = 'downloaded_video.mp4'
    processed_output = 'processed_video.mp4'

    # Download video
    if os.path.exists(output_filename):
        os.remove(output_filename)
    download_youtube_video(youtube_url, output_filename)

    cap = cv2.VideoCapture(output_filename)
    if not cap.isOpened():
        return "Error: Could not open video."

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Use an mp4-compatible codec for the .mp4 output container.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(processed_output, fourcc, fps, (frame_width, frame_height))

    # Seek to the requested start time.
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))

    frame_list = []
    frame_num = 0
    while cap.isOpened():
        ret, prev_frame = cap.read()
        if not ret:
            break

        frame_list.append(prev_frame)
        frame_num += 1
        # Buffer frames until the pair is flow_frame_offset frames apart.
        if frame_num <= flow_frame_offset:
            continue

        # Estimate flow between the oldest and newest buffered frames.
        flow_map = flow_estimator(frame_list[0], frame_list[-1])
        flow_img = flow_estimator.draw_flow()

        # Blend the original frame with the colorized flow.
        alpha = 0.5
        combined_img = cv2.addWeighted(frame_list[0], alpha, flow_img, 1 - alpha, 0)

        out.write(combined_img)
        frame_list.pop(0)

    cap.release()
    out.release()

    return processed_output
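# Sketch of a direct call (hypothetical URL): each written frame blends a video
# frame with the flow estimated against the frame flow_frame_offset frames later.
#   process_video("https://www.youtube.com/watch?v=...", start_time=5, flow_frame_offset=50)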
# Example inputs: (URL, start time, flow frame offset), one value per input component.
examples = [
    ["https://www.youtube.com/watch?v=is38pqgbj6A", 5, 50],
    ["https://www.youtube.com/watch?v=AdbrfoxiAtk", 0, 60],
    ["https://www.youtube.com/watch?v=vWGg0iPmI8k", 13, 70],
]
with gr.Blocks() as app:
    gr.Markdown("### Optical Flow Video Processing\n"
                "Enter a YouTube URL, set the start time and flow frame offset, "
                "then click 'Process Video' to see the optical flow processing.")
    with gr.Row():
        with gr.Column():
            youtube_url = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube Video URL Here")
            start_time = gr.Slider(minimum=0, maximum=60, label="Start Time (seconds)", step=1)
            flow_frame_offset = gr.Slider(minimum=1, maximum=100, label="Flow Frame Offset", step=1)
            submit_button = gr.Button("Process Video")
        with gr.Column():
            output_video = gr.Video(label="Processed Video")

    submit_button.click(
        fn=process_video,
        inputs=[youtube_url, start_time, flow_frame_offset],
        outputs=output_video
    )

    gr.Examples(examples=examples,
                inputs=[youtube_url, start_time, flow_frame_offset],
                fn=process_video,
                outputs=output_video,
                cache_examples=False)

app.launch()