import os
import sys
sys.path.append("../")
import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"
import decord
import PIL, PIL.Image
import librosa
from IPython.display import Markdown, display
import pandas as pd
from util import *
# Placeholder CSS and HTML snippets. `css` is folded into the `header` and
# `footer` HTML fragments rendered around the Gradio interface below.
css = """
"""
header = css + """
"""
footer = css + """
"""
def download_from_youtube(
    video_id,
    save_dir="/tmp/",
    convert_to_mp4=False,
):
    """
    Downloads a full YouTube video with yt-dlp.

    Args:
        video_id (str): YouTube video ID.
        save_dir (str): Directory to save the video.
        convert_to_mp4 (bool): Whether to convert the video to mp4 format.

    Returns:
        str: Path of the saved file, {save_dir}/{video_id}.{ext}
        (ext is ``mp4`` when ``convert_to_mp4`` is set).

    Raises:
        FileNotFoundError: If yt-dlp did not produce an output file.
    """
    from glob import glob
    from subprocess import call

    print("Downloading video from YouTube...")
    print("Video ID:", video_id)
    # Pass arguments as a list with shell=False so the output template and
    # URL are never interpreted by a shell (no quoting/injection issues).
    command = [
        "yt-dlp",
        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
        "--cookies", "./chrome_cookies.txt",
        "--verbose",
        "--force-overwrites",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    call(command)

    # yt-dlp chooses the container format, so look up whatever was saved.
    matches = glob(os.path.join(save_dir, f"{video_id}.*"))
    if not matches:
        raise FileNotFoundError(
            f"yt-dlp produced no file for video ID {video_id!r} in {save_dir!r}"
        )
    saved_filepath = matches[0]
    print("Saved file:", saved_filepath)

    if not convert_to_mp4:
        return saved_filepath

    # Swap only the extension. (str.replace would clobber the first
    # occurrence of the extension substring anywhere in the path,
    # e.g. "/tmp/webm_clip.webm".)
    root, ext = os.path.splitext(saved_filepath)
    if ext.lower() == ".mp4":
        return saved_filepath
    to_save = root + ".mp4"
    # Convert to mp4 using ffmpeg.
    call(["ffmpeg", "-y", "-i", saved_filepath, to_save])
    return to_save
def configure_input():
    """Create the input widgets: a video upload box and a YouTube-link textbox."""
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
    )
    components = [
        gr.Video(label="Upload Video", height=520),
        gr.Textbox(label="YouTube Link", value=None),
    ]
    return components
# Frame-decoding backend passed to util.load_frame (see get_predictions).
# video_backend = "decord"
video_backend = "torchvision"
def get_predictions(video_path):
    """Run the full pipeline on one video.

    Loads the model, a representative frame, the spectrogram and the audio
    tensor, runs inference, and returns the tuple produced by show_output:
    (image, df_show, tsne_image).
    """
    model = load_model()
    frame = load_frame(video_path, video_backend=video_backend)
    spectrogram = load_spectrogram(video_path)
    waveform = load_audio_tensor(video_path)
    z_audio, y_audio = get_model_output(waveform, model)
    return show_output(frame, spectrogram, y_audio, z_audio)
def get_video_id_from_url(url):
    """Extract the video ID from a YouTube URL.

    Handles watch URLs (``v=``), ``youtu.be`` short links, and Shorts URLs,
    checked in that order. Raises ValueError for any other URL or when no
    ID can be extracted.
    """
    import re

    forms = (
        ("v=", r"v=([a-zA-Z0-9_-]+)"),
        ("youtu.be", r"youtu.be/([a-zA-Z0-9_-]+)"),
        ("shorts", r"shorts/([a-zA-Z0-9_-]+)"),
    )
    for marker, pattern in forms:
        if marker in url:
            candidates = re.findall(pattern, url)
            break
    else:
        raise ValueError("Invalid YouTube URL")
    print("Video URL:", url)
    print("Video ID:", candidates)
    if not candidates:
        raise ValueError("Invalid YouTube URL")
    return candidates[0]
# Markdown/LaTeX caveat shown with the results. A raw string keeps the TeX
# backslashes intact without relying on invalid Python escape sequences
# (e.g. "\l", "\ "), which raise SyntaxWarning on Python 3.12+.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""
# Example usage in a Gradio interface
def process_input(video, youtube_link):
    """Gradio callback: run the pipeline on an uploaded video OR a YouTube link.

    Args:
        video (str | None): Path of the uploaded video file, if any.
        youtube_link (str | None): YouTube URL, if any.

    Returns:
        tuple: (image, df_show, gr.Markdown note, tsne_image) — matching
        the component order of configure_outputs.

    Raises:
        ValueError: If both inputs are given, neither is given, or the
            link is not a string.
    """
    provided_video = video is not None
    if youtube_link is None:
        provided_link = False
    elif isinstance(youtube_link, str):
        provided_link = len(youtube_link) > 0
    else:
        raise ValueError(f"Invalid type of link {youtube_link}.")

    if provided_video and provided_link:
        raise ValueError("Please provide either a video file or a YouTube link, not both.")

    if provided_video:
        print(video)
        # The input is a video file path
        image, df_show, tsne_image = get_predictions(video)
        return image, df_show, gr.Markdown(note), tsne_image

    print(provided_link)
    # Raise instead of assert: assertions are stripped under `python -O`,
    # so they must not be used for input validation.
    if not provided_link:
        raise ValueError("YouTube Link cannot be empty if no video is provided.")
    video_id = get_video_id_from_url(youtube_link)
    video_path = download_from_youtube(
        video_id, save_dir="/tmp/", convert_to_mp4=False,
    )
    # Get predictions
    image, df_show, tsne_image = get_predictions(video_path)
    # Add youtube link to the note
    local_note = f"{note}\n\nYou can watch the original video here: "\
        f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"
    return image, df_show, gr.Markdown(local_note), tsne_image
def configure_outputs():
    """Create the output widgets and return them in process_input's return order."""
    pitch_image = gr.Image(label="Estimated pitch")
    properties_table = gr.DataFrame(label="Estimated physical properties")
    tsne_plot = gr.Image(label="TSNE of features", width=300)
    note_markdown = gr.Markdown(label="Note")
    # Widgets are created in display order above, but returned in the order
    # that process_input yields its outputs (markdown before the TSNE image).
    return [pitch_image, properties_table, note_markdown, tsne_plot]
# Configure pre-defined examples
# Each entry matches configure_input's components: [video path, YouTube link].
examples = [
    ["./media_assets/example_video.mp4", None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None],
    # Shows that it works with background noise
    ["./media_assets/l74zJHCZ9uA.webm", None],
    # Shows that it works with a slightly differently shaped container
    ["./media_assets/LpRPV0hIymU.webm", None],
    ["./media_assets/k-HnMsS36J8.webm", None],
    # [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
    # [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
    # [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
]
# Define Gradio interface
with gr.Blocks(
    # NOTE(review): `custom_css` is not defined in this file; presumably it
    # comes from `from util import *` — confirm. (The local `css` constant
    # above is only folded into `header`/`footer`.)
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:
    # Add the header
    gr.HTML(header)
    # Wire inputs -> process_input -> outputs, with clickable examples.
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )
    # Add the footer
    gr.HTML(footer)
# Launch the interface
# allowed_paths="." lets Gradio serve the local ./media_assets example files.
demo.launch(allowed_paths=["."], share=True)