"""Gradio demo script: estimates physical properties from the sound of pouring.

Builds a ``gr.Blocks`` app that accepts either an uploaded video or a YouTube
link, runs the audio model helpers imported from ``util`` and displays the
results.
"""
import os
from functools import lru_cache
import sys
sys.path.append("../")

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"
import decord
import PIL, PIL.Image
import librosa
from IPython.display import Markdown, display
import pandas as pd

# Provides load_model, load_frame, load_spectrogram, load_audio_tensor,
# get_model_output and show_output used below.
from util import *


css = """ """

header = css + """

The Sound of Water

Inferring Physical Properties from Pouring Liquids

Piyush Bagad¹, Makarand Tapaswi², Cees G. M. Snoek³, Andrew Zisserman¹,

¹University of Oxford, ²IIIT Hyderabad, ³University of Amsterdam

arXiv 🌐 Project

Code 🤗 Data 🤗 Models 🎯 Demo

"""

footer = css + """

Please give us a 🌟 on Github if you like our work! Tips to get better results:

The first example may take up to 30-60s for processing since the model is also loaded.
If you are providing a link, it may take a few seconds to download video from YouTube. Note that the entire video shall be used. If the sound of pouring is not clear, the results will be random.
Although the model is somewhat robust to noise, make sure there is not too much noise such that the pouring is audible.
Note that the video is not used during the inference. The displayed frame is only for reference.

"""


def download_from_youtube(
        video_id,
        save_dir="/tmp/",
        convert_to_mp4=False,
    ):
    """
    Downloads a complete YouTube video with ``yt-dlp``.

    Args:
        video_id (str): YouTube video ID.
        save_dir (str): Directory to save the video.
        convert_to_mp4 (bool): Whether to convert the video to mp4 format
            (with ``ffmpeg``) when the downloaded file is not already mp4.

    Returns:
        str: Path of the saved video, ``{save_dir}/{video_id}.{ext}``; the
        extension is ``mp4`` when ``convert_to_mp4`` is set.

    Raises:
        FileNotFoundError: If no downloaded file is found in ``save_dir``.
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    # Kept function-local (as in the original) so that names re-exported by
    # `from util import *` cannot shadow them.
    import subprocess
    from glob import escape, glob

    print("Downloading video from YouTube...")
    print("Video ID:", video_id)

    # Arguments are passed as a list (no shell), so neither the video ID nor
    # the save directory can be interpreted as shell syntax.
    command = [
        "yt-dlp",
        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
        "--verbose",
        "--force-overwrites",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        print("yt-dlp exited with status", result.returncode)

    # The container format is chosen by yt-dlp, so look the file up by ID.
    pattern = os.path.join(escape(save_dir), f"{escape(video_id)}.*")
    matches = sorted(glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f"Could not download video '{video_id}' into '{save_dir}'."
        )
    saved_filepath = matches[0]
    print("Saved file:", saved_filepath)

    if not convert_to_mp4:
        return saved_filepath

    # Swap only the extension; a plain str.replace would also rewrite any
    # earlier occurrence of the extension text inside the path.
    stem, ext = os.path.splitext(saved_filepath)
    if ext == ".mp4":
        return saved_filepath
    to_save = stem + ".mp4"
    subprocess.run(["ffmpeg", "-y", "-i", saved_filepath, to_save], check=True)
    return to_save


def configure_input():
    """Create the input widgets: a video upload and a YouTube link textbox.

    Returns:
        list: ``[video_input, youtube_link]`` Gradio components.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link to a video. \n"
        "Note that the entire video shall be used.",
    )
    video_input = gr.Video(label="Upload Video", height=520)
    youtube_link = gr.Textbox(label="YouTube Link", value=None)
    return [video_input, youtube_link]


# Backend passed to `load_frame`; "decord" is the alternative.
video_backend = "torchvision"


@lru_cache(maxsize=1)
def _get_model():
    """Load the model on first use and reuse it for later requests."""
    return load_model()


def get_predictions(video_path):
    """Run the model on the audio of ``video_path`` and build the outputs.

    Args:
        video_path (str): Path to a local video file.

    Returns:
        tuple: ``(image, df_show, tsne_image)`` as returned by `show_output`.
    """
    model = _get_model()
    # The frame is only displayed; predictions come from the audio tensor.
    frame = load_frame(video_path, video_backend=video_backend)
    S = load_spectrogram(video_path)
    audio = load_audio_tensor(video_path)
    z_audio, y_audio = get_model_output(audio, model)
    image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
    return image, df_show, tsne_image


# (marker, pattern) pairs tried in order; the first marker found in the URL
# selects the pattern used to extract the video ID.
_VIDEO_ID_PATTERNS = (
    ("v=", r"v=([a-zA-Z0-9_-]+)"),
    ("youtu.be", r"youtu\.be/([a-zA-Z0-9_-]+)"),
    ("shorts", r"shorts/([a-zA-Z0-9_-]+)"),
)


def get_video_id_from_url(url):
    """Extract the video ID from a YouTube URL.

    Supports ``...?v=<id>``, ``youtu.be/<id>`` and ``.../shorts/<id>`` links.

    Args:
        url (str): YouTube URL.

    Returns:
        str: The video ID.

    Raises:
        ValueError: If ``url`` is not a string or no video ID can be found.
    """
    import re

    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL")

    for marker, pattern in _VIDEO_ID_PATTERNS:
        if marker in url:
            match = re.search(pattern, url)
            break
    else:
        raise ValueError("Invalid YouTube URL")

    print("Video URL:", url)
    if match is None:
        raise ValueError("Invalid YouTube URL")
    video_id = match.group(1)
    print("Video ID:", video_id)
    return video_id


# Raw string: the LaTeX backslashes are kept verbatim without relying on
# invalid escape sequences such as "\l".
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""


def process_input(video, youtube_link):
    """Gradio callback: run predictions on an uploaded video or a YouTube link.

    Args:
        video (str | None): Path of the uploaded video file, if any.
        youtube_link (str | None): YouTube URL, if any.

    Returns:
        tuple: ``(image, df_show, markdown, tsne_image)`` matching the
        components created by `configure_outputs`.

    Raises:
        ValueError: If both or neither of the inputs are provided, or the
            link is not a recognised YouTube URL.
    """
    # The textbox may deliver None (e.g. from the examples) instead of "".
    youtube_link = (youtube_link or "").strip()

    if video is not None and youtube_link:
        raise ValueError("Please provide either a video file or a YouTube link, not both.")

    if video is not None:
        print(video)
        # The input is a video file path.
        image, df_show, tsne_image = get_predictions(video)
        return image, df_show, gr.Markdown(note), tsne_image

    if not youtube_link:
        raise ValueError("YouTube Link cannot be empty if no video is provided.")

    video_id = get_video_id_from_url(youtube_link)
    video_path = download_from_youtube(
        video_id, save_dir="/tmp/", convert_to_mp4=False,
    )
    image, df_show, tsne_image = get_predictions(video_path)

    # Add youtube link to the note
    local_note = (
        f"{note}\n\nYou can watch the original video here: "
        f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"
    )
    return image, df_show, gr.Markdown(local_note), tsne_image


def configure_outputs():
    """Create the output widgets in the order returned by `process_input`.

    Returns:
        list: ``[image_wide, dataframe, markdown, image_tsne]`` components.
    """
    image_wide = gr.Image(label="Estimated pitch")
    dataframe = gr.DataFrame(label="Estimated physical properties")
    image_tsne = gr.Image(label="TSNE of features", width=300)
    markdown = gr.Markdown(label="Note")
    return [image_wide, dataframe, markdown, image_tsne]


# Configure pre-defined examples: [video path, YouTube link]
examples = [
    ["./media_assets/example_video.mp4", None],
    ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None],
    ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None],
    ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None],
    ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None],
    # Shows that it works with background noise
    ["./media_assets/l74zJHCZ9uA.webm", None],
    # Shows that it works with a slightly differently shaped container
    ["./media_assets/LpRPV0hIymU.webm", None],
    ["./media_assets/k-HnMsS36J8.webm", None],
    # [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
    # [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
    # [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
]


# Define Gradio interface
# NOTE(review): `custom_css` is not defined in this file; presumably it is
# provided by `from util import *` -- confirm.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:

    # Add the header
    gr.HTML(header)

    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )

    # Add the footer
    gr.HTML(footer)


# Launch the interface
demo.launch(allowed_paths=["."], share=True)