Spaces:

bpiyush
/

SoundOfWater

Running

File size: 10,970 Bytes

import os
import sys
sys.path.append("../")

import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "serif"
import decord
import PIL, PIL.Image
import librosa
from IPython.display import Markdown, display
import pandas as pd

from util import *


# Inline stylesheet that is prepended to the header and footer HTML snippets
# below, so each snippet carries its own styling.
css = """
<style>
    body {
        font-family: 'Arial', serif;
        margin: 0;
        padding: 0;
        color: black;
    }
    .header {
        display: flex;
        align-items: center;
        justify-content: center;
        margin-top: 5px;
        color: black;
    }
    .footer {
        display: flex;
        align-items: center;
        justify-content: center;
        margin-top: 5px;
    }
    .image {
        margin-right: 20px;
    }
    .content {
        text-align: center;
        color: black;
    }
    .title {
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 10px;
    }
    .authors {
        color: #4a90e2;
        font-size: 1.05em;
        margin: 10px 0;
    }
    .affiliations {
        font-size: 1em;
        margin-bottom: 20px;
    }
    .buttons {
        display: flex;
        justify-content: center;
        gap: 10px;
    }
    .button {
        background-color: #545758;
        text-decoration: none;
        padding: 8px 16px;
        border-radius: 5px;
        font-size: 1.05em;
    }
    .button:hover {
        background-color: #333;
    }
</style>
"""


# Page header: logo, title, author list, affiliations and link buttons.
# Each author anchor closes its `style` attribute before `href`; without the
# closing quote the `href` text becomes part of the style value and the link
# does not work.
header = css + """
<div class="header">
    <!-- <div class="image">
        <img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
    </div> -->
    <div class="content">
        <img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
        <div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
        <div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
        <div class="authors">
            <a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
            <a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
            <a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
            <a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>
        </div>
        <div class="affiliations">
            <sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
        </div>
        
        <div class="buttons">
            <a href="#" style="color: #92eaff;" class="button">arXiv</a>
            <a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">🌐 Project</a>
            <a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;"> &nbsp;Code</a>
            <a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">🤗 Data</a>
            <a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">🤗 Models</a>
            <a href="#" style="color: #92eaff;" class="button">🎯 Demo</a>
        </div>
    </div>
</div>
"""

# Page footer: a request to star the repository followed by usage tips.
# Reuses the `.header` / `.content` classes from `css`, with inline style
# overrides for left alignment and smaller text.
footer = css + """
<div class="header" style="justify-content: left;">
<div class="content" style="font-size: 16px;">
Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
Tips to get better results:
<br><br>
<ol style="text-align: left; font-size: 14px; margin-left: 30px">
    <li>The first example may take up to 30-60s for processing since the model is also loaded.</li>
    <li>
    If you are providing a link, it may take a few seconds to download video from YouTube.
    Note that the entire video shall be used.
    If the sound of pouring is not clear, the results will be random.
    </li>
    <li>Although the model is somewhat robust to noise, make sure there is not too much noise such that the pouring is audible.</li>
    <li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
</ol>
</div>
</div>
"""
from download_youtube import download_youtube_video_ytdlp

def download_from_youtube(
        video_id,
        save_dir="/tmp/",
        convert_to_mp4=False,
    ):
    """
    Downloads a complete YouTube video with `yt-dlp`.

    Args:
        video_id (str): YouTube video ID.
        save_dir (str): Directory to save the video.
        convert_to_mp4 (bool): Whether to convert the video to mp4 format.

    Returns:
        str: Path of the saved file, `{save_dir}/{video_id}.{ext}`, or
            `{save_dir}/{video_id}.mp4` when `convert_to_mp4` is set.

    Raises:
        ValueError: If `video_id` is not a non-empty string.
        IOError: If `yt-dlp` or `ffmpeg` cannot be started or exits with an
            error, or if no downloaded file is found afterwards.
    """
    import subprocess
    from glob import glob

    if not isinstance(video_id, str) or not video_id:
        raise ValueError("video_id must be a non-empty string.")

    print("Downloading video from YouTube...")
    print("Video ID:", video_id)

    # Arguments are passed as a list (no shell), so nothing in `video_id` or
    # `save_dir` can be interpreted as shell syntax.
    command = [
        "yt-dlp",
        # Same directory handling as the glob lookup below, so the file is
        # found whether or not `save_dir` ends with a path separator.
        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
        "--cookies", "./youtube_cookies.txt",
        "--verbose",
        "--force-overwrites",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    try:
        # check=True turns a non-zero exit status into an exception.
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print(e)
        raise IOError("Failed to download YouTube video.") from e

    # The extension is chosen by yt-dlp, so look the file up by its stem.
    # Sorting makes the choice deterministic if several files match.
    matches = sorted(glob(os.path.join(save_dir, f"{video_id}.*")))
    if not matches:
        raise IOError(
            f"No downloaded file found for video {video_id} in {save_dir}."
        )
    saved_filepath = matches[0]
    print("Saved file:", saved_filepath)

    if not convert_to_mp4:
        return saved_filepath

    # Swap only the final extension; a plain str.replace would also rewrite
    # any earlier occurrence of the extension text inside the path.
    stem, ext = os.path.splitext(saved_filepath)
    to_save = stem + ".mp4"
    if ext != ".mp4":
        # convert to mp4 using ffmpeg
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-i", saved_filepath, to_save],
                check=True,
            )
        except (OSError, subprocess.CalledProcessError) as e:
            print(e)
            raise IOError("Failed to convert downloaded video to mp4.") from e
    return to_save


def configure_input():
    """
    Creates the input widgets of the demo.

    Two Markdown hints are created alongside the inputs: one before the
    video uploader and one after the link textbox.

    Returns:
        list: `[video_input, youtube_link]`, the `gr.Video` uploader and the
            `gr.Textbox` for a YouTube link.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
    )
    video_input = gr.Video(label="Upload Video", height=520)
    youtube_link = gr.Textbox(label="YouTube Link", value=None)
    # Adjacent literals are concatenated as-is, so each continuation starts
    # with a space to keep the sentences apart.
    gr.Markdown(
        "Note: Often, YouTube download can fail because the video may not be public or YouTube asks for Sign in."
        " We recommend downloading the video in other ways on your machine and uploading it here."
        " Alternatively, you can clone the repository and run the demo locally which can allow for Sign-in.",
    )
    return [video_input, youtube_link]


# Backend name forwarded to `load_frame`.
# video_backend = "decord"
video_backend = "torchvision"


def get_predictions(video_path):
    """
    Runs the full pipeline on one video file.

    The model is obtained through `load_model()` on every call. Only the
    audio tensor is fed to the model; the frame and the spectrogram are
    passed on to `show_output` together with the model outputs.

    Args:
        video_path (str): Path to the video file.

    Returns:
        tuple: `(image, df_show, tsne_image)` as produced by `show_output`.
    """
    net = load_model()
    reference_frame = load_frame(video_path, video_backend=video_backend)
    spectrogram = load_spectrogram(video_path)
    waveform = load_audio_tensor(video_path)
    features, predictions = get_model_output(waveform, net)
    pitch_image, properties, tsne_plot = show_output(
        reference_frame, spectrogram, predictions, features,
    )
    return pitch_image, properties, tsne_plot


def get_video_id_from_url(url):
    """
    Extracts the video ID from a YouTube URL.

    Three URL shapes are recognised, tried in this order: a `v=` query
    parameter, a `youtu.be/<id>` short link and a `shorts/<id>` link.

    Args:
        url (str): The YouTube URL.

    Returns:
        str: The first video ID found.

    Raises:
        ValueError: If none of the URL shapes yields an ID.
    """
    import re

    patterns = (
        # The lookbehind keeps `v=` from matching the tail of another
        # parameter name (e.g. `pv=1`).
        r"(?<![\w-])v=([a-zA-Z0-9_-]+)",
        r"youtu\.be/([a-zA-Z0-9_-]+)",
        r"shorts/([a-zA-Z0-9_-]+)",
    )
    for pattern in patterns:
        video_id = re.findall(pattern, url)
        if video_id:
            break
    else:
        raise ValueError("Invalid YouTube URL")

    print("Video URL:", url)
    print("Video ID:", video_id)
    return video_id[0]


# Markdown note shown next to the results. A raw string is used so the LaTeX
# backslashes (`\frac`, `\lambda`, `\ `) are kept literally instead of being
# read as string escape sequences.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""


# Example usage in a Gradio interface
def process_input(video, youtube_link):
    """
    Handles one request of the demo: exactly one of `video` or
    `youtube_link` must be provided.

    Args:
        video (str | None): Path to the uploaded video file, if any.
        youtube_link (str | None): YouTube URL, if any. Surrounding
            whitespace is ignored; an empty string counts as not provided.

    Returns:
        tuple: `(image, df_show, markdown, tsne_image)`, where `markdown` is
            a `gr.Markdown` holding the note (plus a link to the original
            video when a YouTube link was used).

    Raises:
        ValueError: If `youtube_link` is neither `None` nor a string, if both
            inputs are provided, or if neither is provided.
    """

    provided_video = video is not None
    if youtube_link is None:
        provided_link = False
    elif isinstance(youtube_link, str):
        youtube_link = youtube_link.strip()
        provided_link = len(youtube_link) > 0
    else:
        raise ValueError(f"Invalid type of link {youtube_link}.")

    if provided_video and provided_link:
        raise ValueError("Please provide either a video file or a YouTube link, not both.")

    # Raised explicitly (not asserted) so the check also runs under `-O`.
    if not provided_video and not provided_link:
        raise ValueError("YouTube Link cannot be empty if no video is provided.")

    if provided_video:
        print(video)

        # The input is a video file path
        video_path = video
        local_note = note

    else:
        print(provided_link)

        video_id = get_video_id_from_url(youtube_link)
        print("Video ID:", video_id)
        video_path = download_youtube_video_ytdlp(
            video_id, save_dir="/tmp/",
        )

        # Add youtube link to the note
        local_note = f"{note}\n\nYou can watch the original video here: "\
            f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"

    # Get predictions
    image, df_show, tsne_image = get_predictions(video_path)

    return image, df_show, gr.Markdown(local_note), tsne_image


def configure_outputs():
    """
    Creates the output widgets of the demo.

    Returns:
        list: `[image, dataframe, markdown, tsne image]`, in the order in
            which `process_input` returns its values.
    """
    pitch_view = gr.Image(label="Estimated pitch")
    properties_table = gr.DataFrame(label="Estimated physical properties")
    # The TSNE image is created before the note but returned after it.
    tsne_view = gr.Image(label="TSNE of features", width=300)
    note_view = gr.Markdown(label="Note")

    outputs = [pitch_view, properties_table]
    outputs += [note_view, tsne_view]
    return outputs


# Pre-defined examples. Every row is `[video, youtube_link]`; all bundled
# examples are local clips, so the link slot is always None.
examples = [
    [clip_path, None]
    for clip_path in (
        "./media_assets/example_video.mp4",
        "./media_assets/ayNzH0uygFw_9.0_21.0.mp4",
        "./media_assets/biDn0Gi6V8U_7.0_15.0.mp4",
        "./media_assets/goWgiQQMugA_2.5_9.0.mp4",
        "./media_assets/K87g4RvO-9k_254.0_259.0.mp4",
        # Shows that it works with background noise
        "./media_assets/l74zJHCZ9uA.webm",
        # Shows that it works with a slightly differently shaped container
        "./media_assets/LpRPV0hIymU.webm",
        "./media_assets/k-HnMsS36J8.webm",
    )
]
# Link-based examples, currently disabled:
#     [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
#     [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
#     [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],


# Define Gradio interface: header HTML, the main form, then footer HTML.
# NOTE(review): `custom_css` is not defined in the visible part of this file;
# presumably it comes from `from util import *` -- confirm.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Default(),
) as demo:

    # Add the header
    gr.HTML(header)
    
    # Main form: `process_input` receives the values of the components
    # returned by `configure_input()` and fills those returned by
    # `configure_outputs()`.
    gr.Interface(
        fn=process_input,
        inputs=configure_input(),
        outputs=configure_outputs(),
        examples=examples,
    )

    # Add the footer
    gr.HTML(footer)


# Launch the interface. This runs at import time (there is no
# `if __name__ == "__main__"` guard).
demo.launch(allowed_paths=["."], share=True)