Spaces:

bpiyush
/

SoundOfWater

Running

App Files Files Community

SoundOfWater / app.py

bpiyush

Update app.py

49d9f95 verified 3 months ago

raw

history blame

10.4 kB

	import os
	import sys
	sys.path.append("../")

	import gradio as gr
	import torch
	import numpy as np
	import matplotlib.pyplot as plt
	plt.rcParams["font.family"] = "serif"
	import decord
	import PIL, PIL.Image
	import librosa
	from IPython.display import Markdown, display
	import pandas as pd

	from util import *


	# Inline <style> block that is prepended to both the `header` and `footer`
	# HTML snippets below. Class names here (.header, .content, .title, ...)
	# are the ones referenced by those snippets.
	css = """
	<style>
	body {
	font-family: 'Arial', serif;
	margin: 0;
	padding: 0;
	color: black;
	}
	.header {
	display: flex;
	align-items: center;
	justify-content: center;
	margin-top: 5px;
	color: black;
	}
	.footer {
	display: flex;
	align-items: center;
	justify-content: center;
	margin-top: 5px;
	}
	.image {
	margin-right: 20px;
	}
	.content {
	text-align: center;
	color: black;
	}
	.title {
	font-size: 2.5em;
	font-weight: bold;
	margin-bottom: 10px;
	}
	.authors {
	color: #4a90e2;
	font-size: 1.05em;
	margin: 10px 0;
	}
	.affiliations {
	font-size: 1em;
	margin-bottom: 20px;
	}
	.buttons {
	display: flex;
	justify-content: center;
	gap: 10px;
	}
	.button {
	background-color: #545758;
	text-decoration: none;
	padding: 8px 16px;
	border-radius: 5px;
	font-size: 1.05em;
	}
	.button:hover {
	background-color: #333;
	}
	</style>
	"""


	# Page banner shown above the interface: logo, title, author links,
	# affiliations and the row of resource buttons. The shared <style> block
	# in `css` is prepended so the class names used below resolve.
	header = css + """
	<div class="header">
	<!-- <div class="image">
	<img src="./media_assets/pouring-water-logo5.png" alt="logo" width="100">
	</div> -->
	<div class="content">
	<img src="https://bpiyush.github.io/pouring-water-website/assets/pouring-water-logo5.png" alt="logo" width="80" style="margin-bottom: -50px; margin-right: 30px;">
	<div class="title" style="font-size: 44px; margin-left: -30px;">The Sound of Water</div>
	<div style="font-size: 30px; margin-left: -30px;"><b>Inferring Physical Properties from Pouring Liquids</b></div>
	<div class="authors">
	<a style="color: #92eaff;" href="https://bpiyush.github.io/">Piyush Bagad</a><sup>1</sup>,
	<a style="color: #92eaff;" href="https://makarandtapaswi.github.io/">Makarand Tapaswi</a><sup>2</sup>,
	<a style="color: #92eaff;" href="https://www.ceessnoek.info/">Cees G. M. Snoek</a><sup>3</sup>,
	<a style="color: #92eaff;" href="https://www.robots.ox.ac.uk/~az/">Andrew Zisserman</a><sup>1</sup>
	</div>
	<div class="affiliations">
	<sup>1</sup>University of Oxford, <sup>2</sup>IIIT Hyderabad, <sup>3</sup>University of Amsterdam
	</div>

	<div class="buttons">
	<a href="#" style="color: #92eaff;" class="button">arXiv</a>
	<a href="https://bpiyush.github.io/pouring-water-website/" style="color: #92eaff;" class="button">🌐 Project</a>
	<a href="https://github.com/bpiyush/SoundOfWater" style="color: #92eaff;" class="button"> <img src="https://bpiyush.github.io/pouring-water-website/assets/github-logo.png" alt="logo" style="height:16px; float: left;">  Code</a>
	<a href="https://huggingface.co/datasets/bpiyush/sound-of-water" style="color: #92eaff;" class="button">🤗 Data</a>
	<a href="https://huggingface.co/bpiyush/sound-of-water-models" style="color: #92eaff;" class="button">🤗 Models</a>
	<a href="#" style="color: #92eaff;" class="button">🎯 Demo</a>
	</div>
	</div>
	</div>
	"""

	# Usage tips rendered below the interface. The snippet reuses the
	# `header` and `content` CSS classes (overriding the alignment inline) and
	# therefore prepends the shared <style> block from `css` once more.
	footer = css + """
	<div class="header" style="justify-content: left;">
	<div class="content" style="font-size: 16px;">
	Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Github</a> if you like our work!
	Tips to get better results:
	<br><br>
	<ol style="text-align: left; font-size: 14px; margin-left: 30px">
	<li>The first example may take up to 30-60s for processing since the model is also loaded.</li>
	<li>
	If you are providing a link, it may take a few seconds to download video from YouTube.
	Note that the entire video shall be used.
	If the sound of pouring is not clear, the results will be random.
	</li>
	<li>Although the model is somewhat robust to noise, make sure there is not too much noise such that the pouring is audible.</li>
	<li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
	</ol>
	</div>
	</div>
	"""


	def download_from_youtube(
	    video_id,
	    save_dir="/tmp/",
	    convert_to_mp4=False,
	):
	    """
	    Download an entire YouTube video with ``yt-dlp``.

	    Args:
	        video_id (str): YouTube video ID.
	        save_dir (str): Directory to save the video.
	        convert_to_mp4 (bool): Whether to convert the video to mp4 format
	            (via ``ffmpeg``) when the download is in another container.

	    Returns:
	        str: Path of the saved file, ``{save_dir}/{video_id}.{ext}``. With
	        ``convert_to_mp4=True`` the returned path always ends in ``.mp4``.

	    Raises:
	        FileNotFoundError: If no file for ``video_id`` exists in
	            ``save_dir`` after the download attempt.
	    """
	    from glob import escape, glob
	    from subprocess import call

	    print("Downloading video from YouTube...")
	    print("Video ID:", video_id)

	    # Run yt-dlp with an argument list (no shell), so neither the ID nor
	    # the directory can be interpreted as shell syntax.
	    command = [
	        "yt-dlp",
	        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
	        "--cookies", "./chrome_cookies.txt",
	        "--verbose",
	        "--force-overwrites",
	        f"https://www.youtube.com/watch?v={video_id}",
	    ]
	    call(command)

	    # The container format is chosen by yt-dlp, so look the file up by ID.
	    pattern = os.path.join(escape(save_dir), f"{escape(video_id)}.*")
	    matches = sorted(glob(pattern))
	    if not matches:
	        raise FileNotFoundError(
	            f"No downloaded file found for video ID {video_id!r} in {save_dir!r}."
	        )
	    saved_filepath = matches[0]
	    print("Saved file:", saved_filepath)

	    if not convert_to_mp4:
	        return saved_filepath

	    # Swap only the final extension; a plain str.replace would also rewrite
	    # the same characters if they occur inside the ID or the directory.
	    stem, extension = os.path.splitext(saved_filepath)
	    if extension == ".mp4":
	        return saved_filepath

	    to_save = stem + ".mp4"
	    call(["ffmpeg", "-y", "-i", saved_filepath, to_save])
	    return to_save


	def configure_input():
	    """
	    Create the input widgets of the demo.

	    Emits a short Markdown instruction first, then builds the video upload
	    component and the YouTube link textbox.

	    Returns:
	        list: ``[video component, textbox component]``, in that order.
	    """
	    instructions = (
	        "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used."
	    )
	    gr.Markdown(instructions)

	    uploader = gr.Video(label="Upload Video", height=520)
	    link_box = gr.Textbox(label="YouTube Link", value=None)
	    return [uploader, link_box]


	# Backend name forwarded to `load_frame`. Alternative kept for reference:
	# video_backend = "decord"
	video_backend = "torchvision"


	def get_predictions(video_path):
	    """
	    Run the full prediction pipeline on one video file.

	    Loads the model, a reference frame, the spectrogram and the audio
	    tensor, runs the model on the audio and hands everything to
	    `show_output`.

	    Args:
	        video_path: Path of the video to analyse.

	    Returns:
	        tuple: The three values produced by `show_output`, in the order
	        (image, dataframe to show, t-SNE image).
	    """
	    net = load_model()
	    reference_frame = load_frame(video_path, video_backend=video_backend)
	    spectrogram = load_spectrogram(video_path)
	    waveform = load_audio_tensor(video_path)

	    features, predictions = get_model_output(waveform, net)
	    pitch_image, table, tsne_plot = show_output(
	        reference_frame, spectrogram, predictions, features,
	    )
	    return pitch_image, table, tsne_plot


	def get_video_id_from_url(url):
	    """
	    Extract the video ID from a YouTube URL.

	    Recognised forms, tried in this order:
	      * a ``v=<id>`` query parameter (``watch?v=<id>``),
	      * ``youtu.be/<id>`` short links,
	      * ``shorts/<id>`` links.

	    Args:
	        url (str): The YouTube URL.

	    Returns:
	        str: The extracted video ID.

	    Raises:
	        ValueError: If none of the recognised forms is found in ``url``.
	    """
	    import re

	    # `v=` must be a whole query parameter name; otherwise parameters such
	    # as `rev=1` would be mistaken for the video ID.
	    patterns = (
	        r"(?:^|[?&])v=([a-zA-Z0-9_-]+)",
	        r"youtu\.be/([a-zA-Z0-9_-]+)",
	        r"shorts/([a-zA-Z0-9_-]+)",
	    )

	    print("Video URL:", url)
	    for pattern in patterns:
	        match = re.search(pattern, url)
	        if match:
	            video_id = match.group(1)
	            print("Video ID:", video_id)
	            return video_id

	    raise ValueError("Invalid YouTube URL")


	# Markdown caveat shown next to the results. A raw string is used so the
	# LaTeX backslashes are passed through literally instead of relying on
	# invalid escape sequences such as "\l".
	note = r"""
	Note: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
	Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

	$$
	H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
	$$
	"""


	# Example usage in a Gradio interface
	def process_input(video, youtube_link):
	    """
	    Gradio callback: run the predictions for an uploaded video or a link.

	    Exactly one of the two inputs must be given. A link that is ``None``,
	    empty or only whitespace counts as not given.

	    Args:
	        video: Path of the uploaded video file, or ``None``.
	        youtube_link (str | None): YouTube URL, or ``None``/empty.

	    Returns:
	        tuple: ``(image, dataframe, gr.Markdown, tsne_image)``, matching the
	        component order returned by `configure_outputs`. For a YouTube
	        input the Markdown note also links back to the original video.

	    Raises:
	        ValueError: If the link has an unsupported type, if both inputs
	            are given, if neither is given, or if the URL is not a
	            recognised YouTube URL.
	    """
	    if youtube_link is None:
	        link = ""
	    elif isinstance(youtube_link, str):
	        link = youtube_link.strip()
	    else:
	        raise ValueError(f"Invalid type of link {youtube_link}.")

	    provided_video = video is not None
	    provided_link = len(link) > 0

	    if provided_video and provided_link:
	        raise ValueError("Please provide either a video file or a YouTube link, not both.")
	    # Explicit raise instead of `assert`: input validation must survive
	    # running Python with -O.
	    if not provided_video and not provided_link:
	        raise ValueError("YouTube Link cannot be empty if no video is provided.")

	    if provided_video:
	        print(video)
	        # The input is a video file path
	        video_path = video
	        shown_note = note
	    else:
	        video_id = get_video_id_from_url(link)
	        video_path = download_from_youtube(
	            video_id, save_dir="/tmp/", convert_to_mp4=False,
	        )
	        # Add youtube link to the note
	        shown_note = f"{note}\n\nYou can watch the original video here: "\
	            f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"

	    # Get predictions
	    image, df_show, tsne_image = get_predictions(video_path)
	    return image, df_show, gr.Markdown(shown_note), tsne_image


	def configure_outputs():
	    """
	    Create the output widgets of the demo.

	    Returns:
	        list: ``[pitch image, properties table, note, t-SNE image]`` - the
	        same order in which `process_input` returns its values.
	    """
	    # The t-SNE image is constructed before the note, but listed after it.
	    pitch_plot = gr.Image(label="Estimated pitch")
	    properties_table = gr.DataFrame(label="Estimated physical properties")
	    tsne_plot = gr.Image(label="TSNE of features", width=300)
	    note_box = gr.Markdown(label="Note")

	    outputs = [pitch_plot, properties_table]
	    outputs.append(note_box)
	    outputs.append(tsne_plot)
	    return outputs


	# Configure pre-defined examples
	# Each example row is [video file, YouTube link]; all bundled examples are
	# local files, so the link slot is always None.
	examples = [
	    [f"./media_assets/{filename}", None]
	    for filename in (
	        "example_video.mp4",
	        "ayNzH0uygFw_9.0_21.0.mp4",
	        "biDn0Gi6V8U_7.0_15.0.mp4",
	        "goWgiQQMugA_2.5_9.0.mp4",
	        "K87g4RvO-9k_254.0_259.0.mp4",
	        # Shows that it works with background noise
	        "l74zJHCZ9uA.webm",
	        # Shows that it works with a slightly differently shaped container
	        "LpRPV0hIymU.webm",
	        "k-HnMsS36J8.webm",
	    )
	]
	# Link-based examples, currently disabled:
	# [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
	# [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
	# [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],


	# Define Gradio interface
	# NOTE(review): `custom_css` is not defined anywhere in the visible part of
	# this file; presumably it is provided by `from util import *` - confirm.
	# The local `css` string is only used inside `header` and `footer`.
	demo = gr.Blocks(css=custom_css, theme=gr.themes.Default())

	with demo:
	    # Banner on top
	    gr.HTML(header)

	    # Main interface: inputs are built before outputs, inside the Blocks
	    # context.
	    input_components = configure_input()
	    output_components = configure_outputs()
	    gr.Interface(
	        fn=process_input,
	        inputs=input_components,
	        outputs=output_components,
	        examples=examples,
	    )

	    # Usage tips at the bottom
	    gr.HTML(footer)


	# Start the app
	demo.launch(allowed_paths=["."], share=True)