Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -115,93 +115,154 @@ Please give us a π on <a href='https://github.com/bpiyush/SoundOfWater'>Githu
|
|
115 |
Tips to get better results:
|
116 |
<br><br>
|
117 |
<ol style="text-align: left; font-size: 14px; margin-left: 30px">
|
118 |
-
<li>
|
119 |
-
<li>
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
</ol>
|
121 |
</div>
|
122 |
</div>
|
123 |
"""
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
|
134 |
def configure_input():
|
135 |
gr.Markdown(
|
136 |
-
"#### Either upload a video file or provide a YouTube link
|
137 |
)
|
138 |
-
video_input = gr.Video(label="Upload Video", height=
|
139 |
-
|
140 |
-
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
|
144 |
# Example usage in a Gradio interface
|
145 |
-
def process_input(video,
|
|
|
|
|
|
|
146 |
if video is not None:
|
147 |
print(video)
|
148 |
|
149 |
-
# Load model globally
|
150 |
-
model = load_model()
|
151 |
|
152 |
# The input is a video file path
|
153 |
video_path = video
|
154 |
|
155 |
-
#
|
156 |
-
|
157 |
-
|
158 |
-
# Load spectrogram
|
159 |
-
S = load_spectrogram(video_path)
|
160 |
-
|
161 |
-
# Load audio tensor
|
162 |
-
audio = load_audio_tensor(video_path)
|
163 |
-
|
164 |
-
# Get output
|
165 |
-
z_audio, y_audio = get_model_output(audio, model)
|
166 |
-
|
167 |
-
# Show image output
|
168 |
-
image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
|
169 |
|
170 |
return image, df_show, gr.Markdown(note), tsne_image
|
171 |
|
172 |
-
elif (youtube_link_start is not None) and (youtube_link_end is not None):
|
173 |
-
# Using the provided YouTube link
|
174 |
-
# Example: https://youtu.be/6-HVn8Jzzuk?t=10
|
175 |
-
start_link = f"Processing YouTube link: {youtube_link_start}"
|
176 |
-
end_link = f"Processing YouTube link: {youtube_link_end}"
|
177 |
-
|
178 |
-
# Get video ID
|
179 |
-
video_id = youtube_link_start.split("/")[-1].split("?")[0]
|
180 |
-
assert video_id == youtube_link_end.split("/")[-1].split("?")[0], "Video IDs do not match"
|
181 |
-
start_time = float(youtube_link_start.split("t=")[-1])
|
182 |
-
end_time = float(youtube_link_end.split("t=")[-1])
|
183 |
-
|
184 |
-
raise NotImplementedError("YouTube link processing is not implemented yet")
|
185 |
else:
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
-
|
190 |
-
|
191 |
-
greeting = f"{salutation} {name}. It is {temperature} degrees today"
|
192 |
-
celsius = (temperature - 32) * 5 / 9
|
193 |
-
return greeting, round(celsius, 2)
|
194 |
|
|
|
|
|
|
|
195 |
|
196 |
-
|
197 |
-
note = """
|
198 |
-
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
|
199 |
-
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
|
200 |
-
|
201 |
-
$$
|
202 |
-
H = l(0) = \\frac{\lambda(0) - \lambda(T)}{4} \ \ \\text{and} \ \ R = \\frac{\lambda(T)}{4\\beta}
|
203 |
-
$$
|
204 |
-
"""
|
205 |
|
206 |
|
207 |
def configure_outputs():
|
@@ -209,17 +270,24 @@ def configure_outputs():
|
|
209 |
dataframe = gr.DataFrame(label="Estimated physical properties")
|
210 |
image_tsne = gr.Image(label="TSNE of features", width=300)
|
211 |
markdown = gr.Markdown(label="Note")
|
212 |
-
# ["image", "dataframe", "image", "markdown"]
|
213 |
return [image_wide, dataframe, markdown, image_tsne]
|
214 |
|
215 |
|
216 |
# Configure pre-defined examples
|
217 |
examples = [
|
218 |
-
["./media_assets/example_video.mp4", None
|
219 |
-
["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None
|
220 |
-
["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None
|
221 |
-
["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None
|
222 |
-
["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
]
|
224 |
|
225 |
|
@@ -238,7 +306,7 @@ with gr.Blocks(
|
|
238 |
outputs=configure_outputs(),
|
239 |
examples=examples,
|
240 |
)
|
241 |
-
|
242 |
# Add the footer
|
243 |
gr.HTML(footer)
|
244 |
|
|
|
115 |
Tips to get better results:
|
116 |
<br><br>
|
117 |
<ol style="text-align: left; font-size: 14px; margin-left: 30px">
|
118 |
+
<li>The first example may take up to 30-60s for processing since the model is also loaded.</li>
|
119 |
+
<li>
|
120 |
+
If you are providing a link, it may take a few seconds to download video from YouTube.
|
121 |
+
Note that the entire video shall be used.
|
122 |
+
If the sound of pouring is not clear, the results will be random.
|
123 |
+
</li>
|
124 |
+
<li>Although the model is somewhat robust to noise, make sure there is not too much noise such that the pouring is audible.</li>
|
125 |
+
<li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
|
126 |
</ol>
|
127 |
</div>
|
128 |
</div>
|
129 |
"""
|
130 |
|
131 |
+
|
132 |
+
def download_from_youtube(
    video_id,
    save_dir="/tmp/",
    convert_to_mp4=False,
):
    """
    Download a complete YouTube video with ``yt-dlp``.

    Args:
        video_id (str): YouTube video ID.
        save_dir (str): Directory to save the video.
        convert_to_mp4 (bool): Whether to convert the video to mp4 format
            (with ``ffmpeg``) when the downloaded file is not already mp4.

    Returns:
        str: Path of the saved file, ``{save_dir}/{video_id}.{ext}``. The
        extension is ``mp4`` when ``convert_to_mp4`` is set; otherwise it is
        whatever container ``yt-dlp`` wrote.

    Raises:
        RuntimeError: If no downloaded file can be found after ``yt-dlp``
            has run (e.g. the download failed).
    """
    import glob
    import os
    import subprocess

    print("Downloading video from YouTube...")
    print("Video ID:", video_id)

    # Pass an argument list (no shell) so that neither the video ID nor the
    # save directory can ever be interpreted as shell syntax.
    command = [
        "yt-dlp",
        "-o", os.path.join(save_dir, "%(id)s.%(ext)s"),
        "--verbose",
        "--force-overwrites",
        f"https://www.youtube.com/watch?v={video_id}",
    ]
    subprocess.call(command)

    # The container is chosen by yt-dlp, so look the file up by its ID.
    # Leftovers of an interrupted download (.part / .ytdl) are ignored.
    pattern = os.path.join(
        glob.escape(save_dir), f"{glob.escape(video_id)}.*"
    )
    candidates = sorted(
        path
        for path in glob.glob(pattern)
        if not path.endswith((".part", ".ytdl"))
    )
    if not candidates:
        raise RuntimeError(
            f"Could not find a downloaded file for video ID {video_id!r} "
            f"in {save_dir!r}."
        )
    saved_filepath = candidates[0]
    print("Saved file:", saved_filepath)

    if not convert_to_mp4:
        return saved_filepath

    # Swap only the real suffix; a plain str.replace would also rewrite the
    # extension text wherever else it occurs in the path.
    root, ext = os.path.splitext(saved_filepath)
    to_save = root + ".mp4"
    if ext != ".mp4":
        # convert to mp4 using ffmpeg
        subprocess.call(["ffmpeg", "-y", "-i", saved_filepath, to_save])
    return to_save
|
178 |
|
179 |
|
180 |
def configure_input():
    """
    Build the input widgets of the demo.

    Renders a short instruction line, then creates the video upload widget
    and the YouTube link textbox.

    Returns:
        list: ``[video widget, link textbox]``, in that order.
    """
    gr.Markdown(
        "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
    )
    input_widgets = [
        gr.Video(label="Upload Video", height=520),
        gr.Textbox(label="YouTube Link", value=None),
    ]
    return input_widgets
|
187 |
+
|
188 |
+
|
189 |
+
# Backend used to read the reference frame ("decord" is the alternative).
video_backend = "torchvision"


def get_predictions(video_path):
    """
    Run the whole inference pipeline on a single video file.

    Args:
        video_path (str): Path of the video to analyse.

    Returns:
        tuple: ``(image, df_show, tsne_image)`` exactly as produced by
        ``show_output``.
    """
    # NOTE(review): the model is loaded on every call - confirm whether
    # load_model() caches internally.
    net = load_model()

    # Inputs derived from the file: a frame, the spectrogram and the audio.
    reference_frame = load_frame(video_path, video_backend=video_backend)
    spectrogram = load_spectrogram(video_path)
    waveform = load_audio_tensor(video_path)

    # Model forward pass, then rendering of the results.
    z_audio, y_audio = get_model_output(waveform, net)
    image, df_show, tsne_image = show_output(
        reference_frame, spectrogram, y_audio, z_audio
    )
    return image, df_show, tsne_image
|
199 |
+
|
200 |
+
|
201 |
+
def get_video_id_from_url(url):
    """
    Extract the video ID from a YouTube URL.

    Recognised forms (the scheme is optional):
        - ``.../watch?v=<id>`` (any URL carrying a ``v`` query parameter)
        - ``youtu.be/<id>``
        - ``.../shorts/<id>``, ``.../embed/<id>``, ``.../live/<id>``

    Args:
        url (str): Link pasted by the user.

    Returns:
        str: The video ID (letters, digits, ``_`` and ``-`` only).

    Raises:
        ValueError: If ``url`` is not a non-empty string or no video ID can
            be found in it.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str) or not url.strip():
        raise ValueError("Invalid YouTube URL")
    print("Video URL:", url)

    text = url.strip()
    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(text if "://" in text else "https://" + text)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Read the real ``v`` query parameter instead of searching for the
    # substring "v=", which also matches parameters such as ``rev=``.
    candidate = None
    v_values = parse_qs(parsed.query).get("v")
    if v_values:
        candidate = v_values[0]
    elif host == "youtu.be" or host.endswith(".youtu.be"):
        candidate = segments[0] if segments else None
    else:
        for marker, value in zip(segments, segments[1:]):
            if marker in ("shorts", "embed", "live"):
                candidate = value
                break

    # Keep only the leading run of characters that are legal in an ID.
    match = re.match(r"[a-zA-Z0-9_-]+", candidate or "")
    if match is None:
        raise ValueError("Invalid YouTube URL")

    video_id = match.group(0)
    print("Video ID:", video_id)
    return video_id
|
218 |
+
|
219 |
+
|
220 |
+
# Markdown note displayed next to the predictions. It is a raw string so the
# LaTeX backslashes are kept literally; in a normal string "\l" and "\ " are
# invalid escape sequences and trigger a warning on recent Python versions.
note = r"""
**Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
Thus, it may not be accurate if the wavelength is not estimated correctly at the end.

$$
H = l(0) = \frac{\lambda(0) - \lambda(T)}{4} \ \ \text{and} \ \ R = \frac{\lambda(T)}{4\beta}
$$
"""
|
228 |
|
229 |
|
230 |
# Example usage in a Gradio interface
|
231 |
+
def process_input(video, youtube_link):
    """
    Gradio callback: run the model on an uploaded video or a YouTube link.

    Exactly one of the two inputs must be provided.

    Args:
        video (str | None): Path of the uploaded video file, if any.
        youtube_link (str | None): YouTube URL, if any. ``None``, empty and
            whitespace-only values all count as "not provided".

    Returns:
        tuple: ``(image, df_show, markdown, tsne_image)``. For a YouTube
        input the markdown note also links to the original video.

    Raises:
        ValueError: If both inputs or neither input is provided, or if the
            link is not a valid YouTube URL.
    """
    # The link may be None (the pre-defined examples supply None for it),
    # so normalise it before testing whether it is empty.
    link = (youtube_link or "").strip()

    if video is not None and link:
        raise ValueError("Please provide either a video file or a YouTube link, not both.")
    if video is None and not link:
        # Raised explicitly: an assert would be skipped under ``python -O``.
        raise ValueError("YouTube Link cannot be empty if no video is provided.")

    if video is not None:
        print(video)

        # The input is a video file path
        video_path = video
        local_note = note
    else:
        video_id = get_video_id_from_url(link)
        video_path = download_from_youtube(
            video_id, save_dir="/tmp/", convert_to_mp4=False,
        )

        # Add youtube link to the note
        local_note = f"{note}\n\nYou can watch the original video here: "\
            f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"

    # Get predictions
    image, df_show, tsne_image = get_predictions(video_path)

    return image, df_show, gr.Markdown(local_note), tsne_image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
|
268 |
def configure_outputs():
|
|
|
270 |
dataframe = gr.DataFrame(label="Estimated physical properties")
|
271 |
image_tsne = gr.Image(label="TSNE of features", width=300)
|
272 |
markdown = gr.Markdown(label="Note")
|
|
|
273 |
return [image_wide, dataframe, markdown, image_tsne]
|
274 |
|
275 |
|
276 |
# Configure pre-defined examples
|
277 |
# Local clips offered as one-click examples in the demo.
_EXAMPLE_VIDEO_PATHS = (
    "./media_assets/example_video.mp4",
    "./media_assets/ayNzH0uygFw_9.0_21.0.mp4",
    "./media_assets/biDn0Gi6V8U_7.0_15.0.mp4",
    "./media_assets/goWgiQQMugA_2.5_9.0.mp4",
    "./media_assets/K87g4RvO-9k_254.0_259.0.mp4",
    # Shows that it works with background noise
    "./media_assets/l74zJHCZ9uA.webm",
    # Shows that it works with a slightly differently shaped container
    "./media_assets/LpRPV0hIymU.webm",
    "./media_assets/k-HnMsS36J8.webm",
)

# Link-only examples, currently disabled:
#   [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
#   [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
#   [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],

# One row per example: [video file, YouTube link]; the link is left unset.
examples = [[video_file, None] for video_file in _EXAMPLE_VIDEO_PATHS]
|
292 |
|
293 |
|
|
|
306 |
outputs=configure_outputs(),
|
307 |
examples=examples,
|
308 |
)
|
309 |
+
|
310 |
# Add the footer
|
311 |
gr.HTML(footer)
|
312 |
|