bpiyush committed on
Commit
e21ebc5
β€’
1 Parent(s): f3ba4ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -67
app.py CHANGED
@@ -115,93 +115,154 @@ Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Githu
115
  Tips to get better results:
116
  <br><br>
117
  <ol style="text-align: left; font-size: 14px; margin-left: 30px">
118
- <li>Make sure there is not too much noise such that the pouring is audible.</li>
119
- <li>Note that the video is not used during the inference. Only the audio must be clear enough.</li>
 
 
 
 
 
 
120
  </ol>
121
  </div>
122
  </div>
123
  """
124
 
125
- # def process_input(video=None, youtube_link=None, start_time=None, end_time=None):
126
- # if video:
127
- # return f"Video file uploaded: {video.name}"
128
- # elif youtube_link and start_time and end_time:
129
- # return f"YouTube link: {youtube_link} (Start: {start_time}, End: {end_time})"
130
- # else:
131
- # return "Please upload a video or provide a YouTube link with start and end times."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
 
134
  def configure_input():
135
  gr.Markdown(
136
- "#### Either upload a video file or provide a YouTube link with start and end times."
137
  )
138
- video_input = gr.Video(label="Upload Video", height=480)
139
- youtube_link_start = gr.Textbox(label="YouTube Link (Start time)")
140
- youtube_link_end = gr.Textbox(label="YouTube Link (End time)")
141
- return [video_input, youtube_link_start, youtube_link_end]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
 
144
  # Example usage in a Gradio interface
145
- def process_input(video, youtube_link_start, youtube_link_end):
 
 
 
146
  if video is not None:
147
  print(video)
148
 
149
- # Load model globally
150
- model = load_model()
151
 
152
  # The input is a video file path
153
  video_path = video
154
 
155
- # Load first frame
156
- frame = load_frame(video_path)
157
-
158
- # Load spectrogram
159
- S = load_spectrogram(video_path)
160
-
161
- # Load audio tensor
162
- audio = load_audio_tensor(video_path)
163
-
164
- # Get output
165
- z_audio, y_audio = get_model_output(audio, model)
166
-
167
- # Show image output
168
- image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
169
 
170
  return image, df_show, gr.Markdown(note), tsne_image
171
 
172
- elif (youtube_link_start is not None) and (youtube_link_end is not None):
173
- # Using the provided YouTube link
174
- # Example: https://youtu.be/6-HVn8Jzzuk?t=10
175
- start_link = f"Processing YouTube link: {youtube_link_start}"
176
- end_link = f"Processing YouTube link: {youtube_link_end}"
177
-
178
- # Get video ID
179
- video_id = youtube_link_start.split("/")[-1].split("?")[0]
180
- assert video_id == youtube_link_end.split("/")[-1].split("?")[0], "Video IDs do not match"
181
- start_time = float(youtube_link_start.split("t=")[-1])
182
- end_time = float(youtube_link_end.split("t=")[-1])
183
-
184
- raise NotImplementedError("YouTube link processing is not implemented yet")
185
  else:
186
- return "No input provided"
187
-
 
 
 
 
 
188
 
189
- def greet(name, is_morning, temperature):
190
- salutation = "Good morning" if is_morning else "Good evening"
191
- greeting = f"{salutation} {name}. It is {temperature} degrees today"
192
- celsius = (temperature - 32) * 5 / 9
193
- return greeting, round(celsius, 2)
194
 
 
 
 
195
 
196
-
197
- note = """
198
- **Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
199
- Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
200
-
201
- $$
202
- H = l(0) = \\frac{\lambda(0) - \lambda(T)}{4} \ \ \\text{and} \ \ R = \\frac{\lambda(T)}{4\\beta}
203
- $$
204
- """
205
 
206
 
207
  def configure_outputs():
@@ -209,17 +270,24 @@ def configure_outputs():
209
  dataframe = gr.DataFrame(label="Estimated physical properties")
210
  image_tsne = gr.Image(label="TSNE of features", width=300)
211
  markdown = gr.Markdown(label="Note")
212
- # ["image", "dataframe", "image", "markdown"]
213
  return [image_wide, dataframe, markdown, image_tsne]
214
 
215
 
216
  # Configure pre-defined examples
217
  examples = [
218
- ["./media_assets/example_video.mp4", None, None],
219
- ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
220
- ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
221
- ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
222
- ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
 
 
 
 
 
 
 
 
223
  ]
224
 
225
 
@@ -238,7 +306,7 @@ with gr.Blocks(
238
  outputs=configure_outputs(),
239
  examples=examples,
240
  )
241
-
242
  # Add the footer
243
  gr.HTML(footer)
244
 
 
115
  Tips to get better results:
116
  <br><br>
117
  <ol style="text-align: left; font-size: 14px; margin-left: 30px">
118
+ <li>The first example may take 30–60s to process since the model also has to be loaded.</li>
119
+ <li>
120
+ If you are providing a link, it may take a few seconds to download the video from YouTube.
121
+ Note that the entire video shall be used.
122
+ If the sound of pouring is not clear, the results will be random.
123
+ </li>
124
+ <li>Although the model is somewhat robust to noise, make sure there is not so much background noise that the pouring becomes inaudible.</li>
125
+ <li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
126
  </ol>
127
  </div>
128
  </div>
129
  """
130
 
131
+
132
+ def download_from_youtube(
133
+ video_id,
134
+ save_dir="/tmp/",
135
+ convert_to_mp4=False,
136
+ ):
137
+ """
138
+ Downloads a YouTube video from start to end times.
139
+
140
+ Args:
141
+ video_id (str): YouTube video ID.
142
+ save_dir (str): Directory to save the video.
143
+ convert_to_mp4 (bool): Whether to convert the video to mp4 format.
144
+
145
+ The saved video is in the format: {save_dir}/{video_id}.mp4
146
+ """
147
+
148
+ import datetime
149
+ from subprocess import call
150
+
151
+ print("Downloading video from YouTube...")
152
+ print("Video ID:", video_id)
153
+
154
+ command = [
155
+ "yt-dlp",
156
+ "-o", "'{}%(id)s.%(ext)s'".format(save_dir),
157
+ "--verbose",
158
+ "--force-overwrites",
159
+ f"https://www.youtube.com/watch?v={video_id}",
160
+ ]
161
+ call(" ".join(command), shell=True)
162
+
163
+ # If not mp4, convert to mp4
164
+ from glob import glob
165
+ saved_filepath = glob(os.path.join(save_dir, f"{video_id}.*"))[0]
166
+ print("Saved file:", saved_filepath)
167
+
168
+ if convert_to_mp4:
169
+ ext = saved_filepath.split(".")[-1]
170
+ to_save = saved_filepath.replace(ext, "mp4")
171
+ if ext != "mp4":
172
+ # convert to mp4 using ffmpeg
173
+ command = "ffmpeg -y -i {} {}".format(saved_filepath, to_save)
174
+ call(command, shell=True)
175
+ return to_save
176
+ else:
177
+ return saved_filepath
178
 
179
 
180
  def configure_input():
181
  gr.Markdown(
182
+ "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
183
  )
184
+ video_input = gr.Video(label="Upload Video", height=520)
185
+ youtube_link = gr.Textbox(label="YouTube Link", value=None)
186
+ return [video_input, youtube_link]
187
+
188
+
189
+ # video_backend = "decord"
190
+ video_backend = "torchvision"
191
+ def get_predictions(video_path):
192
+ model = load_model()
193
+ frame = load_frame(video_path, video_backend=video_backend)
194
+ S = load_spectrogram(video_path)
195
+ audio = load_audio_tensor(video_path)
196
+ z_audio, y_audio = get_model_output(audio, model)
197
+ image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
198
+ return image, df_show, tsne_image
199
+
200
+
201
+ def get_video_id_from_url(url):
202
+ import re
203
+ if "v=" in url:
204
+ video_id = re.findall(r"v=([a-zA-Z0-9_-]+)", url)
205
+ elif "youtu.be" in url:
206
+ video_id = re.findall(r"youtu.be/([a-zA-Z0-9_-]+)", url)
207
+ elif "shorts" in url:
208
+ video_id = re.findall(r"shorts/([a-zA-Z0-9_-]+)", url)
209
+ else:
210
+ raise ValueError("Invalid YouTube URL")
211
+ print("Video URL:", url)
212
+ print("Video ID:", video_id)
213
+
214
+ if len(video_id) > 0:
215
+ return video_id[0]
216
+ else:
217
+ raise ValueError("Invalid YouTube URL")
218
+
219
+
220
+ note = """
221
+ **Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
222
+ Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
223
+
224
+ $$
225
+ H = l(0) = \\frac{\lambda(0) - \lambda(T)}{4} \ \ \\text{and} \ \ R = \\frac{\lambda(T)}{4\\beta}
226
+ $$
227
+ """
228
 
229
 
230
  # Example usage in a Gradio interface
231
+ def process_input(video, youtube_link):
232
+ if video is not None and len(youtube_link) > 0:
233
+ raise ValueError("Please provide either a video file or a YouTube link, not both.")
234
+
235
  if video is not None:
236
  print(video)
237
 
238
+ # # Load model globally
239
+ # model = load_model()
240
 
241
  # The input is a video file path
242
  video_path = video
243
 
244
+ # Get predictions
245
+ image, df_show, tsne_image = get_predictions(video_path)
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  return image, df_show, gr.Markdown(note), tsne_image
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  else:
250
+ assert len(youtube_link) > 0, \
251
+ "YouTube Link cannot be empty if no video is provided."
252
+
253
+ video_id = get_video_id_from_url(youtube_link)
254
+ video_path = download_from_youtube(
255
+ video_id, save_dir="/tmp/", convert_to_mp4=False,
256
+ )
257
 
258
+ # Get predictions
259
+ image, df_show, tsne_image = get_predictions(video_path)
 
 
 
260
 
261
+ # Add youtube link to the note
262
+ local_note = f"{note}\n\nYou can watch the original video here: "\
263
+ f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"
264
 
265
+ return image, df_show, gr.Markdown(local_note), tsne_image
 
 
 
 
 
 
 
 
266
 
267
 
268
  def configure_outputs():
 
270
  dataframe = gr.DataFrame(label="Estimated physical properties")
271
  image_tsne = gr.Image(label="TSNE of features", width=300)
272
  markdown = gr.Markdown(label="Note")
 
273
  return [image_wide, dataframe, markdown, image_tsne]
274
 
275
 
276
  # Configure pre-defined examples
277
  examples = [
278
+ ["./media_assets/example_video.mp4", None],
279
+ ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None],
280
+ ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None],
281
+ ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None],
282
+ ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None],
283
+ # Shows that it works with background noise
284
+ ["./media_assets/l74zJHCZ9uA.webm", None],
285
+ # Shows that it works with a slightly differently shaped container
286
+ ["./media_assets/LpRPV0hIymU.webm", None],
287
+ ["./media_assets/k-HnMsS36J8.webm", None],
288
+ # [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
289
+ # [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
290
+ # [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
291
  ]
292
 
293
 
 
306
  outputs=configure_outputs(),
307
  examples=examples,
308
  )
309
+
310
  # Add the footer
311
  gr.HTML(footer)
312