bpiyush committed on
Commit
e21ebc5
β€’
1 Parent(s): f3ba4ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -67
app.py CHANGED
@@ -115,93 +115,154 @@ Please give us a 🌟 on <a href='https://github.com/bpiyush/SoundOfWater'>Githu
115
  Tips to get better results:
116
  <br><br>
117
  <ol style="text-align: left; font-size: 14px; margin-left: 30px">
118
- <li>Make sure there is not too much noise such that the pouring is audible.</li>
119
- <li>Note that the video is not used during the inference. Only the audio must be clear enough.</li>
 
 
 
 
 
 
120
  </ol>
121
  </div>
122
  </div>
123
  """
124
 
125
- # def process_input(video=None, youtube_link=None, start_time=None, end_time=None):
126
- # if video:
127
- # return f"Video file uploaded: {video.name}"
128
- # elif youtube_link and start_time and end_time:
129
- # return f"YouTube link: {youtube_link} (Start: {start_time}, End: {end_time})"
130
- # else:
131
- # return "Please upload a video or provide a YouTube link with start and end times."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
 
134
  def configure_input():
135
  gr.Markdown(
136
- "#### Either upload a video file or provide a YouTube link with start and end times."
137
  )
138
- video_input = gr.Video(label="Upload Video", height=480)
139
- youtube_link_start = gr.Textbox(label="YouTube Link (Start time)")
140
- youtube_link_end = gr.Textbox(label="YouTube Link (End time)")
141
- return [video_input, youtube_link_start, youtube_link_end]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
 
144
  # Example usage in a Gradio interface
145
- def process_input(video, youtube_link_start, youtube_link_end):
 
 
 
146
  if video is not None:
147
  print(video)
148
 
149
- # Load model globally
150
- model = load_model()
151
 
152
  # The input is a video file path
153
  video_path = video
154
 
155
- # Load first frame
156
- frame = load_frame(video_path)
157
-
158
- # Load spectrogram
159
- S = load_spectrogram(video_path)
160
-
161
- # Load audio tensor
162
- audio = load_audio_tensor(video_path)
163
-
164
- # Get output
165
- z_audio, y_audio = get_model_output(audio, model)
166
-
167
- # Show image output
168
- image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
169
 
170
  return image, df_show, gr.Markdown(note), tsne_image
171
 
172
- elif (youtube_link_start is not None) and (youtube_link_end is not None):
173
- # Using the provided YouTube link
174
- # Example: https://youtu.be/6-HVn8Jzzuk?t=10
175
- start_link = f"Processing YouTube link: {youtube_link_start}"
176
- end_link = f"Processing YouTube link: {youtube_link_end}"
177
-
178
- # Get video ID
179
- video_id = youtube_link_start.split("/")[-1].split("?")[0]
180
- assert video_id == youtube_link_end.split("/")[-1].split("?")[0], "Video IDs do not match"
181
- start_time = float(youtube_link_start.split("t=")[-1])
182
- end_time = float(youtube_link_end.split("t=")[-1])
183
-
184
- raise NotImplementedError("YouTube link processing is not implemented yet")
185
  else:
186
- return "No input provided"
187
-
 
 
 
 
 
188
 
189
- def greet(name, is_morning, temperature):
190
- salutation = "Good morning" if is_morning else "Good evening"
191
- greeting = f"{salutation} {name}. It is {temperature} degrees today"
192
- celsius = (temperature - 32) * 5 / 9
193
- return greeting, round(celsius, 2)
194
 
 
 
 
195
 
196
-
197
- note = """
198
- **Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
199
- Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
200
-
201
- $$
202
- H = l(0) = \\frac{\lambda(0) - \lambda(T)}{4} \ \ \\text{and} \ \ R = \\frac{\lambda(T)}{4\\beta}
203
- $$
204
- """
205
 
206
 
207
  def configure_outputs():
@@ -209,17 +270,24 @@ def configure_outputs():
209
  dataframe = gr.DataFrame(label="Estimated physical properties")
210
  image_tsne = gr.Image(label="TSNE of features", width=300)
211
  markdown = gr.Markdown(label="Note")
212
- # ["image", "dataframe", "image", "markdown"]
213
  return [image_wide, dataframe, markdown, image_tsne]
214
 
215
 
216
  # Configure pre-defined examples
217
  examples = [
218
- ["./media_assets/example_video.mp4", None, None],
219
- ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None, None],
220
- ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None, None],
221
- ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None, None],
222
- ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None, None],
 
 
 
 
 
 
 
 
223
  ]
224
 
225
 
@@ -238,7 +306,7 @@ with gr.Blocks(
238
  outputs=configure_outputs(),
239
  examples=examples,
240
  )
241
-
242
  # Add the footer
243
  gr.HTML(footer)
244
 
 
115
  Tips to get better results:
116
  <br><br>
117
  <ol style="text-align: left; font-size: 14px; margin-left: 30px">
118
+ <li>The first example may take 30–60s to process since the model also has to be loaded.</li>
119
+ <li>
120
+ If you are providing a link, it may take a few seconds to download the video from YouTube.
121
+ Note that the entire video shall be used.
122
+ If the sound of pouring is not clear, the results will be random.
123
+ </li>
124
+ <li>Although the model is somewhat robust to noise, make sure there is not so much background noise that the pouring becomes inaudible.</li>
125
+ <li>Note that the video is not used during the inference. The displayed frame is only for reference.</li>
126
  </ol>
127
  </div>
128
  </div>
129
  """
130
 
131
+
132
+ def download_from_youtube(
133
+ video_id,
134
+ save_dir="/tmp/",
135
+ convert_to_mp4=False,
136
+ ):
137
+ """
138
+ Downloads a YouTube video from start to end times.
139
+
140
+ Args:
141
+ video_id (str): YouTube video ID.
142
+ save_dir (str): Directory to save the video.
143
+ convert_to_mp4 (bool): Whether to convert the video to mp4 format.
144
+
145
+ The saved video is in the format: {save_dir}/{video_id}.mp4
146
+ """
147
+
148
+ import datetime
149
+ from subprocess import call
150
+
151
+ print("Downloading video from YouTube...")
152
+ print("Video ID:", video_id)
153
+
154
+ command = [
155
+ "yt-dlp",
156
+ "-o", "'{}%(id)s.%(ext)s'".format(save_dir),
157
+ "--verbose",
158
+ "--force-overwrites",
159
+ f"https://www.youtube.com/watch?v={video_id}",
160
+ ]
161
+ call(" ".join(command), shell=True)
162
+
163
+ # If not mp4, convert to mp4
164
+ from glob import glob
165
+ saved_filepath = glob(os.path.join(save_dir, f"{video_id}.*"))[0]
166
+ print("Saved file:", saved_filepath)
167
+
168
+ if convert_to_mp4:
169
+ ext = saved_filepath.split(".")[-1]
170
+ to_save = saved_filepath.replace(ext, "mp4")
171
+ if ext != "mp4":
172
+ # convert to mp4 using ffmpeg
173
+ command = "ffmpeg -y -i {} {}".format(saved_filepath, to_save)
174
+ call(command, shell=True)
175
+ return to_save
176
+ else:
177
+ return saved_filepath
178
 
179
 
180
  def configure_input():
181
  gr.Markdown(
182
+ "#### Either upload a video file or provide a YouTube link to a video. Note that the entire video shall be used.",
183
  )
184
+ video_input = gr.Video(label="Upload Video", height=520)
185
+ youtube_link = gr.Textbox(label="YouTube Link", value=None)
186
+ return [video_input, youtube_link]
187
+
188
+
189
+ # video_backend = "decord"
190
+ video_backend = "torchvision"
191
+ def get_predictions(video_path):
192
+ model = load_model()
193
+ frame = load_frame(video_path, video_backend=video_backend)
194
+ S = load_spectrogram(video_path)
195
+ audio = load_audio_tensor(video_path)
196
+ z_audio, y_audio = get_model_output(audio, model)
197
+ image, df_show, tsne_image = show_output(frame, S, y_audio, z_audio)
198
+ return image, df_show, tsne_image
199
+
200
+
201
+ def get_video_id_from_url(url):
202
+ import re
203
+ if "v=" in url:
204
+ video_id = re.findall(r"v=([a-zA-Z0-9_-]+)", url)
205
+ elif "youtu.be" in url:
206
+ video_id = re.findall(r"youtu.be/([a-zA-Z0-9_-]+)", url)
207
+ elif "shorts" in url:
208
+ video_id = re.findall(r"shorts/([a-zA-Z0-9_-]+)", url)
209
+ else:
210
+ raise ValueError("Invalid YouTube URL")
211
+ print("Video URL:", url)
212
+ print("Video ID:", video_id)
213
+
214
+ if len(video_id) > 0:
215
+ return video_id[0]
216
+ else:
217
+ raise ValueError("Invalid YouTube URL")
218
+
219
+
220
+ note = """
221
+ **Note**: Radius (as well as height) estimation depends on accurate wavelength estimation towards the end.
222
+ Thus, it may not be accurate if the wavelength is not estimated correctly at the end.
223
+
224
+ $$
225
+ H = l(0) = \\frac{\lambda(0) - \lambda(T)}{4} \ \ \\text{and} \ \ R = \\frac{\lambda(T)}{4\\beta}
226
+ $$
227
+ """
228
 
229
 
230
  # Example usage in a Gradio interface
231
+ def process_input(video, youtube_link):
232
+ if video is not None and len(youtube_link) > 0:
233
+ raise ValueError("Please provide either a video file or a YouTube link, not both.")
234
+
235
  if video is not None:
236
  print(video)
237
 
238
+ # # Load model globally
239
+ # model = load_model()
240
 
241
  # The input is a video file path
242
  video_path = video
243
 
244
+ # Get predictions
245
+ image, df_show, tsne_image = get_predictions(video_path)
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  return image, df_show, gr.Markdown(note), tsne_image
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  else:
250
+ assert len(youtube_link) > 0, \
251
+ "YouTube Link cannot be empty if no video is provided."
252
+
253
+ video_id = get_video_id_from_url(youtube_link)
254
+ video_path = download_from_youtube(
255
+ video_id, save_dir="/tmp/", convert_to_mp4=False,
256
+ )
257
 
258
+ # Get predictions
259
+ image, df_show, tsne_image = get_predictions(video_path)
 
 
 
260
 
261
+ # Add youtube link to the note
262
+ local_note = f"{note}\n\nYou can watch the original video here: "\
263
+ f"[YouTube Link](https://www.youtube.com/watch?v={video_id})"
264
 
265
+ return image, df_show, gr.Markdown(local_note), tsne_image
 
 
 
 
 
 
 
 
266
 
267
 
268
  def configure_outputs():
 
270
  dataframe = gr.DataFrame(label="Estimated physical properties")
271
  image_tsne = gr.Image(label="TSNE of features", width=300)
272
  markdown = gr.Markdown(label="Note")
 
273
  return [image_wide, dataframe, markdown, image_tsne]
274
 
275
 
276
  # Configure pre-defined examples
277
  examples = [
278
+ ["./media_assets/example_video.mp4", None],
279
+ ["./media_assets/ayNzH0uygFw_9.0_21.0.mp4", None],
280
+ ["./media_assets/biDn0Gi6V8U_7.0_15.0.mp4", None],
281
+ ["./media_assets/goWgiQQMugA_2.5_9.0.mp4", None],
282
+ ["./media_assets/K87g4RvO-9k_254.0_259.0.mp4", None],
283
+ # Shows that it works with background noise
284
+ ["./media_assets/l74zJHCZ9uA.webm", None],
285
+ # Shows that it works with a slightly differently shaped container
286
+ ["./media_assets/LpRPV0hIymU.webm", None],
287
+ ["./media_assets/k-HnMsS36J8.webm", None],
288
+ # [None, "https://www.youtube.com/shorts/6eUQTdkTooo"],
289
+ # [None, "https://www.youtube.com/shorts/VxZT15cG6tw"],
290
+ # [None, "https://www.youtube.com/shorts/GSXQnNhliDY"],
291
  ]
292
 
293
 
 
306
  outputs=configure_outputs(),
307
  examples=examples,
308
  )
309
+
310
  # Add the footer
311
  gr.HTML(footer)
312