j committed
Commit
cc9ce25
1 Parent(s): e9be68c

convert video inputs and remove HARP stuff

Files changed (1)
  1. app.py +20 -12
app.py CHANGED
@@ -4,10 +4,17 @@ from audioldm import build_model, text_to_audio
 import gradio as gr
 import soundfile as sf
 from datetime import datetime
+import subprocess
+import os
+import sys
 
 audioldm = build_model(model_name="audioldm-l-full")
 
 def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps, num_candidates, audio_length_in_s):
+    video_extensions = (".mp4", ".avi", ".mkv", ".flv", ".mov", ".wmv", ".webm")
+    if input_audio_path.lower().endswith(video_extensions):
+        input_audio_path = convert_video_to_audio_ffmpeg(input_audio_path)
+
     waveform = text_to_audio(
         audioldm,
         'placeholder',
@@ -26,14 +33,17 @@ def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps, num_
     return filename
 
 
-card = ModelCard(
-    name='AudioLDM Variations',
-    description='AudioLDM Variation Generator, operates on region selected in track.',
-    author='Team Audio',
-    tags=['AudioLDM', 'Variations', 'audio-to-audio']
-)
+def convert_video_to_audio_ffmpeg(video_file, output_ext="wav"):
+    """Converts video to audio directly using `ffmpeg` command
+    with the help of subprocess module"""
+    filename, ext = os.path.splitext(video_file)
+    subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.STDOUT)
+    return f"{filename}.{output_ext}"
 
-with gr.Blocks() as webapp:
+webapp = gr.Interface(
+    fn = process_fn,
     # Define your Gradio interface
     inputs = [
         gr.Audio(
@@ -66,13 +76,11 @@ with gr.Blocks() as webapp:
             minimum=2.5, maximum=10.0,
            step=2.5, value=5,
            label="Duration"
-        ),
-    ]
+        )
+    ],
 
    output = gr.Audio(label="Audio Output", type="filepath", format="wav", elem_id="audio")
+)
 
-    ctrls_data, ctrls_button, process_button, cancel_button = build_endpoint(inputs, output, process_fn, card)
-
-    # queue the webapp: https://www.gradio.app/guides/setting-up-a-demo-for-maximum-performance
 webapp.queue()
 webapp.launch()
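
For reference, a minimal standalone sketch of the video-handling path this commit introduces. It assumes the ffmpeg binary is available on PATH; the helper and extension tuple mirror the convert_video_to_audio_ffmpeg added above, while resolve_audio_input and the example file name are illustrative only, not part of the committed app.

import os
import subprocess

# Extensions treated as video inputs, mirroring the tuple added in this commit.
VIDEO_EXTENSIONS = (".mp4", ".avi", ".mkv", ".flv", ".mov", ".wmv", ".webm")

def convert_video_to_audio_ffmpeg(video_file, output_ext="wav"):
    """Extract the audio track of video_file into a sibling file by shelling
    out to ffmpeg (assumes ffmpeg is on PATH)."""
    base, _ = os.path.splitext(video_file)
    out_path = f"{base}.{output_ext}"
    subprocess.call(["ffmpeg", "-y", "-i", video_file, out_path],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)
    return out_path

def resolve_audio_input(input_path):
    """Return a path to an audio file, converting first if the input is a video.
    Illustrative wrapper; in app.py the same check lives inside process_fn."""
    if input_path.lower().endswith(VIDEO_EXTENSIONS):
        return convert_video_to_audio_ffmpeg(input_path)
    return input_path

if __name__ == "__main__":
    # Hypothetical input file, for illustration only.
    print(resolve_audio_input("example_clip.mp4"))

With this in place, process_fn can accept either an audio or a video upload and always hands text_to_audio an audio path.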