# pyharp and audiotools are imported for HARP integration (see the sketch near
# the bottom of this file); they are not used by the Gradio demo itself.
from pyharp import ModelCard, build_endpoint, save_and_return_filepath
from audiotools import AudioSignal
from audioldm import build_model, text_to_audio
import gradio as gr
import soundfile as sf
from datetime import datetime
import subprocess
import os

# Load the AudioLDM checkpoint once at startup so it is shared across requests.
audioldm = build_model(model_name="audioldm-l-full")
def process_fn(input_audio_path, seed, guidance_scale, num_inference_steps, num_candidates, audio_length_in_s):
    """Generate a variation of the input audio with AudioLDM and return its file path."""
    # If the input is a video file, extract its audio track first.
    video_extensions = (".mp4", ".avi", ".mkv", ".flv", ".mov", ".wmv", ".webm")
    if input_audio_path.lower().endswith(video_extensions):
        input_audio_path = convert_video_to_audio_ffmpeg(input_audio_path)

    # text_to_audio requires a text prompt; this app generates variations from
    # the input audio, so a placeholder string is passed instead.
    waveform = text_to_audio(
        audioldm,
        "placeholder",
        input_audio_path,
        seed=int(seed),
        duration=audio_length_in_s,
        guidance_scale=guidance_scale,
        n_candidate_gen_per_text=int(num_candidates),
        ddim_steps=int(num_inference_steps),
    )

    # Save the first candidate as a timestamped WAV at AudioLDM's 16 kHz output rate.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    filename = f"./ldm_variations_{timestamp}.wav"
    sf.write(filename, waveform[0, 0], samplerate=16000)
    return filename
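
# A quick local smoke test, bypassing Gradio (hypothetical input path; the
# keyword values mirror the slider defaults defined below):
#
#   process_fn("input.wav", seed=43534, guidance_scale=2.5,
#              num_inference_steps=200, num_candidates=1, audio_length_in_s=5.0)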
def convert_video_to_audio_ffmpeg(video_file, output_ext="wav"):
    """Convert a video file to audio by calling the `ffmpeg` CLI
    via the subprocess module."""
    filename, ext = os.path.splitext(video_file)
    # -y overwrites any existing output file; ffmpeg's console output is suppressed.
    subprocess.call(
        ["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
    return f"{filename}.{output_ext}"
# Define the Gradio interface.
webapp = gr.Interface(
    fn=process_fn,
    inputs=[
        gr.Audio(
            label="Audio Input",
            type="filepath"
        ),
        # Slider arguments must be numeric, not strings.
        gr.Slider(
            label="Seed",
            minimum=0,
            maximum=65535,
            value=43534,
            step=1
        ),
        gr.Slider(
            minimum=0, maximum=10,
            step=0.1, value=2.5,
            label="Guidance Scale"
        ),
        gr.Slider(
            minimum=1, maximum=500,
            step=1, value=200,
            label="Inference Steps"
        ),
        gr.Slider(
            minimum=1, maximum=10,
            step=1, value=1,
            label="Candidates"
        ),
        # Duration steps in 2.5 s increments, matching AudioLDM's supported lengths.
        gr.Slider(
            minimum=2.5, maximum=10.0,
            step=2.5, value=5.0,
            label="Duration (s)"
        )
    ],
    outputs=gr.Audio(label="Audio Output", type="filepath", format="wav", elem_id="audio")
)
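
# The pyharp imports at the top suggest this Space is also meant to be wired up
# as a HARP endpoint. A minimal sketch is below; it assumes the ModelCard fields
# and the build_endpoint(inputs, output, process_fn, card) signature from
# pyharp's example apps, so verify both against your installed pyharp version.
#
#   card = ModelCard(
#       name="AudioLDM Variations",          # hypothetical metadata
#       description="Audio variations with AudioLDM",
#       author="<your name>",
#       tags=["audioldm", "variations"],
#   )
#   with gr.Blocks() as webapp:
#       # ... declare the same input/output components as above ...
#       widgets = build_endpoint(inputs, output, process_fn, card)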
webapp.queue()
webapp.launch()