File size: 2,541 Bytes
d57e374
 
4cf73d6
075c9a6
4cf73d6
 
075c9a6
d57e374
1834911
 
d57e374
075c9a6
d57e374
 
 
 
075c9a6
d57e374
 
 
 
075c9a6
d57e374
 
 
 
 
 
 
 
 
 
 
 
075c9a6
d57e374
 
 
 
 
 
 
 
 
 
 
 
 
075c9a6
d57e374
 
 
075c9a6
d57e374
 
075c9a6
d57e374
075c9a6
 
 
 
 
 
 
 
 
 
 
4cf73d6
 
 
 
 
075c9a6
 
d57e374
075c9a6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import torch
import torchaudio
import numpy as np
import gradio as gr
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline 
# Select the compute device (CUDA when available, otherwise CPU), then load
# the pretrained AudioLDM2 morphing pipeline in float32 and move it there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large",
    torch_dtype=torch.float32,
)
pipeline.to(device)

def _snapshot_wavs(directory):
    """Return a ``{filename: mtime_ns}`` mapping for the ``.wav`` files in *directory*."""
    with os.scandir(directory) as entries:
        return {
            entry.name: entry.stat().st_mtime_ns
            for entry in entries
            if entry.name.endswith(".wav")
        }


# Audio morphing function
def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio clips and return the generated WAV file paths.

    Args:
        audio_file1: Path to the first audio file. Its length (whole seconds)
            sets the ``audio_length_in_s`` passed to the pipeline.
        audio_file2: Path to the second audio file.
        prompt1: Text prompt forwarded to the pipeline as ``prompt_1``.
        prompt2: Text prompt forwarded to the pipeline as ``prompt_2``.
        negative_prompt1: Forwarded as ``negative_prompt_1``.
        negative_prompt2: Forwarded as ``negative_prompt_2``.

    Returns:
        A list of paths, sorted by filename, to the ``.wav`` files inside the
        ``output`` directory that were created or rewritten by this call.

    Raises:
        ValueError: If either audio file path is missing.
    """
    # Fail early with a readable message instead of an opaque loader error
    # when the caller submits without providing both clips.
    if not audio_file1 or not audio_file2:
        raise ValueError("Both audio files are required for morphing.")

    save_lora_dir = "output"
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load audio and compute duration in whole seconds. Clamp to at least one
    # second: truncating a sub-second clip would otherwise request 0 seconds.
    waveform, sample_rate = torchaudio.load(audio_file1)
    duration = max(1, int(waveform.shape[1] / sample_rate))

    # Remember which WAV files already exist so that leftovers from earlier
    # runs are not returned as if they were results of this request.
    wavs_before = _snapshot_wavs(save_lora_dir)

    # Perform morphing using the pipeline; its return value is not used, the
    # results are picked up from the output directory afterwards.
    _ = pipeline(
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=50,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=50,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )

    # Keep only files that are new or whose modification time changed, and
    # sort them: directory listings come back in arbitrary order, which would
    # otherwise shuffle the morph sequence between runs and platforms.
    wavs_after = _snapshot_wavs(save_lora_dir)
    fresh_names = sorted(
        name
        for name, mtime in wavs_after.items()
        if wavs_before.get(name) != mtime
    )
    return [os.path.join(save_lora_dir, name) for name in fresh_names]

# Gradio interface function
def interface(audio1, audio2, prompt1, prompt2):
    """Gradio callback: forward the two audio paths and prompts to ``morph_audio``.

    The negative prompts are left at ``morph_audio``'s defaults. Returns
    whatever ``morph_audio`` returns (the list of output file paths).
    """
    return morph_audio(audio1, audio2, prompt1, prompt2)

# Gradio Interface: two uploaded audio files (passed to the callback as file
# paths) plus one text prompt per file go in; five audio players come out.
# Disabled input kept for reference: gr.Slider(4, 6, step=1, label="Octave 1")
demo = gr.Interface(
    fn=interface,
    inputs=(
        [gr.Audio(label=f"Upload Audio File {n}", type="filepath") for n in (1, 2)]
        + [gr.Textbox(label=f"Prompt for Audio File {n}") for n in (1, 2)]
    ),
    outputs=[gr.Audio(label=f"Morphing audio {n}") for n in range(1, 6)],
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()