File size: 1,451 Bytes
36f9d2b
 
 
 
 
 
 
f3bc24f
36f9d2b
f3bc24f
36f9d2b
f3bc24f
36f9d2b
 
 
 
 
 
 
f3bc24f
 
 
 
 
 
 
 
dacbe56
f3bc24f
 
 
 
 
 
 
 
 
d205b21
 
 
f3bc24f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import tempfile
from pathlib import Path

import gradio as gr
import torchaudio
from audiocraft.data.audio import audio_write
from audiocraft.models import AudioGen

# Identifier of the pretrained AudioGen checkpoint used by this app.
_PRETRAINED_NAME = 'facebook/audiogen-medium'

# Loaded once at import time; `infer` reuses this single instance per request.
model = AudioGen.get_pretrained(_PRETRAINED_NAME)

def infer(prompt, duration):
    """Generate one audio clip from a text prompt and return its file path.

    Args:
        prompt: Text description; passed to the model as the only entry of
            the description batch.
        duration: Clip length forwarded to ``model.set_generation_params``
            (the UI slider supplies 5 or 10).

    Returns:
        Path (as ``str``) of the ``.wav`` file written for this request.
    """
    model.set_generation_params(duration=duration)
    # A single prompt goes in, so the returned batch holds one waveform.
    wav = model.generate([prompt])

    # Write into a fresh per-request directory rather than a fixed "0.wav" in
    # the working directory, so overlapping or successive requests can never
    # overwrite a file that another request is still handing to the UI.
    out_stem = Path(tempfile.mkdtemp(prefix="audiogen_")) / "0"

    # audio_write appends the ".wav" suffix to the stem; output is
    # loudness-normalized with the compressor enabled.
    audio_write(
        str(out_stem),
        wav[0].cpu(),
        model.sample_rate,
        strategy="loudness",
        loudness_compressor=True,
    )

    return f"{out_stem}.wav"

# Stylesheet handed to gr.Blocks: centers the element with id "col-container"
# (the main column of the UI) and caps its width at 640px.
css="""
#col-container{
    margin: 0 auto;
    max-width: 640px;
}
"""

# Header markup rendered at the top of the page (title plus an empty paragraph).
header_html = """
        <h2 style="text-align: center;">
            AudioGen: Textually-guided audio generation
        </h2>
        <p style="text-align: center;">   
        </p>
        """

# Layout: a single centered column holding the prompt box, a row with the
# duration slider and the submit button, and the audio player for the result.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(header_html)

        prompt_in = gr.Textbox(label="audio prompt")
        with gr.Row():
            duration = gr.Slider(
                minimum=5,
                maximum=10,
                value=5,
                step=5,
                label="Duration",
            )
            submit_btn = gr.Button("Submit")
        audio_o = gr.Audio(label="AudioGen result")

    # Clicking the button runs `infer` on (prompt, duration) and sends the
    # returned file path to the audio component.
    submit_btn.click(fn=infer, inputs=[prompt_in, duration], outputs=[audio_o])

# Turn on the request queue, then start the server in debug mode.
demo.queue()
demo.launch(debug=True)