Spaces:

amphion
/

PicoAudio

Running on Zero

App Files Community

ZeyuXie committed on Jul 16

Commit

012fbfa

•

1 Parent(s): c2201d6

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -20

app.py CHANGED Viewed

@@ -32,17 +32,17 @@ class InferRunner:
         ).eval().to(device)
         self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
-def infer(caption, runner, num_steps=200, guidance=3.0):
     with torch.no_grad():
-        latents = runner.picomodel.demo_inference(caption, runner.scheduler, num_steps=num_steps, guidance=guidance, num_samples=1, audio_len=16000*10, disable_progress=True)
         mel = runner.vae.decode_first_stage(latents)
         wave = runner.vae.decode_to_waveform(mel)[0][:audio_len]
-    sf.write(f"synthesized/{caption}.wav", wave, samplerate=16000, subtype='PCM_16')
-device = "cuda" if torch.cuda.is_available() else "cpu"
-infer_runner = InferRunner(device)
 with gr.Blocks() as demo:
     with gr.Row():
         gr.Markdown("## PicoAudio")
@@ -56,15 +56,16 @@ with gr.Blocks() as demo:
             with gr.Accordion("Advanced options", open=False):
                 num_steps = gr.Slider(label="num_steps", minimum=1,
                                        maximum=300, value=200, step=1)
-                guidance = gr.Slider(
-                    label="Guidance Scale:(Large => more relevant to text but the quality may drop)", minimum=0.1, maximum=8.0, value=3.0, step=0.1
                 )
         with gr.Column():
             outaudio = gr.Audio()
-    run_button.click(fn=infer, inputs=[
-                    prompt, infer_runner, num_steps, guidance], outputs=[outaudio])
     # with gr.Row():
     #     with gr.Column():
     #         gr.Examples(
@@ -74,16 +75,11 @@ with gr.Blocks() as demo:
     #                                 ['A eerie yet calming experimental electronic track featuring haunting synthesizer strings and pads',25,5,55],
     #                                 ['A slow tempo pop instrumental piece featuring only acoustic guitar with fingerstyle and percussive strumming techniques',25,5,55]],
     #                     inputs = [prompt, ddim_steps, scale, seed],
-    #                     outputs = [outaudio]
     #                     )
     #     with gr.Column():
     #         pass
 demo.launch()
-if __name__ == "__main__":
-    main()

         ).eval().to(device)
         self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+runner = InferRunner(device)
+def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
     with torch.no_grad():
+        latents = runner.pico_model.demo_inference(caption, runner.scheduler, num_steps=num_steps, guidance_scale=guidance_scale, num_samples_per_prompt=1, disable_progress=True)
         mel = runner.vae.decode_first_stage(latents)
         wave = runner.vae.decode_to_waveform(mel)[0][:audio_len]
+    outpath = f"synthesized/output.wav"
+    sf.write(outpath, wave, samplerate=16000, subtype='PCM_16')
+    return outpath
 with gr.Blocks() as demo:
     with gr.Row():
         gr.Markdown("## PicoAudio")
             with gr.Accordion("Advanced options", open=False):
                 num_steps = gr.Slider(label="num_steps", minimum=1,
                                        maximum=300, value=200, step=1)
+                guidance_scale = gr.Slider(
+                    label="guidance_scale Scale:(Large => more relevant to text but the quality may drop)", minimum=0.1, maximum=8.0, value=3.0, step=0.1
                 )
         with gr.Column():
             outaudio = gr.Audio()
+    run_button.click(fn=infer,
+                    inputs=[prompt, num_steps, guidance_scale],
+                    outputs=[outaudio])
     # with gr.Row():
     #     with gr.Column():
     #         gr.Examples(
     #                                 ['A eerie yet calming experimental electronic track featuring haunting synthesizer strings and pads',25,5,55],
     #                                 ['A slow tempo pop instrumental piece featuring only acoustic guitar with fingerstyle and percussive strumming techniques',25,5,55]],
     #                     inputs = [prompt, ddim_steps, scale, seed],
+    #                     outputs = [outaudio],
     #                     )
+    #                     cache_examples="lazy", # Turn on to cache.
     #     with gr.Column():
     #         pass
 demo.launch()