ZeyuXie committed
Commit 012fbfa
1 Parent(s): c2201d6

Update app.py

Files changed (1)
  1. app.py +16 -20
app.py CHANGED
@@ -32,17 +32,17 @@ class InferRunner:
         ).eval().to(device)
         self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
 
-def infer(caption, runner, num_steps=200, guidance=3.0):
+device = "cuda" if torch.cuda.is_available() else "cpu"
+runner = InferRunner(device)
+
+def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
     with torch.no_grad():
-        latents = runner.picomodel.demo_inference(caption, runner.scheduler, num_steps=num_steps, guidance=guidance, num_samples=1, audio_len=16000*10, disable_progress=True)
+        latents = runner.pico_model.demo_inference(caption, runner.scheduler, num_steps=num_steps, guidance_scale=guidance_scale, num_samples_per_prompt=1, disable_progress=True)
         mel = runner.vae.decode_first_stage(latents)
         wave = runner.vae.decode_to_waveform(mel)[0][:audio_len]
-    sf.write(f"synthesized/{caption}.wav", wave, samplerate=16000, subtype='PCM_16')
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-infer_runner = InferRunner(device)
-
-
+    outpath = "synthesized/output.wav"
+    sf.write(outpath, wave, samplerate=16000, subtype='PCM_16')
+    return outpath
 with gr.Blocks() as demo:
     with gr.Row():
         gr.Markdown("## PicoAudio")
@@ -56,15 +56,16 @@ with gr.Blocks() as demo:
             with gr.Accordion("Advanced options", open=False):
                 num_steps = gr.Slider(label="num_steps", minimum=1,
                                       maximum=300, value=200, step=1)
-                guidance = gr.Slider(
-                    label="Guidance Scale:(Large => more relevant to text but the quality may drop)", minimum=0.1, maximum=8.0, value=3.0, step=0.1
+                guidance_scale = gr.Slider(
+                    label="Guidance scale (larger => more relevant to text, but quality may drop)", minimum=0.1, maximum=8.0, value=3.0, step=0.1
                 )
-
+
         with gr.Column():
             outaudio = gr.Audio()
 
-    run_button.click(fn=infer, inputs=[
-        prompt, infer_runner, num_steps, guidance], outputs=[outaudio])
+    run_button.click(fn=infer,
+                     inputs=[prompt, num_steps, guidance_scale],
+                     outputs=[outaudio])
     # with gr.Row():
     #     with gr.Column():
     #         gr.Examples(
@@ -74,16 +75,11 @@ with gr.Blocks() as demo:
     #             ['A eerie yet calming experimental electronic track featuring haunting synthesizer strings and pads',25,5,55],
     #             ['A slow tempo pop instrumental piece featuring only acoustic guitar with fingerstyle and percussive strumming techniques',25,5,55]],
     #             inputs = [prompt, ddim_steps, scale, seed],
-    #             outputs = [outaudio]
+    #             outputs = [outaudio],
     #             )
+    #             cache_examples="lazy", # Turn on to cache.
     # with gr.Column():
     #     pass
 
 demo.launch()
 
-
-
-
-
-if __name__ == "__main__":
-    main()
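The functional core of this commit is a Gradio wiring fix: the old handler took `runner` as a parameter and listed `infer_runner` in `inputs=[...]`, but Gradio inputs must be UI components; the old `infer` also sliced with an undefined `audio_len` and returned nothing for `outaudio`. Below is a minimal, self-contained sketch of the pattern the new code adopts. `FakeRunner` is a hypothetical stand-in for the real `InferRunner`/PicoAudio model (it writes silence instead of synthesized audio); everything else uses only the public gradio/soundfile APIs already visible in the diff.

import numpy as np
import soundfile as sf
import gradio as gr

class FakeRunner:
    """Hypothetical stand-in for InferRunner: writes 1 s of 16 kHz silence."""
    def synthesize(self, caption, num_steps, guidance_scale):
        outpath = "output.wav"
        sf.write(outpath, np.zeros(16000, dtype=np.int16), samplerate=16000, subtype="PCM_16")
        return outpath  # returning a filepath is what lets gr.Audio play the result

runner = FakeRunner()  # heavyweight state lives at module level, created once

def infer(caption, num_steps=200, guidance_scale=3.0):
    # Handlers receive only component values; `runner` is captured from module
    # scope, mirroring the commit's removal of the runner from inputs=[...].
    return runner.synthesize(caption, num_steps, guidance_scale)

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    with gr.Accordion("Advanced options", open=False):
        num_steps = gr.Slider(label="num_steps", minimum=1, maximum=300, value=200, step=1)
        guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=8.0, value=3.0, step=0.1)
    run_button = gr.Button("Run")
    outaudio = gr.Audio()
    run_button.click(fn=infer, inputs=[prompt, num_steps, guidance_scale], outputs=[outaudio])

demo.launch()

The same reasoning explains the remaining changes in the diff: `infer` now returns `outpath` so `outputs=[outaudio]` has a value to display, and `audio_len` becomes an explicit parameter instead of the undefined name the old body referenced.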