kennethli319 committed on
Commit
7c1cb1d
·
1 Parent(s): e6e581b

update app

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -13,18 +13,10 @@ from transformers import pipeline
13
  # spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
14
  # model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")
15
 
16
-
17
  def greet(name):
18
  return "Hello " + name + "!!"
19
 
20
- def run():
21
-
22
- spec_generator = FastPitchModel.from_pretrained("tts_en_fastpitch_multispeaker")
23
- spec_generator.eval()
24
- voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
25
- voc_model.eval()
26
-
27
- pipe = pipeline("text-to-speech", model="suno/bark-small")
28
 
29
  def generate_tts(text: str, speaker: int = 0):
30
  sr = 44100
@@ -32,17 +24,25 @@ def run():
32
  spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
33
  audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
34
 
35
- return (sr, audio.squeeze(0).cpu().numpy())
36
 
37
  demo = gr.Interface(
38
  fn=generate_tts,
39
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
40
  gr.Slider(0, 10, step=1, label="Speaker")],
41
  outputs=gr.Audio(label="Output", type="numpy"),
 
42
  )
43
 
44
  demo.launch(server_name="0.0.0.0", server_port=7860)
45
 
46
-
47
  if __name__ == "__main__":
48
- run()
 
 
 
 
 
 
 
 
 
13
  # spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
14
  # model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")
15
 
 
16
  def greet(name):
17
  return "Hello " + name + "!!"
18
 
19
+ def run(spec_generator, voc_model, pipe):
 
 
 
 
 
 
 
20
 
21
  def generate_tts(text: str, speaker: int = 0):
22
  sr = 44100
 
24
  spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
25
  audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
26
 
27
+ return gr.Audio.update(sr, audio.squeeze(0).cpu().numpy())
28
 
29
  demo = gr.Interface(
30
  fn=generate_tts,
31
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
32
  gr.Slider(0, 10, step=1, label="Speaker")],
33
  outputs=gr.Audio(label="Output", type="numpy"),
34
+ allow_flagging=False,
35
  )
36
 
37
  demo.launch(server_name="0.0.0.0", server_port=7860)
38
 
 
39
  if __name__ == "__main__":
40
+
41
+ spec_generator = FastPitchModel.from_pretrained("tts_en_fastpitch_multispeaker")
42
+ spec_generator.eval()
43
+ voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
44
+ voc_model.eval()
45
+
46
+ pipe = pipeline("text-to-speech", model="suno/bark-small")
47
+
48
+ run(spec_generator, voc_model, pipe)