Irpan committed on
Commit
30e5da4
·
1 Parent(s): 4c14db4
Files changed (2) hide show
  1. app.py +1 -1
  2. tts.py +10 -7
app.py CHANGED
@@ -37,7 +37,7 @@ mms_synthesize = gr.Interface(
37
  )
38
  ],
39
  outputs=[
40
- gr.Audio(label="Generated Audio", type="numpy"),
41
  ],
42
  #examples=TTS_EXAMPLES,
43
  title="Text-to-speech",
 
37
  )
38
  ],
39
  outputs=[
40
+ gr.Audio(label="Generated Audio"),
41
  ],
42
  #examples=TTS_EXAMPLES,
43
  title="Text-to-speech",
tts.py CHANGED
@@ -1,6 +1,6 @@
1
  from transformers import VitsModel, AutoTokenizer
2
  import torch
3
- import numpy as np
4
 
5
  # Load processor and model
6
  models_info = {
@@ -9,15 +9,18 @@ models_info = {
9
  "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
10
  },
11
  }
 
12
 
13
  def synthesize(text, model_id):
14
  processor = models_info[model_id]["processor"]
15
- model = models_info[model_id]["model"]
16
- inputs = processor(text, return_tensors="pt")
17
 
18
  with torch.no_grad():
19
- output = model(**inputs).waveform.cpu().float().numpy()
20
 
21
- sampling_rate = 22050
22
-
23
- return (sampling_rate, output)
 
 
 
1
  from transformers import VitsModel, AutoTokenizer
2
  import torch
3
+ import scipy.io.wavfile
4
 
5
  # Load processor and model
6
  models_info = {
 
9
  "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
10
  },
11
  }
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
 
14
def synthesize(text, model_id):
    """Synthesize speech for ``text`` using the MMS VITS model keyed by ``model_id``.

    Looks up the tokenizer/processor and model in the module-level
    ``models_info`` registry, runs inference on ``device``, writes the
    waveform to a WAV file, and returns the file path (suitable for a
    Gradio ``Audio`` output).

    Parameters
    ----------
    text : str
        Input text to synthesize.
    model_id : str
        Key into ``models_info`` selecting which MMS TTS model to use.

    Returns
    -------
    str
        Path of the WAV file written to the working directory.
    """
    processor = models_info[model_id]["processor"]
    model = models_info[model_id]["model"].to(device)
    inputs = processor(text, return_tensors="pt").to(device)

    with torch.no_grad():
        # FIX: the original called the undefined name `tts_model`, raising
        # NameError — the model loaded above is bound to `model`.
        # Move the waveform back to CPU so it can be saved to disk.
        output = model(**inputs).waveform.cpu()

    output_path = "tts_output.wav"
    # 16 kHz is the sampling rate used by the facebook/mms-tts-* VITS
    # checkpoints — presumably matches model.config.sampling_rate; verify.
    sample_rate = 16000
    # waveform is (batch, samples); write the first (only) item.
    scipy.io.wavfile.write(output_path, rate=sample_rate, data=output.numpy()[0])

    return output_path