Irpan
asr
a651122
raw
history blame
1.03 kB
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile
import util
# Registry of the available TTS back-ends, keyed by display name. Each entry
# carries the tokenizer ("processor"), the VITS model, and the "arabic_script"
# flag that synthesize() checks before converting the input text with
# util.ug_latn_to_arab.
_MMS_UIG_CHECKPOINT = "facebook/mms-tts-uig-script_arabic"

models_info = {
    "Meta-MMS": dict(
        processor=AutoTokenizer.from_pretrained(_MMS_UIG_CHECKPOINT),
        model=VitsModel.from_pretrained(_MMS_UIG_CHECKPOINT),
        arabic_script=True,
    ),
}

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)
def synthesize(text, model_id, output_path="tts_output.wav"):
    """Synthesize speech for *text* and write it to a WAV file.

    Args:
        text: The text to speak. When the selected model's ``arabic_script``
            flag is true, the text is first passed through
            ``util.ug_latn_to_arab`` (presumably a Latin-to-Arabic script
            conversion for Uyghur -- confirm against ``util``).
        model_id: Key into the module-level ``models_info`` registry.
        output_path: Destination of the WAV file. Defaults to
            ``"tts_output.wav"`` in the current working directory; pass a
            unique path when several calls may run side by side, otherwise
            each call overwrites the previous result.

    Returns:
        The path the audio was written to (``output_path``).

    Raises:
        KeyError: If *model_id* is not a key of ``models_info``.
    """
    try:
        entry = models_info[model_id]
    except KeyError:
        # Keep the original exception type, but tell the caller what is valid.
        known = ", ".join(sorted(models_info))
        raise KeyError(
            f"Unknown model_id {model_id!r}; available models: {known}"
        ) from None

    if entry["arabic_script"]:
        text = util.ug_latn_to_arab(text)

    processor = entry["processor"]
    model = entry["model"].to(device)
    inputs = processor(text, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking, then bring the result back to
    # the CPU so it can be converted to a NumPy array for saving.
    with torch.no_grad():
        waveform = model(**inputs).waveform.cpu()

    # Index 0 selects the first entry along the leading axis (the only
    # utterance, since a single text was tokenized).
    scipy.io.wavfile.write(
        output_path,
        rate=model.config.sampling_rate,
        data=waveform.numpy()[0],
    )
    return output_path