# NOTE(review): the three lines below were web-scrape residue from the Hugging
# Face Spaces page header ("Spaces: / Running / Running") — kept as a comment
# so the module parses as Python.
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile
import util
# Load processor and model
# Registry of available TTS models, loaded eagerly at import time.
# Each entry bundles the tokenizer ("processor"), the VITS model, and a flag
# saying whether the checkpoint expects Uyghur text in Arabic script
# (the MMS Uyghur checkpoint does).
models_info = {
    "Meta-MMS": {
        "processor": AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic"),
        "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
        "arabic_script": True,
    },
}

# Prefer GPU when available; synthesize() moves the model and inputs here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def synthesize(text, model_id):
    """Synthesize speech for *text* using the model registered under *model_id*.

    Args:
        text: Input text. If the selected model expects Arabic script, the
            text is transliterated from Uyghur Latin script first
            (via ``util.ug_latn_to_arab`` — assumes Latin-script input;
            TODO confirm with callers).
        model_id: Key into the module-level ``models_info`` registry.

    Returns:
        Path of the WAV file written to the working directory
        ("tts_output.wav").

    Raises:
        KeyError: If *model_id* is not present in ``models_info``.
    """
    info = models_info[model_id]  # single lookup instead of three
    # MMS Uyghur checkpoints are trained on Arabic-script text, so
    # transliterate Latin-script input before tokenizing.
    if info["arabic_script"]:
        text = util.ug_latn_to_arab(text)
    processor = info["processor"]
    model = info["model"].to(device)
    inputs = processor(text, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only — no gradient tracking needed
        # waveform shape is (batch=1, samples); move to CPU for file writing
        output = model(**inputs).waveform.cpu()
    output_path = "tts_output.wav"
    sample_rate = model.config.sampling_rate
    # Write the single waveform in the batch as a mono WAV file.
    scipy.io.wavfile.write(output_path, rate=sample_rate, data=output.numpy()[0])
    return output_path