from transformers import VitsModel, AutoTokenizer
import torch
import numpy as np

# Load processor and model.
# Registry of available TTS back-ends, keyed by the display name callers pass
# to synthesize(). Each entry holds the tokenizer ("processor") and the VITS
# model loaded from the same checkpoint. Loading happens once, at import time.
models_info = {
    "Meta-MMS": {
        "processor": AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic"),
        "model": VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic"),
    },
}


def synthesize(text: str, model_id: str):
    """Convert *text* to speech with the registered model *model_id*.

    Args:
        text: The text to synthesize.
        model_id: Key into ``models_info`` selecting the tokenizer/model pair.

    Returns:
        A ``(sampling_rate, waveform)`` tuple. ``sampling_rate`` is the rate
        declared by the model's configuration; ``waveform`` is the model's
        output waveform as a float32 NumPy array on the CPU, with the shape
        the model produced (presumably ``(batch, samples)`` -- confirm
        against the consumer of this tuple).

    Raises:
        KeyError: If *model_id* is not a key of ``models_info``.
    """
    entry = models_info[model_id]
    processor = entry["processor"]
    model = entry["model"]

    inputs = processor(text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**inputs).waveform.cpu().float().numpy()

    # Use the rate the checkpoint was trained at rather than a hard-coded
    # value; a mismatched rate makes playback run at the wrong speed/pitch.
    sampling_rate = model.config.sampling_rate
    return (sampling_rate, output)