import os

import soundfile as sf
from espnet_onnx import Text2Speech
from pydantic import BaseModel


class TextInput(BaseModel):
    """Request payload carrying the text to synthesize."""

    text: str = ""


class TTSModel:
    """Wrapper around an espnet_onnx ``Text2Speech`` model.

    The model is loaded lazily through :meth:`load_model`; until then
    :meth:`generate` refuses to run. Every call to :meth:`generate`
    overwrites the single file at ``output_path``.
    """

    def __init__(self):
        # No model is loaded at construction time; see load_model().
        self.model = None
        # Base directory under which individual model directories live.
        self.model_path = "models/tts"
        # Fixed location the synthesized audio is written to.
        self.output_path = "static/sample.wav"
        # Value handed to soundfile.write() as the sample rate; set by
        # load_model().
        self.fs = None

    def load_model(self, model_path: str, fs: int) -> None:
        """Load a model from ``<self.model_path>/<model_path>``.

        Args:
            model_path: Name of the model directory, relative to
                ``self.model_path``.
            fs: Sample rate passed to ``soundfile.write`` when audio is
                generated.
        """
        self.model = Text2Speech(model_dir=f"{self.model_path}/{model_path}")
        self.fs = fs

    def generate(self, text_input: TextInput) -> None:
        """Synthesize ``text_input.text`` and write it to ``self.output_path``.

        Args:
            text_input: Payload whose ``text`` field is synthesized.

        Raises:
            RuntimeError: If no model has been loaded yet.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded.")

        # Remove any previous output before inference so a failed run does
        # not leave a stale file behind. EAFP avoids the exists()/remove()
        # race where the file vanishes between the two calls.
        try:
            os.remove(self.output_path)
        except FileNotFoundError:
            pass

        audio = self.model(text_input.text)["wav"]

        # Make sure the destination directory exists; otherwise the write
        # fails on a fresh checkout where it has not been created yet.
        out_dir = os.path.dirname(self.output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        sf.write(self.output_path, audio, self.fs)