"""Minimal HTTP text-to-speech service backed by a sherpa-onnx VITS model.

Importing this module builds the TTS configuration, validates it, loads the
model and creates the FastAPI application object ``app``.  Running the module
as a script serves ``app`` with uvicorn on ``0.0.0.0:7860``.

Endpoint:
    GET /tts?text=...  ->  ``audio/wav`` body (24-bit PCM) with the speech
    synthesized for ``text``.
"""

import io
import os
import sys
import threading

import numpy as np
import soundfile as sf
import sherpa_onnx
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse

# Model configuration.  All paths are relative to the current working
# directory of the process.
ljspeech_config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(
            model='model.onnx',
            lexicon='',
            data_dir='espeak-ng-data',
            tokens='tokens.txt',
        ),
        num_threads=4,
    )
)

# Fail at import time rather than on the first request when the
# configuration is rejected.
if not ljspeech_config.validate():
    raise ValueError("Please check your config")

ljspeech = sherpa_onnx.OfflineTts(ljspeech_config)

# The handler below runs in FastAPI's worker threadpool, so several requests
# could otherwise call into the model at the same time.  The lock keeps
# synthesis one-at-a-time, as it was when the handler ran on the event loop.
# NOTE(review): whether OfflineTts.generate is safe for concurrent calls is
# not visible here -- drop the lock if that is confirmed.
_tts_lock = threading.Lock()

app = FastAPI()


@app.get("/tts", response_class=StreamingResponse)
def do_tts(text: str):
    """Synthesize ``text`` and return it as a WAV stream.

    Declared as a plain ``def`` (not ``async def``) on purpose: model
    inference and WAV encoding are blocking calls, and FastAPI executes
    non-async path functions in a threadpool, which keeps the event loop free
    to serve other connections while synthesis is running.

    Args:
        text: The text to speak, taken from the ``text`` query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` whose body is a
        24-bit PCM WAV file at the sample rate reported by the model.

    Raises:
        HTTPException: 400 when ``text`` is empty or only whitespace; 500 when
            the model produced no samples for ``text``.
    """
    if not text.strip():
        # Nothing to say; avoid running the model just to get an empty file.
        raise HTTPException(status_code=400, detail="'text' must not be empty")

    with _tts_lock:
        audio = ljspeech.generate(text)

    if len(audio.samples) == 0:
        # An empty result would otherwise become a header-only WAV file
        # returned with status 200.
        raise HTTPException(
            status_code=500,
            detail="No audio was generated for the given text",
        )

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, audio.samples, audio.sample_rate, 'PCM_24', format='WAV')
    wav_buffer.seek(0)  # rewind so the response streams from the first byte
    return StreamingResponse(wav_buffer, media_type='audio/wav')


if __name__ == '__main__':
    uvicorn.run(app, host='0.0.0.0', port=7860)