import base64 import kuznetsov_av.text_to_speech_converter as t2s from fastapi import FastAPI from pydantic import BaseModel class Request(BaseModel): """ Input text. """ text: str class Response(BaseModel): """ Result of text-to-audio generation. audio - base64 string """ audio: str sampling_rate: int app = FastAPI() @app.get('/') async def root() -> dict: """ Root method of API. """ return {"message": "Converter method: /text-to-speech/convert/"} @app.post('/text-to-speech/convert/') async def text_to_speech(entity: Request) -> Response: """ Text-to-audio generation method using text_to_speech_converter. """ synthesiser = t2s.load_model() embeddings_dataset = t2s.load_speaker_dataset() audio, sampling_rate = t2s.text_to_speech(entity.text, synthesiser, embeddings_dataset) return Response(audio=base64.b32encode(audio), sampling_rate=sampling_rate)