"""Minimal FastAPI service that exposes Piper text-to-speech over HTTP."""

import io
import wave

from fastapi import FastAPI, Form
from fastapi.responses import StreamingResponse
from piper.voice import PiperVoice

app = FastAPI()

# Load the voice model once at import time so every request reuses it.
model_path = 'model.onnx'
voice = PiperVoice.load(model_path)


@app.post("/synthesize/")
def synthesize_text(text: str = Form(...)):
    """
    Endpoint to synthesize text to speech.

    Args:
        text (str): The text to synthesize, sent as a required form field.

    Returns:
        StreamingResponse: The synthesized audio as an in-memory WAV
        stream served with the ``audio/wav`` media type.
    """
    audio_buffer = io.BytesIO()

    # Synthesize speech and write it to an in-memory WAV file. Leaving the
    # ``with`` block closes the WAV writer, which finalizes the header; the
    # underlying BytesIO stays open because the writer did not open it.
    with wave.open(audio_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono output
        wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
        # Use the loaded model's own sample rate instead of a hard-coded
        # value: a header that disagrees with the model's output rate makes
        # the audio play back at the wrong speed and pitch.
        # NOTE(review): some Piper releases set these header fields
        # themselves inside synthesize(); setting them here keeps the
        # header valid either way.
        wav_file.setframerate(voice.config.sample_rate)
        voice.synthesize(text, wav_file)

    # Seek to the beginning of the buffer so it can be read from the start.
    audio_buffer.seek(0)
    return StreamingResponse(audio_buffer, media_type="audio/wav")


@app.get("/")
def read_root():
    """Return a short welcome message pointing clients at /synthesize/."""
    return {"message": "Welcome to the Piper TTS API. Use /synthesize/ to synthesize speech."}