from fastapi import FastAPI, Form
from fastapi.responses import StreamingResponse
import io
import wave
from piper.voice import PiperVoice

# FastAPI application object; the route decorators below register their
# handlers on it.
app = FastAPI()

# Load the voice model once, at import time, so every request reuses the same
# PiperVoice object instead of reloading the model per call.
# NOTE(review): the path is relative -- presumably resolved against the
# process's working directory; confirm against the deployment layout.
model_path = 'model.onnx'
voice = PiperVoice.load(model_path)

@app.post("/synthesize/")
def synthesize_text(text: str = Form(...)):
    """
    Endpoint to synthesize text to speech.

    The text is rendered by the module-level ``voice`` into an in-memory
    mono, 2-bytes-per-sample WAV file, which is then sent back to the client.

    Args:
        text (str): The text to synthesize, supplied as a required form field.

    Returns:
        StreamingResponse: The WAV audio data, served as ``audio/wav``.
    """
    audio_buffer = io.BytesIO()

    # Synthesize speech and write to an in-memory WAV file. Leaving the
    # ``with`` block closes the wave writer, which finalizes the WAV header
    # sizes; the underlying BytesIO stays open because wave did not open it.
    with wave.open(audio_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono output
        wav_file.setsampwidth(2)  # 2 bytes per sample
        # Use the sample rate of the loaded model rather than a hard-coded
        # value: a header rate that differs from the rate the samples were
        # generated at makes the audio play back at the wrong speed/pitch.
        wav_file.setframerate(voice.config.sample_rate)
        # NOTE(review): this relies on a Piper release whose ``synthesize``
        # accepts a wave writer as its second argument -- confirm against the
        # installed piper-tts version.
        voice.synthesize(text, wav_file)

    # Seek to the beginning of the buffer so it can be read from the start
    audio_buffer.seek(0)

    return StreamingResponse(audio_buffer, media_type="audio/wav")

@app.get("/")
def read_root():
    """Return a short welcome message pointing clients at /synthesize/."""
    welcome = "Welcome to the Piper TTS API. Use /synthesize/ to synthesize speech."
    return dict(message=welcome)