import numpy as np
import sounddevice as sd
import streamlit as st
from transformers import pipeline

SAMPLE_RATE = 16000  # Whisper-based ASR models expect 16 kHz audio

# Load the pipelines
asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")

# Function to record audio from the default microphone
def record_audio(duration=5, fs=SAMPLE_RATE):
    st.write("Recording...")
    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="float32")
    sd.wait()
    st.write("Recording complete.")
    return audio.flatten()

# Function to play audio through the default output device
def play_audio(audio, fs=SAMPLE_RATE):
    sd.play(np.asarray(audio).squeeze(), fs)
    sd.wait()

# Streamlit UI
st.title("Cantonese to Chinese Translator")
st.write("Click the button below to record your Cantonese speech.")

if st.button("Record"):
    audio = record_audio()

    # Recognize Cantonese speech; pass the raw waveform together with its sampling rate
    result = asr_pipe({"raw": audio, "sampling_rate": SAMPLE_RATE})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    # Translate Cantonese to written Chinese
    chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
    st.write(f"Chinese Text: {chinese_text}")

    # Convert the Chinese text to speech
    tts_output = tts_pipe(chinese_text)

    # Play back the Chinese output at the sampling rate reported by the TTS pipeline
    st.write("Playing back the Chinese translation...")
    play_audio(tts_output["audio"], tts_output["sampling_rate"])

# Run the app using the command:
#   streamlit run app.py