import numpy as np
import sounddevice as sd
import streamlit as st
from transformers import pipeline

SAMPLE_RATE = 16000  # Whisper-based ASR models expect 16 kHz audio

# Load the pipelines
asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")

# Function to record audio from the default microphone
def record_audio(duration=5, fs=SAMPLE_RATE):
    st.write("Recording...")
    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="float32")
    sd.wait()
    st.write("Recording complete.")
    return audio.flatten()

# Function to play audio through the default output device
def play_audio(audio, fs=SAMPLE_RATE):
    sd.play(np.asarray(audio).squeeze(), fs)
    sd.wait()

# Streamlit UI
st.title("Cantonese to Chinese Translator")
st.write("Click the button below to record your Cantonese speech.")

if st.button("Record"):
    audio = record_audio()

    # Recognize Cantonese speech; pass the raw waveform together with its sampling rate
    result = asr_pipe({"raw": audio, "sampling_rate": SAMPLE_RATE})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    # Translate Cantonese to written Chinese
    chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
    st.write(f"Chinese Text: {chinese_text}")

    # Convert the Chinese text to speech
    tts_output = tts_pipe(chinese_text)

    # Play back the Chinese output at the sampling rate reported by the TTS pipeline
    st.write("Playing back the Chinese translation...")
    play_audio(tts_output["audio"], tts_output["sampling_rate"])

# Run the app using the command:
#   streamlit run app.py