Spaces:

huimanho
/

Cantonese-to-Chinese

Sleeping

huimanho committed on Oct 7, 2024

Commit

cd7ce5f

verified ·

1 Parent(s): aa82b15

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,33 +1,24 @@
 import streamlit as st
-import sounddevice as sd
 import numpy as np
 import torch
 from transformers import pipeline
 # Load the pipelines
 asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
 translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
 tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")
-# Function to record audio
-def record_audio(duration=5, fs=16000):
-    st.write("Recording...")
-    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
-    sd.wait()
-    st.write("Recording complete.")
-    return audio.flatten()
-# Function to play audio
-def play_audio(audio, fs=16000):
-    sd.play(audio, fs)
-    sd.wait()
 # Streamlit UI
 st.title("Cantonese to Chinese Translator")
-st.write("Click the button below to record your Cantonese speech.")
-if st.button("Record"):
-    audio = record_audio()
     # Recognize Cantonese speech
     audio_input = torch.tensor(audio)
@@ -43,8 +34,4 @@ if st.button("Record"):
     tts_output = tts_pipe(chinese_text)
     # Play back the Chinese output
-    st.write("Playing back the Chinese translation...")
-    play_audio(tts_output['audio'])
-# Run the app using the command:
-# streamlit run app.py

 import streamlit as st
 import numpy as np
 import torch
 from transformers import pipeline
+import librosa
 # Load the pipelines
 asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
 translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
 tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")
 # Streamlit UI
 st.title("Cantonese to Chinese Translator")
+st.write("Upload your Cantonese audio file (WAV format) below.")
+# File upload
+uploaded_file = st.file_uploader("Choose a WAV file", type="wav")
+if uploaded_file is not None:
+    # Load the audio file
+    audio, sr = librosa.load(uploaded_file, sr=16000)
     # Recognize Cantonese speech
     audio_input = torch.tensor(audio)
     tts_output = tts_pipe(chinese_text)
     # Play back the Chinese output
+    st.audio(tts_output['audio'], format='audio/wav')