Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,10 @@
|
|
1 |
import io
|
2 |
import base64
|
3 |
-
import numpy as np
|
4 |
from gtts import gTTS
|
5 |
import streamlit as st
|
6 |
import speech_recognition as sr
|
7 |
from huggingface_hub import InferenceClient
|
8 |
from streamlit_mic_recorder import mic_recorder
|
9 |
-
import webrtcvad
|
10 |
-
import wave
|
11 |
-
|
12 |
-
temp_audio_file_path = "./output.wav"
|
13 |
|
14 |
if "history" not in st.session_state:
|
15 |
st.session_state.history = []
|
@@ -97,12 +92,6 @@ def display_recognition_result(audio_text, output, audio_file):
|
|
97 |
f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
|
98 |
unsafe_allow_html=True)
|
99 |
|
100 |
-
def detect_silence(audio_data, sample_rate, silence_threshold=5000, silence_duration=5000):
|
101 |
-
is_silence = lambda x: max(x) < silence_threshold
|
102 |
-
chunks = [audio_data[i:i+silence_duration] for i in range(0, len(audio_data), silence_duration)]
|
103 |
-
silent_chunks = [chunk for chunk in chunks if is_silence(chunk)]
|
104 |
-
return silent_chunks
|
105 |
-
|
106 |
def main():
|
107 |
if not st.session_state.pre_prompt_sent:
|
108 |
st.session_state.pre_prompt_sent = True
|
@@ -112,25 +101,12 @@ def main():
|
|
112 |
if audio:
|
113 |
st.audio(audio['bytes'], format="audio/wav")
|
114 |
audio_bytes = audio["bytes"]
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
wave_file.setframerate(sample_rate)
|
122 |
-
wave_file.writeframes(audio_bytes)
|
123 |
-
|
124 |
-
audio_data = np.frombuffer(audio_bytes, dtype=np.int16)
|
125 |
-
silent_chunks = detect_silence(audio_data, sample_rate)
|
126 |
-
|
127 |
-
for silent_chunk in silent_chunks:
|
128 |
-
audio_text = recognize_speech(silent_chunk)
|
129 |
-
st.subheader("Texto Reconocido:")
|
130 |
-
st.write(audio_text)
|
131 |
-
st.session_state.history.append((audio_text, ""))
|
132 |
-
response, audio_file = generate(audio_text, st.session_state.history)
|
133 |
-
display_recognition_result(audio_text, response, audio_file)
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
main()
|
|
|
1 |
import io
|
2 |
import base64
|
|
|
3 |
from gtts import gTTS
|
4 |
import streamlit as st
|
5 |
import speech_recognition as sr
|
6 |
from huggingface_hub import InferenceClient
|
7 |
from streamlit_mic_recorder import mic_recorder
|
|
|
|
|
|
|
|
|
8 |
|
9 |
if "history" not in st.session_state:
|
10 |
st.session_state.history = []
|
|
|
92 |
f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
|
93 |
unsafe_allow_html=True)
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
def main():
|
96 |
if not st.session_state.pre_prompt_sent:
|
97 |
st.session_state.pre_prompt_sent = True
|
|
|
101 |
if audio:
|
102 |
st.audio(audio['bytes'], format="audio/wav")
|
103 |
audio_bytes = audio["bytes"]
|
104 |
+
|
105 |
+
audio_text = recognize_speech(audio_bytes)
|
106 |
+
|
107 |
+
if audio_text:
|
108 |
+
output, audio_file = generate(audio_text, history=st.session_state.history)
|
109 |
+
display_recognition_result(audio_text, output, audio_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
if __name__ == "__main__":
|
112 |
main()
|