Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import io
|
2 |
import base64
|
|
|
|
|
3 |
from gtts import gTTS
|
4 |
import streamlit as st
|
5 |
import speech_recognition as sr
|
@@ -14,11 +16,12 @@ if "pre_prompt_sent" not in st.session_state:
|
|
14 |
|
15 |
pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
|
16 |
|
17 |
-
def recognize_speech(audio_data, show_messages=True):
|
18 |
recognizer = sr.Recognizer()
|
19 |
|
20 |
try:
|
21 |
-
|
|
|
22 |
if show_messages:
|
23 |
st.subheader("Texto Reconocido:")
|
24 |
st.write(audio_text)
|
@@ -95,17 +98,17 @@ def main():
|
|
95 |
if not st.session_state.pre_prompt_sent:
|
96 |
st.session_state.pre_prompt_sent = True
|
97 |
|
98 |
-
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder'
|
99 |
|
100 |
if audio:
|
101 |
st.audio(audio['bytes'], format="audio/wav")
|
102 |
-
audio_bytes = audio["bytes"]
|
|
|
103 |
|
104 |
-
audio_text = recognize_speech(audio_bytes)
|
105 |
|
106 |
if audio_text:
|
107 |
-
|
108 |
-
display_recognition_result(audio_text, output, audio_file)
|
109 |
|
110 |
if __name__ == "__main__":
|
111 |
main()
|
|
|
1 |
import io
|
2 |
import base64
|
3 |
+
import numpy as np
|
4 |
+
import soundfile as sf
|
5 |
from gtts import gTTS
|
6 |
import streamlit as st
|
7 |
import speech_recognition as sr
|
|
|
16 |
|
17 |
pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
|
18 |
|
19 |
+
def recognize_speech(audio_data, sample_rate, show_messages=True):
|
20 |
recognizer = sr.Recognizer()
|
21 |
|
22 |
try:
|
23 |
+
adjusted_audio_data = sf.resample(audio_data, sample_rate, 16000, subtype='PCM_16')
|
24 |
+
audio_text = recognizer.recognize_google(adjusted_audio_data, language="es-ES")
|
25 |
if show_messages:
|
26 |
st.subheader("Texto Reconocido:")
|
27 |
st.write(audio_text)
|
|
|
98 |
if not st.session_state.pre_prompt_sent:
|
99 |
st.session_state.pre_prompt_sent = True
|
100 |
|
101 |
+
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
|
102 |
|
103 |
if audio:
|
104 |
st.audio(audio['bytes'], format="audio/wav")
|
105 |
+
audio_bytes = np.frombuffer(audio["bytes"], dtype=np.int16)
|
106 |
+
sample_rate = audio["sample_rate"]
|
107 |
|
108 |
+
audio_text = recognize_speech(audio_bytes, sample_rate)
|
109 |
|
110 |
if audio_text:
|
111 |
+
st.session_state.history.append((audio_text, ""))
|
|
|
112 |
|
113 |
if __name__ == "__main__":
|
114 |
main()
|