Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,16 @@
|
|
1 |
import io
|
2 |
import base64
|
3 |
-
import numpy as np
|
4 |
-
import soundfile as sf
|
5 |
from gtts import gTTS
|
6 |
import streamlit as st
|
7 |
import speech_recognition as sr
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from streamlit_mic_recorder import mic_recorder
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
if "history" not in st.session_state:
|
12 |
st.session_state.history = []
|
@@ -14,24 +18,22 @@ if "history" not in st.session_state:
|
|
14 |
if "pre_prompt_sent" not in st.session_state:
|
15 |
st.session_state.pre_prompt_sent = False
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
def recognize_speech(audio_data, sample_rate, show_messages=True):
|
20 |
recognizer = sr.Recognizer()
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
|
36 |
return audio_text
|
37 |
|
@@ -100,15 +102,19 @@ def main():
|
|
100 |
|
101 |
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
|
102 |
|
103 |
-
if audio:
|
104 |
-
st.audio(audio['bytes']
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
112 |
|
113 |
if __name__ == "__main__":
|
114 |
main()
|
|
|
1 |
import io
|
2 |
import base64
|
|
|
|
|
3 |
from gtts import gTTS
|
4 |
import streamlit as st
|
5 |
import speech_recognition as sr
|
6 |
from huggingface_hub import InferenceClient
|
7 |
from streamlit_mic_recorder import mic_recorder
|
8 |
+
import wave
|
9 |
+
import numpy as np
|
10 |
+
import os
|
11 |
+
|
12 |
+
pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
|
13 |
+
temp_audio_file_path = "./output.wav"
|
14 |
|
15 |
if "history" not in st.session_state:
|
16 |
st.session_state.history = []
|
|
|
18 |
if "pre_prompt_sent" not in st.session_state:
|
19 |
st.session_state.pre_prompt_sent = False
|
20 |
|
21 |
+
def recognize_speech(audio_data, show_messages=True):
    """Transcribe recorded Spanish speech to text using Google's recognizer.

    Args:
        audio_data: Either an ``sr.AudioData`` instance, or the bytes of an
            audio file in a container ``sr.AudioFile`` can read (WAV, AIFF
            or FLAC).  NOTE(review): confirm the recorder actually delivers
            one of these formats rather than headerless PCM or WebM.
        show_messages: When true, render the recognized text and a success
            notice on the Streamlit page.  Warnings and errors are always
            shown, regardless of this flag.

    Returns:
        The recognized text, or ``""`` when the audio is empty, cannot be
        decoded, cannot be understood, or the recognition request fails.
    """
    recognizer = sr.Recognizer()

    if isinstance(audio_data, sr.AudioData):
        audio = audio_data
    else:
        if not audio_data:
            # Nothing was recorded; avoid handing an empty buffer to the decoder.
            st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
            return ""
        try:
            # recognize_google() only accepts sr.AudioData, so the raw file
            # bytes must first be decoded through AudioFile + record().
            with sr.AudioFile(io.BytesIO(audio_data)) as source:
                audio = recognizer.record(source)
        except (ValueError, OSError):
            # AudioFile raises ValueError for unreadable/unsupported data and
            # OSError when an external FLAC converter is needed but missing.
            st.error("No se pudo leer el audio grabado.")
            return ""

    try:
        audio_text = recognizer.recognize_google(audio, language="es-ES")
    except sr.UnknownValueError:
        st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
        return ""
    except sr.RequestError:
        # NOTE(review): RequestError signals a failed API request (network,
        # quota, key); the message below does not say so -- consider rewording.
        st.error("Hablame para comenzar!")
        return ""

    if show_messages:
        st.subheader("Texto Reconocido:")
        st.write(audio_text)
        st.success("Reconocimiento de voz completado.")

    return audio_text
|
39 |
|
|
|
102 |
|
103 |
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
|
104 |
|
105 |
+
if audio:
|
106 |
+
st.audio(audio['bytes'])
|
107 |
+
|
108 |
+
audio_bytes = audio["bytes"]
|
109 |
+
sample_width = audio["sample_width"] # 2 bytes per sample for 16-bit PCM
|
110 |
+
sample_rate = audio["sample_rate"] # 44.1 kHz sample rate
|
111 |
+
num_channels = 1 # 1 channel for mono, 2 for stereo
|
112 |
+
|
113 |
+
with wave.open(temp_audio_file_path, 'w') as wave_file:
|
114 |
+
wave_file.setnchannels(num_channels)
|
115 |
+
wave_file.setsampwidth(sample_width)
|
116 |
+
wave_file.setframerate(sample_rate)
|
117 |
+
wave_file.writeframes(audio_bytes)
|
118 |
|
119 |
if __name__ == "__main__":
|
120 |
main()
|