salomonsky committed on
Commit
713e319
·
verified ·
1 Parent(s): 08bb6b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import io
2
  import base64
 
 
3
  from gtts import gTTS
4
  import streamlit as st
5
  import speech_recognition as sr
@@ -14,11 +16,12 @@ if "pre_prompt_sent" not in st.session_state:
14
 
15
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
16
 
17
- def recognize_speech(audio_data, show_messages=True):
18
  recognizer = sr.Recognizer()
19
 
20
  try:
21
- audio_text = recognizer.recognize_google(audio_data, language="es-ES")
 
22
  if show_messages:
23
  st.subheader("Texto Reconocido:")
24
  st.write(audio_text)
@@ -95,17 +98,17 @@ def main():
95
  if not st.session_state.pre_prompt_sent:
96
  st.session_state.pre_prompt_sent = True
97
 
98
- audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder', sample_rate=16000)
99
 
100
  if audio:
101
  st.audio(audio['bytes'], format="audio/wav")
102
- audio_bytes = audio["bytes"]
 
103
 
104
- audio_text = recognize_speech(audio_bytes)
105
 
106
  if audio_text:
107
- output, audio_file = generate(audio_text, history=st.session_state.history)
108
- display_recognition_result(audio_text, output, audio_file)
109
 
110
  if __name__ == "__main__":
111
  main()
 
1
  import io
2
  import base64
3
+ import numpy as np
4
+ import soundfile as sf
5
  from gtts import gTTS
6
  import streamlit as st
7
  import speech_recognition as sr
 
16
 
17
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
18
 
19
+ def recognize_speech(audio_data, sample_rate, show_messages=True):
20
  recognizer = sr.Recognizer()
21
 
22
  try:
23
+ adjusted_audio_data = sf.resample(audio_data, sample_rate, 16000, subtype='PCM_16')
24
+ audio_text = recognizer.recognize_google(adjusted_audio_data, language="es-ES")
25
  if show_messages:
26
  st.subheader("Texto Reconocido:")
27
  st.write(audio_text)
 
98
  if not st.session_state.pre_prompt_sent:
99
  st.session_state.pre_prompt_sent = True
100
 
101
+ audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
102
 
103
  if audio:
104
  st.audio(audio['bytes'], format="audio/wav")
105
+ audio_bytes = np.frombuffer(audio["bytes"], dtype=np.int16)
106
+ sample_rate = audio["sample_rate"]
107
 
108
+ audio_text = recognize_speech(audio_bytes, sample_rate)
109
 
110
  if audio_text:
111
+ st.session_state.history.append((audio_text, ""))
 
112
 
113
  if __name__ == "__main__":
114
  main()