salomonsky committed on
Commit
0ee9f45
verified
1 Parent(s): 5bbd161

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -31
app.py CHANGED
@@ -3,33 +3,68 @@ import base64
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
- from audiorecorder import audiorecorder
 
7
  import speech_recognition as sr
8
- from pydub import AudioSegment
9
 
10
  if "history" not in st.session_state:
11
  st.session_state.history = []
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def recognize_speech(audio_data, show_messages=True):
14
  recognizer = sr.Recognizer()
15
- audio_recording = sr.AudioFile(audio_data)
16
-
17
- with audio_recording as source:
18
- audio = recognizer.record(source)
19
 
20
  try:
21
- audio_text = recognizer.recognize_google(audio, language="es-ES")
22
  if show_messages:
23
  st.subheader("Texto Reconocido:")
24
  st.write(audio_text)
25
  st.success("Reconocimiento de voz completado.")
26
  except sr.UnknownValueError:
27
  st.warning("No se pudo reconocer el audio. 驴Intentaste grabar algo?")
28
- audio_text = ""
29
  except sr.RequestError:
30
- st.error("Hablame para comenzar!")
31
- audio_text = ""
32
-
33
  return audio_text
34
 
35
  def format_prompt(message, history):
@@ -84,26 +119,25 @@ def text_to_speech(text, speed=1.3):
84
 
85
  def main():
86
  st.title("Chatbot de Voz a Voz")
87
- audio_data = audiorecorder("Habla para grabar", "Deteniendo la grabaci贸n...")
88
-
89
- if not audio_data.empty():
90
- st.audio(audio_data.export().read(), format="audio/wav")
91
- audio_data.export("audio.wav", format="wav")
92
- audio_text = recognize_speech("audio.wav")
93
-
94
- if audio_text:
95
- output, audio_file = generate(audio_text, history=st.session_state.history)
96
-
97
- if audio_text:
98
- st.session_state.history.append((audio_text, output))
99
-
100
- if audio_file is not None:
101
- st.markdown(
102
- f"""
103
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
104
- """,
105
- unsafe_allow_html=True
106
- )
107
 
108
  if __name__ == "__main__":
109
  main()
 
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ import pyaudio
7
+ import numpy as np
8
  import speech_recognition as sr
 
9
 
10
  if "history" not in st.session_state:
11
  st.session_state.history = []
12
 
13
def find_input_device():
    """Return the index of the first audio device that can capture input.

    Returns:
        The device index, or ``None`` when no device reports any input
        channels.
    """
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get("maxInputChannels", 0) > 0:
                return i
        return None
    finally:
        # Release the PortAudio resources held by this probe instance; the
        # original never terminated it.
        p.terminate()
20
+
21
def record_audio(prompt, seconds=5):
    """Record a fixed-length clip from the first available input device.

    Args:
        prompt: Heading shown above the recording spinner.
        seconds: Length of the recording in seconds.

    Returns:
        A 1-D ``int16`` NumPy array holding the raw captured samples
        (two interleaved channels at 44100 Hz). Execution of the script is
        stopped via ``st.stop()`` when no input device is found.
    """
    sample_rate = 44100
    channels = 2
    chunk = 1024  # frames read per call

    st.subheader(prompt)

    device_index = find_input_device()
    if device_index is None:
        st.warning("No se detectaron dispositivos de entrada. Asegúrate de tener un micrófono conectado y configurado correctamente.")
        st.stop()

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        input_device_index=device_index,
                        frames_per_buffer=chunk)
        try:
            with st.spinner("Grabando..."):
                # Tolerate input overflows: dropping a few frames is better
                # than aborting the whole recording with an exception.
                frames = [
                    stream.read(chunk, exception_on_overflow=False)
                    for _ in range(int(sample_rate / chunk * seconds))
                ]
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the audio backend, even if opening or reading failed.
        p.terminate()

    return np.frombuffer(b"".join(frames), dtype=np.int16)
51
+
52
  def recognize_speech(audio_data, show_messages=True):
53
  recognizer = sr.Recognizer()
54
+ audio_array = np.array(audio_data, dtype=np.int16)
55
+ audio_text = ""
 
 
56
 
57
  try:
58
+ audio_text = recognizer.recognize_google(audio_array, language="es-ES")
59
  if show_messages:
60
  st.subheader("Texto Reconocido:")
61
  st.write(audio_text)
62
  st.success("Reconocimiento de voz completado.")
63
  except sr.UnknownValueError:
64
  st.warning("No se pudo reconocer el audio. 驴Intentaste grabar algo?")
 
65
  except sr.RequestError:
66
+ st.error("Error al reconocer el audio. Aseg煤rate de tener una conexi贸n a Internet.")
67
+
 
68
  return audio_text
69
 
70
  def format_prompt(message, history):
 
119
 
120
  def main():
121
  st.title("Chatbot de Voz a Voz")
122
+
123
+ audio_data = record_audio("Habla para grabar")
124
+
125
+ st.audio(audio_data, format="audio/wav", sample_rate=44100)
126
+ audio_text = recognize_speech(audio_data)
127
+
128
+ if audio_text:
129
+ output, audio_file = generate(audio_text, history=st.session_state.history)
130
+
131
+ if audio_text:
132
+ st.session_state.history.append((audio_text, output))
133
+
134
+ if audio_file is not None:
135
+ st.markdown(
136
+ f"""
137
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
138
+ """,
139
+ unsafe_allow_html=True
140
+ )
 
141
 
142
  if __name__ == "__main__":
143
  main()