salomonsky committed on
Commit
ff663fa
·
verified ·
1 Parent(s): badb078

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -30
app.py CHANGED
@@ -1,15 +1,10 @@
1
  import io
2
  import base64
3
- import numpy as np
4
  from gtts import gTTS
5
  import streamlit as st
6
  import speech_recognition as sr
7
  from huggingface_hub import InferenceClient
8
  from streamlit_mic_recorder import mic_recorder
9
- import webrtcvad
10
- import wave
11
-
12
- temp_audio_file_path = "./output.wav"
13
 
14
  if "history" not in st.session_state:
15
  st.session_state.history = []
@@ -97,12 +92,6 @@ def display_recognition_result(audio_text, output, audio_file):
97
  f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
98
  unsafe_allow_html=True)
99
 
100
- def detect_silence(audio_data, sample_rate, silence_threshold=5000, silence_duration=5000):
101
- is_silence = lambda x: max(x) < silence_threshold
102
- chunks = [audio_data[i:i+silence_duration] for i in range(0, len(audio_data), silence_duration)]
103
- silent_chunks = [chunk for chunk in chunks if is_silence(chunk)]
104
- return silent_chunks
105
-
106
  def main():
107
  if not st.session_state.pre_prompt_sent:
108
  st.session_state.pre_prompt_sent = True
@@ -112,25 +101,12 @@ def main():
112
  if audio:
113
  st.audio(audio['bytes'], format="audio/wav")
114
  audio_bytes = audio["bytes"]
115
- sample_width = audio["sample_width"]
116
- sample_rate = audio["sample_rate"]
117
-
118
- with wave.open(temp_audio_file_path, 'wb') as wave_file:
119
- wave_file.setnchannels(1)
120
- wave_file.setsampwidth(sample_width)
121
- wave_file.setframerate(sample_rate)
122
- wave_file.writeframes(audio_bytes)
123
-
124
- audio_data = np.frombuffer(audio_bytes, dtype=np.int16)
125
- silent_chunks = detect_silence(audio_data, sample_rate)
126
-
127
- for silent_chunk in silent_chunks:
128
- audio_text = recognize_speech(silent_chunk)
129
- st.subheader("Texto Reconocido:")
130
- st.write(audio_text)
131
- st.session_state.history.append((audio_text, ""))
132
- response, audio_file = generate(audio_text, st.session_state.history)
133
- display_recognition_result(audio_text, response, audio_file)
134
 
135
  if __name__ == "__main__":
136
  main()
 
1
  import io
2
  import base64
 
3
  from gtts import gTTS
4
  import streamlit as st
5
  import speech_recognition as sr
6
  from huggingface_hub import InferenceClient
7
  from streamlit_mic_recorder import mic_recorder
 
 
 
 
8
 
9
  if "history" not in st.session_state:
10
  st.session_state.history = []
 
92
  f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
93
  unsafe_allow_html=True)
94
 
 
 
 
 
 
 
95
  def main():
96
  if not st.session_state.pre_prompt_sent:
97
  st.session_state.pre_prompt_sent = True
 
101
  if audio:
102
  st.audio(audio['bytes'], format="audio/wav")
103
  audio_bytes = audio["bytes"]
104
+
105
+ audio_text = recognize_speech(audio_bytes)
106
+
107
+ if audio_text:
108
+ output, audio_file = generate(audio_text, history=st.session_state.history)
109
+ display_recognition_result(audio_text, output, audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  if __name__ == "__main__":
112
  main()