salomonsky committed
Commit eb77a73 · verified · 1 Parent(s): 1d6048e

Update app.py

Files changed (1)
  1. app.py +19 -56
app.py CHANGED
@@ -1,10 +1,9 @@
 import streamlit as st
 import torch
+import torchaudio
 import numpy as np
-import pyaudio
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 from gtts import gTTS
-import os
 
 class VoiceAssistant:
     def __init__(self):
@@ -14,67 +13,31 @@ class VoiceAssistant:
         self.sample_rate = 16000
         self.chunk_size = 480
 
-        self.p = pyaudio.PyAudio()
-        self.input_device_index = self.select_input_device()
-
-        self.stream = self.p.open(
-            format=pyaudio.paFloat32,
-            channels=1,
-            rate=self.sample_rate,
-            input=True,
-            input_device_index=self.input_device_index,
-            frames_per_buffer=self.chunk_size
-        )
-
         self.keyword_activation = "jarvis"
         self.keyword_deactivation = "detente"
 
         self.listening = False
 
-    def select_input_device(self):
-        for i in range(self.p.get_device_count()):
-            dev = self.p.get_device_info_by_index(i)
-            if dev['maxInputChannels'] > 0:
-                print(f"Dispositivo {i}: {dev['name']}")
-
-        for i in range(self.p.get_device_count()):
-            dev = self.p.get_device_info_by_index(i)
-            if dev['maxInputChannels'] > 0:
-                try:
-                    test_stream = self.p.open(
-                        format=pyaudio.paFloat32,
-                        channels=1,
-                        rate=self.sample_rate,
-                        input=True,
-                        input_device_index=i,
-                        frames_per_buffer=self.chunk_size
-                    )
-                    test_stream.close()
-                    return i
-                except Exception:
-                    continue
-
-        raise RuntimeError("No input device found")
-
     def vad_collector(self):
         audio_chunks, keyword_detected = [], False
-        while self.listening:
-            try:
-                data = self.stream.read(self.chunk_size)
-                audio_chunk = np.frombuffer(data, dtype=np.float32)
-
-                if self.keyword_activation.lower() in str(audio_chunk).lower():
-                    keyword_detected = True
-                    break
-
-                if self.keyword_deactivation.lower() in str(audio_chunk).lower():
-                    self.listening = False
+        with torchaudio.io.AudioStream(sample_rate=self.sample_rate, channels=1, format='wav') as stream:
+            while self.listening:
+                try:
+                    data = stream.read(self.chunk_size)
+                    audio_chunk = torch.from_numpy(np.frombuffer(data, dtype=np.float32))
+
+                    if self.keyword_activation.lower() in str(audio_chunk).lower():
+                        keyword_detected = True
+                        break
+
+                    if self.keyword_deactivation.lower() in str(audio_chunk).lower():
+                        self.listening = False
+                        break
+
+                    audio_chunks.append(audio_chunk.numpy())
+                except Exception as e:
+                    st.error(f"Audio capture error: {e}")
                     break
-
-                audio_chunks.append(audio_chunk)
-            except Exception as e:
-                st.error(f"Audio capture error: {e}")
-                break
 
         return audio_chunks, keyword_detected
 
@@ -125,4 +88,4 @@ def main():
     assistant.run()
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
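
Review note: torchaudio has no `torchaudio.io.AudioStream` class, so the new `vad_collector` will raise an `AttributeError` before it reads any audio. The streaming interface torchaudio does ship (0.12+/2.x, with FFmpeg support built in) is `torchaudio.io.StreamReader`. A minimal capture sketch under that assumption; the device string and input format are platform guesses, not taken from this commit (ALSA on Linux; macOS would use format="avfoundation" with src=":0"):

    import torchaudio

    # Assumed device/format for illustration: "hw:0" + "alsa" is Linux-only.
    streamer = torchaudio.io.StreamReader(src="hw:0", format="alsa")
    streamer.add_basic_audio_stream(
        frames_per_chunk=480,   # matches self.chunk_size
        sample_rate=16000,      # matches self.sample_rate (resampled if needed)
    )

    chunks = []
    for (chunk,) in streamer.stream():     # one float tensor per output stream
        chunks.append(chunk.mean(dim=1))   # (frames, channels) -> mono
        if len(chunks) >= 100:             # ~3 s of audio at 480 frames/chunk
            break

This would also cover what the deleted `select_input_device` probing loop did: device selection moves into the `src`/`format` arguments.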
 
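A second note on the detection logic, which the commit carries over unchanged: `self.keyword_activation.lower() in str(audio_chunk).lower()` searches the string repr of raw float samples (something like "[ 0.01 -0.02 ...]"), so it can never match a spoken "jarvis". The chunk has to be transcribed first, and the file already imports `Wav2Vec2Processor` and `Wav2Vec2ForCTC` for exactly that. A sketch, with the checkpoint name an assumption since the diff does not show which one app.py loads:

    import numpy as np
    import torch
    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

    # Checkpoint chosen for illustration; app.py may load a different one.
    MODEL = "facebook/wav2vec2-base-960h"
    processor = Wav2Vec2Processor.from_pretrained(MODEL)
    model = Wav2Vec2ForCTC.from_pretrained(MODEL)

    def transcribe(audio: np.ndarray, sample_rate: int = 16000) -> str:
        """Greedy CTC decode of a 1-D float32 PCM buffer to lowercase text."""
        inputs = processor(audio, sampling_rate=sample_rate, return_tensors="pt")
        with torch.no_grad():
            logits = model(inputs.input_values).logits
        ids = torch.argmax(logits, dim=-1)
        return processor.batch_decode(ids)[0].lower()

    # The keyword test then runs on text, not on the raw buffer:
    # keyword_detected = "jarvis" in transcribe(np.concatenate(audio_chunks))

Since one 480-sample chunk is only 30 ms of audio at 16 kHz, several chunks would have to be buffered (as in the commented call above) before each transcription pass.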