Helw150 committed on
Commit
94540c3
·
1 Parent(s): ec083e1

Restructure

Browse files
Files changed (1) hide show
  1. app.py +18 -18
app.py CHANGED
@@ -17,11 +17,12 @@ import tempfile
17
 
18
  from utils.vad import VadOptions, collect_chunks, get_speech_timestamps
19
 
20
- diva_model = AutoModel.from_pretrained(
21
- "WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True
22
- )
 
23
 
24
- resampler = Audio(sampling_rate=16_000)
25
 
26
 
27
  @spaces.GPU(duration=20)
@@ -44,10 +45,12 @@ def diva_audio(audio_input, do_sample=False, temperature=0.001, prev_outs=None):
44
  )
45
 
46
 
47
- def run_vad(ori_audio, sr):
48
  _st = time.time()
49
  try:
50
  audio = ori_audio
 
 
51
  audio = audio.astype(np.float32) / 32768.0
52
  sampling_rate = 16000
53
  if sr != sampling_rate:
@@ -76,7 +79,7 @@ def run_vad(ori_audio, sr):
76
 
77
  def warm_up():
78
  frames = np.ones(2048) # 1024 frames of 2 bytes each
79
- dur, frames, tcost = run_vad(frames, 16000)
80
  print(f"warm up done, time_cost: {tcost:.3f} s")
81
 
82
 
@@ -97,19 +100,19 @@ class AppState:
97
  def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
98
  """Take in the stream, determine if a pause happened"""
99
 
100
- temp_audio = audio
101
 
102
- dur_vad, _, time_vad = run_vad(temp_audio, sampling_rate)
103
  duration = len(audio) / sampling_rate
 
104
 
105
- if dur_vad > 0.5 and not state.started_talking:
106
  print("started talking")
107
  state.started_talking = True
108
  return False
109
 
110
  print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
111
 
112
- return (duration - dur_vad) > 1
113
 
114
 
115
  def process_audio(audio: tuple, state: AppState):
@@ -180,12 +183,9 @@ theme = gr.themes.Soft(
180
 
181
  with gr.Blocks(theme=theme) as demo:
182
  with gr.Row():
183
- with gr.Column():
184
- input_audio = gr.Audio(
185
- label="Input Audio", sources="microphone", type="numpy"
186
- )
187
- with gr.Column():
188
- chatbot = gr.Chatbot(label="Conversation", type="messages")
189
  state = gr.State(value=AppState())
190
 
191
  stream = input_audio.stream(
@@ -206,5 +206,5 @@ with gr.Blocks(theme=theme) as demo:
206
  cancels=[respond, stream],
207
  )
208
 
209
-
210
- demo.launch()
 
17
 
18
  from utils.vad import VadOptions, collect_chunks, get_speech_timestamps
19
 
20
+ if gr.NO_RELOAD:
21
+ diva_model = AutoModel.from_pretrained(
22
+ "WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True
23
+ )
24
 
25
+ resampler = Audio(sampling_rate=16_000)
26
 
27
 
28
  @spaces.GPU(duration=20)
 
45
  )
46
 
47
 
48
+ def run_vad(ori_audio, sr, duration):
49
  _st = time.time()
50
  try:
51
  audio = ori_audio
52
+ if duration < 1:
53
+ return -1, ori_audio, round(time.time() - _st, 4)
54
  audio = audio.astype(np.float32) / 32768.0
55
  sampling_rate = 16000
56
  if sr != sampling_rate:
 
79
 
80
  def warm_up():
81
  frames = np.ones(2048) # 1024 frames of 2 bytes each
82
+ dur, frames, tcost = run_vad(frames, 16000, 10)
83
  print(f"warm up done, time_cost: {tcost:.3f} s")
84
 
85
 
 
100
  def determine_pause(audio: np.ndarray, sampling_rate: int, state: AppState) -> bool:
101
  """Take in the stream, determine if a pause happened"""
102
 
103
+ temp_audio = audio[-2 * sampling_rate :]
104
 
 
105
  duration = len(audio) / sampling_rate
106
+ dur_vad, _, time_vad = run_vad(temp_audio, sampling_rate, duration)
107
 
108
+ if dur_vad > 0.25 and not state.started_talking:
109
  print("started talking")
110
  state.started_talking = True
111
  return False
112
 
113
  print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
114
 
115
+ return dur_vad < 0.5
116
 
117
 
118
  def process_audio(audio: tuple, state: AppState):
 
183
 
184
  with gr.Blocks(theme=theme) as demo:
185
  with gr.Row():
186
+ input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
187
+ with gr.Row():
188
+ chatbot = gr.Chatbot(label="Conversation", type="messages")
 
 
 
189
  state = gr.State(value=AppState())
190
 
191
  stream = input_audio.stream(
 
206
  cancels=[respond, stream],
207
  )
208
 
209
+ if __name__ == "__main__":
210
+ demo.launch()