Siddhant committed on
Commit
0b47b7c
1 Parent(s): daefe1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -185,7 +185,7 @@ def transcribe(stream, new_chunk):
185
  if (vad.is_speech(y[i*960:(i+1)*960].tobytes(), orig_sr)):
186
  vad_count+=1
187
  print(vad_count)
188
- if vad_output is None and vad_count>15:
189
  vad_curr=True
190
  if vad_output is None:
191
  vad_output=[torch.from_numpy(audio_float32)]
@@ -216,6 +216,8 @@ def transcribe(stream, new_chunk):
216
  # pred_ids, skip_special_tokens=True, decode_with_timestamps=False
217
  # )[0]
218
  print(len(array))
 
 
219
  start_time = time.time()
220
  prompt=" ".join(s2t(array)[0][0].split()[1:])
221
  vad_output = None
@@ -224,6 +226,7 @@ def transcribe(stream, new_chunk):
224
  return stream, text_str1, audio_output
225
 
226
  # prompt=transcriber({"sampling_rate": sr, "raw": array})["text"]
 
227
  print(prompt)
228
  print("--- %s seconds ---" % (time.time() - start_time))
229
  # prompt=ASR_model.transcribe(array)["text"].strip()
 
185
  if (vad.is_speech(y[i*960:(i+1)*960].tobytes(), orig_sr)):
186
  vad_count+=1
187
  print(vad_count)
188
+ if vad_output is None and vad_count>12:
189
  vad_curr=True
190
  if vad_output is None:
191
  vad_output=[torch.from_numpy(audio_float32)]
 
216
  # pred_ids, skip_special_tokens=True, decode_with_timestamps=False
217
  # )[0]
218
  print(len(array))
219
+ array = librosa.util.fix_length(array, size=(16000 * 30))
220
+ print(len(array))
221
  start_time = time.time()
222
  prompt=" ".join(s2t(array)[0][0].split()[1:])
223
  vad_output = None
 
226
  return stream, text_str1, audio_output
227
 
228
  # prompt=transcriber({"sampling_rate": sr, "raw": array})["text"]
229
+ print(len(prompt.strip().split()))
230
  print(prompt)
231
  print("--- %s seconds ---" % (time.time() - start_time))
232
  # prompt=ASR_model.transcribe(array)["text"].strip()