theodotus committed on
Commit
0808d5f
·
1 Parent(s): 7ff9e79

Fix always mono

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import numpy as np
3
- import resampy
4
  import torch
5
 
6
  from math import ceil
@@ -31,10 +31,9 @@ mid_delay = ceil((chunk_len + (buffer_len - chunk_len) / 2) / model_stride_in_se
31
 
32
 
33
 
34
- def resample(sr, audio_data):
35
- audio_fp32 = np.divide(audio_data, np.iinfo(audio_data.dtype).max, dtype=np.float32)
36
- audio_16k = resampy.resample(audio_fp32, sr, asr_model.cfg["sample_rate"])
37
-
38
  return audio_16k
39
 
40
 
@@ -70,8 +69,7 @@ def transcribe(audio, state):
70
  if state is None:
71
  state = [np.array([], dtype=np.float32), []]
72
 
73
- sr, audio_data = audio
74
- audio_16k = resample(sr, audio_data)
75
 
76
  # join to audio sequence
77
  state[0] = np.concatenate([state[0], audio_16k])
@@ -94,7 +92,7 @@ def transcribe(audio, state):
94
  gr.Interface(
95
  fn=transcribe,
96
  inputs=[
97
- gr.Audio(source="microphone", type="numpy", streaming=True),
98
  gr.State(None)
99
  ],
100
  outputs=[
 
1
  import gradio as gr
2
  import numpy as np
3
+ import librosa
4
  import torch
5
 
6
  from math import ceil
 
31
 
32
 
33
 
34
+ def resample(audio):
35
+ audio_16k, sr = librosa.load(audio, sr = asr_model.cfg["sample_rate"],
36
+ mono=True, res_type='kaiser_fast')
 
37
  return audio_16k
38
 
39
 
 
69
  if state is None:
70
  state = [np.array([], dtype=np.float32), []]
71
 
72
+ audio_16k = resample(audio)
 
73
 
74
  # join to audio sequence
75
  state[0] = np.concatenate([state[0], audio_16k])
 
92
  gr.Interface(
93
  fn=transcribe,
94
  inputs=[
95
+ gr.Audio(source="microphone", type="filepath", streaming=True),
96
  gr.State(None)
97
  ],
98
  outputs=[