Spaces:

theodotus
/

streaming-asr-uk

Sleeping

theodotus committed on Oct 7, 2022

Commit

0808d5f

1 Parent(s): 7ff9e79

Fix always mono

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import numpy as np
-import resampy
 import torch
 from math import ceil
@@ -31,10 +31,9 @@ mid_delay = ceil((chunk_len + (buffer_len - chunk_len) / 2) / model_stride_in_se
-def resample(sr, audio_data):
-    audio_fp32 = np.divide(audio_data, np.iinfo(audio_data.dtype).max, dtype=np.float32)
-    audio_16k = resampy.resample(audio_fp32, sr, asr_model.cfg["sample_rate"])
     return audio_16k
@@ -70,8 +69,7 @@ def transcribe(audio, state):
     if state is None:
         state = [np.array([], dtype=np.float32), []]
-    sr, audio_data = audio
-    audio_16k = resample(sr, audio_data)
     # join to audio sequence
     state[0] = np.concatenate([state[0], audio_16k])
@@ -94,7 +92,7 @@ def transcribe(audio, state):
 gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.Audio(source="microphone", type="numpy", streaming=True),
         gr.State(None)
     ],
     outputs=[

 import gradio as gr
 import numpy as np
+import librosa
 import torch
 from math import ceil
+def resample(audio):
+    audio_16k, sr = librosa.load(audio, sr = asr_model.cfg["sample_rate"],
+                            mono=True,  res_type='kaiser_fast')
     return audio_16k
     if state is None:
         state = [np.array([], dtype=np.float32), []]
+    audio_16k = resample(audio)
     # join to audio sequence
     state[0] = np.concatenate([state[0], audio_16k])
 gr.Interface(
     fn=transcribe,
     inputs=[
+        gr.Audio(source="microphone", type="filepath", streaming=True),
         gr.State(None)
     ],
     outputs=[