Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import time
|
4 |
import librosa
|
|
|
5 |
import soundfile
|
6 |
import nemo.collections.asr as nemo_asr
|
7 |
import tempfile
|
@@ -15,8 +16,9 @@ model.change_decoding_strategy(None)
|
|
15 |
model.eval()
|
16 |
|
17 |
|
18 |
-
def process_audio_file(file):
|
19 |
-
|
|
|
20 |
|
21 |
if sr != SAMPLE_RATE:
|
22 |
data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
|
@@ -30,10 +32,15 @@ def transcribe(state, audio):
|
|
30 |
# Grant additional context
|
31 |
# time.sleep(1)
|
32 |
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
audio_data = process_audio_file(audio)
|
37 |
|
38 |
with tempfile.TemporaryDirectory() as tmpdir:
|
39 |
# Filepath transcribe
|
@@ -50,15 +57,15 @@ def transcribe(state, audio):
|
|
50 |
|
51 |
transcriptions = transcriptions[0]
|
52 |
|
53 |
-
state = state + transcriptions + " "
|
54 |
-
return state,
|
55 |
|
56 |
|
57 |
iface = gr.Interface(
|
58 |
fn=transcribe,
|
59 |
inputs=[
|
60 |
"state",
|
61 |
-
gr.Audio(source="microphone",
|
62 |
],
|
63 |
outputs=[
|
64 |
"state",
|
|
|
2 |
import torch
|
3 |
import time
|
4 |
import librosa
|
5 |
+
import numpy as np
|
6 |
import soundfile
|
7 |
import nemo.collections.asr as nemo_asr
|
8 |
import tempfile
|
|
|
16 |
model.eval()
|
17 |
|
18 |
|
19 |
+
# def process_audio_file(file):
|
20 |
+
def process_audio_file(data, sr):
|
21 |
+
# data, sr = librosa.load(file)
|
22 |
|
23 |
if sr != SAMPLE_RATE:
|
24 |
data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
|
|
|
32 |
# Grant additional context
|
33 |
# time.sleep(1)
|
34 |
|
35 |
+
sr, audio = audio
|
36 |
+
audio = audio.astype(np.float32)
|
37 |
+
audio /= np.max(np.abs(audio))
|
38 |
+
|
39 |
+
#if state is None:
|
40 |
+
# state = ""
|
41 |
+
state = audio
|
42 |
|
43 |
+
audio_data = process_audio_file(audio, sr)
|
44 |
|
45 |
with tempfile.TemporaryDirectory() as tmpdir:
|
46 |
# Filepath transcribe
|
|
|
57 |
|
58 |
transcriptions = transcriptions[0]
|
59 |
|
60 |
+
# state = state + transcriptions + " "
|
61 |
+
return state, transcriptions
|
62 |
|
63 |
|
64 |
iface = gr.Interface(
|
65 |
fn=transcribe,
|
66 |
inputs=[
|
67 |
"state",
|
68 |
+
gr.Audio(source="microphone", streaming=True),
|
69 |
],
|
70 |
outputs=[
|
71 |
"state",
|