global probs
Browse files
app.py
CHANGED
@@ -3,12 +3,14 @@ import numpy as np
|
|
3 |
from vad_utils import get_speech_probs, make_visualization, probs2speech_timestamps, read_audio
|
4 |
import torch
|
5 |
|
|
|
6 |
def process_audio(audio_input):
    """Read the audio input, compute speech probabilities, and return their visualization.

    The audio is read and scored with ``sampling_rate=16_000``; the result of
    ``make_visualization`` is returned unchanged.
    """
    sample_rate = 16_000
    waveform = read_audio(audio_input, sampling_rate=sample_rate)
    speech_probs = get_speech_probs(waveform, sampling_rate=sample_rate)
    # 512 / 16_000 = 0.032; presumably the duration of one probability
    # window in seconds -- confirm against vad_utils.make_visualization.
    step = 512 / sample_rate
    return make_visualization(speech_probs, step)
|
10 |
|
11 |
-
def process_parameters(
|
12 |
return probs2speech_timestamps(probs, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms)
|
13 |
|
14 |
def main():
|
@@ -24,7 +26,6 @@ def main():
|
|
24 |
button1.click(process_audio, inputs=[audio_input], outputs=figure)
|
25 |
|
26 |
with gr.Row():
|
27 |
-
probs = gr.State(None)
|
28 |
threshold = gr.Number(label="Threshold", value=0.5, minimum=0.0, maximum=1.0)
|
29 |
min_speech_duration_ms = gr.Number(label="Min Speech Duration (ms)", value=250)
|
30 |
min_silence_duration_ms = gr.Number(label="Min Silence Duration (ms)", value=100)
|
@@ -33,7 +34,7 @@ def main():
|
|
33 |
button2 = gr.Button("Process Parameters")
|
34 |
output_text = gr.Textbox()
|
35 |
|
36 |
-
button2.click(process_parameters, inputs=[
|
37 |
|
38 |
demo.launch()
|
39 |
|
|
|
3 |
from vad_utils import get_speech_probs, make_visualization, probs2speech_timestamps, read_audio
|
4 |
import torch
|
5 |
|
6 |
+
# Speech probabilities written by process_audio and read by process_parameters;
# None until process_audio has run.
# NOTE(review): module-level state is shared by every caller in this process --
# confirm that sharing it between users/sessions is intended.
probs = None
|
7 |
def process_audio(audio_input):
    """Read the audio input, store its speech probabilities, and return their visualization.

    The probabilities are saved in the module-level ``probs`` so that
    ``process_parameters`` can reuse them without recomputing.
    """
    # NOTE(review): a module global is shared by all callers in this process;
    # confirm this is acceptable when several users use the app at once.
    global probs
    sample_rate = 16_000
    waveform = read_audio(audio_input, sampling_rate=sample_rate)
    probs = get_speech_probs(waveform, sampling_rate=sample_rate)
    # 512 / 16_000 = 0.032; presumably the duration of one probability
    # window in seconds -- confirm against vad_utils.make_visualization.
    step = 512 / sample_rate
    return make_visualization(probs, step)
|
12 |
|
13 |
+
def process_parameters(threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
    """Turn the stored speech probabilities into speech timestamps.

    Reads the module-level ``probs`` (written by ``process_audio``) and
    forwards it, together with all five parameters in the order received,
    to ``probs2speech_timestamps``; that function's result is returned as is.

    Raises:
        ValueError: if ``probs`` is still ``None``, i.e. no audio has been
            processed yet.
    """
    # Fail with a clear message instead of handing None to the helper when
    # the user clicks "Process Parameters" before processing any audio.
    if probs is None:
        raise ValueError(
            "No speech probabilities available: process an audio file first."
        )
    return probs2speech_timestamps(
        probs,
        threshold,
        min_speech_duration_ms,
        min_silence_duration_ms,
        window_size_samples,
        speech_pad_ms,
    )
|
15 |
|
16 |
def main():
|
|
|
26 |
button1.click(process_audio, inputs=[audio_input], outputs=figure)
|
27 |
|
28 |
with gr.Row():
|
|
|
29 |
threshold = gr.Number(label="Threshold", value=0.5, minimum=0.0, maximum=1.0)
|
30 |
min_speech_duration_ms = gr.Number(label="Min Speech Duration (ms)", value=250)
|
31 |
min_silence_duration_ms = gr.Number(label="Min Silence Duration (ms)", value=100)
|
|
|
34 |
button2 = gr.Button("Process Parameters")
|
35 |
output_text = gr.Textbox()
|
36 |
|
37 |
+
button2.click(process_parameters, inputs=[threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms], outputs=output_text)
|
38 |
|
39 |
demo.launch()
|
40 |
|