Introduce gr.State for probs and audio_length_samples (replacing module-level globals)
Browse files
app.py
CHANGED
@@ -5,23 +5,24 @@ import torch
|
|
5 |
import pandas as pd
|
6 |
import gdown
|
7 |
|
8 |
-
probs = None
|
9 |
audio_length_samples = None
|
10 |
def process_audio(audio_input):
|
11 |
-
global probs
|
12 |
-
global audio_length_samples
|
13 |
wav = read_audio(audio_input, sampling_rate=16_000)
|
14 |
audio_length_samples = len(wav)
|
15 |
probs = get_speech_probs(wav, sampling_rate=16_000)
|
16 |
-
return make_visualization(probs, 512 / 16_000)
|
|
|
|
|
17 |
|
18 |
-
def process_parameters(threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
|
19 |
timestamps = probs2speech_timestamps(probs, audio_length_samples,
|
20 |
threshold = threshold,
|
21 |
min_speech_duration_ms = min_speech_duration_ms,
|
22 |
min_silence_duration_ms=min_silence_duration_ms,
|
23 |
window_size_samples=window_size_samples,
|
24 |
-
speech_pad_ms=speech_pad_ms
|
|
|
|
|
|
|
25 |
df = pd.DataFrame(timestamps)
|
26 |
df["note"] = ""
|
27 |
df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
|
@@ -34,8 +35,9 @@ def download_gdrive(id):
|
|
34 |
return output_file
|
35 |
|
36 |
def main():
|
37 |
-
|
38 |
with gr.Blocks() as demo:
|
|
|
|
|
39 |
with gr.Row():
|
40 |
info = """Input the Google Drive file id from the shared link.
|
41 |
It comes after https://drive.google.com/file/d/ <id here.
|
@@ -50,7 +52,7 @@ def main():
|
|
50 |
|
51 |
download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
|
52 |
|
53 |
-
button1.click(process_audio, inputs=[audio_input], outputs=figure)
|
54 |
|
55 |
with gr.Row():
|
56 |
threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
|
@@ -63,7 +65,7 @@ def main():
|
|
63 |
with gr.Row():
|
64 |
output_df = gr.DataFrame()
|
65 |
|
66 |
-
button2.click(process_parameters, inputs=[threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
|
67 |
outputs=[output_file, output_df])
|
68 |
|
69 |
demo.launch()
|
|
|
5 |
import pandas as pd
|
6 |
import gdown
|
7 |
|
|
|
8 |
audio_length_samples = None
|
9 |
def process_audio(audio_input):
    """Compute speech probabilities for an audio input and visualize them.

    The audio is loaded with ``read_audio`` at 16 kHz, passed to
    ``get_speech_probs``, and the probabilities are handed to
    ``make_visualization``.

    Args:
        audio_input: Whatever ``read_audio`` accepts as its first argument
            (presumably a path to an audio file -- confirm against caller).

    Returns:
        A 3-tuple ``(figure, probs, audio_length_samples)``: the value
        returned by ``make_visualization``, the speech probabilities, and
        ``len()`` of the loaded waveform. Returning the last two (instead of
        storing them in globals) lets the caller keep them as per-session
        state.
    """
    sample_rate = 16_000
    waveform = read_audio(audio_input, sampling_rate=sample_rate)
    n_samples = len(waveform)
    speech_probs = get_speech_probs(waveform, sampling_rate=sample_rate)
    # Second argument is 512 / 16_000 = 0.032; presumably the time step in
    # seconds between consecutive probabilities (512-sample window) -- confirm.
    plot = make_visualization(speech_probs, 512 / sample_rate)
    return plot, speech_probs, n_samples
|
14 |
+
|
15 |
+
def process_parameters(probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
|
16 |
|
|
|
17 |
timestamps = probs2speech_timestamps(probs, audio_length_samples,
|
18 |
threshold = threshold,
|
19 |
min_speech_duration_ms = min_speech_duration_ms,
|
20 |
min_silence_duration_ms=min_silence_duration_ms,
|
21 |
window_size_samples=window_size_samples,
|
22 |
+
speech_pad_ms=speech_pad_ms,
|
23 |
+
return_seconds=True,
|
24 |
+
rounding=3)
|
25 |
+
|
26 |
df = pd.DataFrame(timestamps)
|
27 |
df["note"] = ""
|
28 |
df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
|
|
|
35 |
return output_file
|
36 |
|
37 |
def main():
|
|
|
38 |
with gr.Blocks() as demo:
|
39 |
+
probs = gr.State()
|
40 |
+
audio_length_samples = gr.State()
|
41 |
with gr.Row():
|
42 |
info = """Input the Google Drive file id from the shared link.
|
43 |
It comes after https://drive.google.com/file/d/ <id here.
|
|
|
52 |
|
53 |
download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
|
54 |
|
55 |
+
button1.click(process_audio, inputs=[audio_input], outputs=[figure, probs, audio_length_samples])
|
56 |
|
57 |
with gr.Row():
|
58 |
threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
|
|
|
65 |
with gr.Row():
|
66 |
output_df = gr.DataFrame()
|
67 |
|
68 |
+
button2.click(process_parameters, inputs=[probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
|
69 |
outputs=[output_file, output_df])
|
70 |
|
71 |
demo.launch()
|