Spaces:

mizoru
/

wav2tsv

Sleeping

App Files Community

mizoru committed on Apr 19

Commit

8e14b4c

•

1 Parent(s): 865b8d5

introduce state for probs and other

Browse files

Files changed (1) hide show

app.py +11 -9

app.py CHANGED Viewed

@@ -5,23 +5,24 @@ import torch
 import pandas as pd
 import gdown
-probs = None
 audio_length_samples = None
 def process_audio(audio_input):
-    global probs
-    global audio_length_samples
     wav = read_audio(audio_input, sampling_rate=16_000)
     audio_length_samples = len(wav)
     probs = get_speech_probs(wav, sampling_rate=16_000)
-    return make_visualization(probs, 512 / 16_000)
-def process_parameters(threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
     timestamps = probs2speech_timestamps(probs, audio_length_samples,
                                          threshold = threshold,
                                          min_speech_duration_ms = min_speech_duration_ms,
                                          min_silence_duration_ms=min_silence_duration_ms,
                                          window_size_samples=window_size_samples,
-                                         speech_pad_ms=speech_pad_ms)
     df = pd.DataFrame(timestamps)
     df["note"] = ""
     df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
@@ -34,8 +35,9 @@ def download_gdrive(id):
     return output_file
 def main():
     with gr.Blocks() as demo:
         with gr.Row():
             info = """Input the Google Drive file id from the shared link.
             It comes after https://drive.google.com/file/d/ <id here.
@@ -50,7 +52,7 @@ def main():
         download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
-        button1.click(process_audio, inputs=[audio_input], outputs=figure)
         with gr.Row():
             threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
@@ -63,7 +65,7 @@ def main():
         with gr.Row():
             output_df = gr.DataFrame()
-        button2.click(process_parameters, inputs=[threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
                       outputs=[output_file, output_df])
     demo.launch()

 import pandas as pd
 import gdown
 audio_length_samples = None
 def process_audio(audio_input):
     wav = read_audio(audio_input, sampling_rate=16_000)
     audio_length_samples = len(wav)
     probs = get_speech_probs(wav, sampling_rate=16_000)
+    return make_visualization(probs, 512 / 16_000), probs, audio_length_samples
+def process_parameters(probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
     timestamps = probs2speech_timestamps(probs, audio_length_samples,
                                          threshold = threshold,
                                          min_speech_duration_ms = min_speech_duration_ms,
                                          min_silence_duration_ms=min_silence_duration_ms,
                                          window_size_samples=window_size_samples,
+                                         speech_pad_ms=speech_pad_ms,
+                                         return_seconds=True,
+                                         rounding=3)
     df = pd.DataFrame(timestamps)
     df["note"] = ""
     df.to_csv("timestamps.txt", sep = '\t', header=False, index=False)
     return output_file
 def main():
     with gr.Blocks() as demo:
+        probs = gr.State()
+        audio_length_samples = gr.State()
         with gr.Row():
             info = """Input the Google Drive file id from the shared link.
             It comes after https://drive.google.com/file/d/ <id here.
         download_button.click(download_gdrive, inputs=[gdrive_str], outputs=audio_input)
+        button1.click(process_audio, inputs=[audio_input], outputs=[figure, probs, audio_length_samples])
         with gr.Row():
             threshold = gr.Number(label="Threshold", value=0.6, minimum=0.0, maximum=1.0)
         with gr.Row():
             output_df = gr.DataFrame()
+        button2.click(process_parameters, inputs=[probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms],
                       outputs=[output_file, output_df])
     demo.launch()