w/e
Changed files:
- app.py +1 -2
- vad_utils.py +1 -1
app.py
@@ -5,7 +5,6 @@ import torch
 import pandas as pd
 import gdown
 
-audio_length_samples = None
 def process_audio(audio_input):
     wav = read_audio(audio_input, sampling_rate=16_000)
     audio_length_samples = len(wav)
@@ -13,7 +12,7 @@ def process_audio(audio_input):
     return make_visualization(probs, 512 / 16_000), probs, audio_length_samples
 
 def process_parameters(probs, audio_length_samples, threshold, min_speech_duration_ms, min_silence_duration_ms, window_size_samples, speech_pad_ms):
-
+    print(probs, audio_length_samples)
     timestamps = probs2speech_timestamps(probs, audio_length_samples,
                                          threshold = threshold,
                                          min_speech_duration_ms = min_speech_duration_ms,
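With the module-level `audio_length_samples = None` gone, the length is computed inside `process_audio` and handed to `process_parameters` explicitly; the new `print` just traces those values. A minimal sketch of the intended data flow under that reading; the input path, the parameter values, and `process_parameters` returning the timestamps are assumptions, not shown in this diff:

# Hedged sketch, not the app's actual wiring. "example.wav" is a
# hypothetical input; the parameter values echo the defaults visible
# in the vad_utils.py signature below, plus a guessed threshold and
# min_speech_duration_ms.
viz, probs, audio_length_samples = process_audio("example.wav")
timestamps = process_parameters(probs, audio_length_samples,
                                threshold=0.5,
                                min_speech_duration_ms=250,
                                min_silence_duration_ms=100,
                                window_size_samples=512,
                                speech_pad_ms=30)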
vad_utils.py
@@ -66,7 +66,7 @@ def probs2speech_timestamps(speech_probs, audio_length_samples,
                             min_silence_duration_ms: int = 100,
                             window_size_samples: int = 512,
                             speech_pad_ms: int = 30,
-                            return_seconds: bool =
+                            return_seconds: bool = True,
                             rounding: int = 1,):
 
     step = sampling_rate // 16000
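The vad_utils.py side completes the previously dangling `return_seconds: bool =` annotation with a default of `True`, so callers that omit the argument now get timestamps in seconds, presumably rounded to `rounding` decimal places, rather than raw sample indices. A hedged illustration of the difference, with the output shape assumed from silero-vad's `get_speech_timestamps` convention:

# Assumed behavior, mirroring silero-vad's get_speech_timestamps:
# with the new default the dicts hold seconds, not sample counts.
timestamps = probs2speech_timestamps(probs, audio_length_samples)
# e.g. [{'start': 0.5, 'end': 2.1}]      # return_seconds=True (new default)
# vs.  [{'start': 8000, 'end': 33600}]   # return_seconds=False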