Jimmy-test
#3
by
Zhongzhimin
- opened
- README.md +4 -4
- hyperparams.yaml +1 -1
README.md
CHANGED
@@ -72,7 +72,7 @@ Please notice that we encourage you to read our tutorials and learn more about
|
|
72 |
### Perform Voice Activity Detection
|
73 |
|
74 |
```
|
75 |
-
from speechbrain.pretrained import VAD
|
76 |
|
77 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
78 |
boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
|
@@ -93,7 +93,7 @@ To do it:
|
|
93 |
|
94 |
```
|
95 |
import torchaudio
|
96 |
-
upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'example_vad.wav')
|
97 |
torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
|
98 |
```
|
99 |
|
@@ -116,11 +116,11 @@ We designed the VAD such that you can have access to all of these steps (this mi
|
|
116 |
|
117 |
|
118 |
```python
|
119 |
-
from speechbrain.pretrained import VAD
|
120 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
121 |
|
122 |
# 1- Let's compute frame-level posteriors first
|
123 |
-
audio_file = 'example_vad.wav'
|
124 |
prob_chunks = VAD.get_speech_prob_file(audio_file)
|
125 |
|
126 |
# 2- Let's apply a threshold on top of the posteriors
|
|
|
72 |
### Perform Voice Activity Detection
|
73 |
|
74 |
```
|
75 |
+
from speechbrain.pretrained import VAD
|
76 |
|
77 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
78 |
boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
|
|
|
93 |
|
94 |
```
|
95 |
import torchaudio
|
96 |
+
upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'pretrained_model_checkpoints/example_vad.wav')
|
97 |
torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
|
98 |
```
|
99 |
|
|
|
116 |
|
117 |
|
118 |
```python
|
119 |
+
from speechbrain.pretrained import VAD
|
120 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
121 |
|
122 |
# 1- Let's compute frame-level posteriors first
|
123 |
+
audio_file = 'pretrained_model_checkpoints/example_vad.wav'
|
124 |
prob_chunks = VAD.get_speech_prob_file(audio_file)
|
125 |
|
126 |
# 2- Let's apply a threshold on top of the posteriors
|
hyperparams.yaml
CHANGED
@@ -21,7 +21,7 @@ rnn_bidirectional: True
|
|
21 |
dnn_blocks: 1
|
22 |
dnn_neurons: 16
|
23 |
output_neurons: 1
|
24 |
-
device: 'cpu' #
|
25 |
|
26 |
# Feature/Model objects
|
27 |
compute_features: !new:speechbrain.lobes.features.Fbank
|
|
|
21 |
dnn_blocks: 1
|
22 |
dnn_neurons: 16
|
23 |
output_neurons: 1
|
24 |
+
device: 'cpu' # set 'cuda:0' for gpu
|
25 |
|
26 |
# Feature/Model objects
|
27 |
compute_features: !new:speechbrain.lobes.features.Fbank
|