Files changed (2)
  1. README.md +4 -4
  2. hyperparams.yaml +1 -1
README.md CHANGED
@@ -72,7 +72,7 @@ Please notice that we encourage you to read our tutorials and learn more about
72
  ### Perform Voice Activity Detection
73
 
74
  ```
75
- from speechbrain.inference.VAD import VAD
76
 
77
  VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
78
  boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
@@ -93,7 +93,7 @@ To do it:
93
 
94
  ```
95
  import torchaudio
96
- upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'example_vad.wav')
97
  torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
98
  ```
99
 
@@ -116,11 +116,11 @@ We designed the VAD such that you can have access to all of these steps (this mi
116
 
117
 
118
  ```python
119
- from speechbrain.inference.VAD import VAD
120
  VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
121
 
122
  # 1- Let's compute frame-level posteriors first
123
- audio_file = "example.wav"
124
  prob_chunks = VAD.get_speech_prob_file(audio_file)
125
 
126
  # 2- Let's apply a threshold on top of the posteriors
 
72
  ### Perform Voice Activity Detection
73
 
74
  ```
75
+ from speechbrain.pretrained import VAD
76
 
77
  VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
78
  boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
 
93
 
94
  ```
95
  import torchaudio
96
+ upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'pretrained_models/vad-crdnn-libriparty/example_vad.wav')
97
  torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
98
  ```
99
 
 
116
 
117
 
118
  ```python
119
+ from speechbrain.pretrained import VAD
120
  VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
121
 
122
  # 1- Let's compute frame-level posteriors first
123
+ audio_file = 'pretrained_models/vad-crdnn-libriparty/example_vad.wav'
124
  prob_chunks = VAD.get_speech_prob_file(audio_file)
125
 
126
  # 2- Let's apply a threshold on top of the posteriors
hyperparams.yaml CHANGED
@@ -21,7 +21,7 @@ rnn_bidirectional: True
21
  dnn_blocks: 1
22
  dnn_neurons: 16
23
  output_neurons: 1
24
- device: 'cpu' # or 'cuda'
25
 
26
  # Feature/Model objects
27
  compute_features: !new:speechbrain.lobes.features.Fbank
 
21
  dnn_blocks: 1
22
  dnn_neurons: 16
23
  output_neurons: 1
24
+ device: 'cpu' # set 'cuda:0' for GPU
25
 
26
  # Feature/Model objects
27
  compute_features: !new:speechbrain.lobes.features.Fbank