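# NOTE(review): the next three lines look like residue copied from the hosting
# web page (avatar caption, commit message, short commit hash) - confirm.
#   csukuangfj's picture
#   add models
#   0c707d2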
#!/usr/bin/env python3
import torch
from pyannote.audio import Model
from pyannote.audio.pipelines import (
VoiceActivityDetection as VoiceActivityDetectionPipeline,
)
@torch.no_grad()
def main():
    """Run the voice-activity-detection pipeline on a sample wave file.

    Loads the segmentation checkpoint ``./pytorch_model.bin``, wraps it in a
    ``VoiceActivityDetectionPipeline``, applies the pipeline to
    ``./lei-jun-test.wav`` and prints the type of the result followed by the
    result itself. Both files are expected in the current directory.
    """
    # The checkpoint can be downloaded from
    # https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    segmentation = Model.from_pretrained("./pytorch_model.bin")
    segmentation.eval()

    vad = VoiceActivityDetectionPipeline(segmentation=segmentation)

    # Parameter references:
    # https://huggingface.co/pyannote/voice-activity-detection/blob/main/config.yaml
    # https://github.com/pyannote/pyannote-audio/issues/1215
    vad.onset = 0.5
    vad.offset = 0.5
    vad.instantiate({"min_duration_on": 0.0, "min_duration_off": 0.0})

    # The test wave can be fetched with
    # wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
    result = vad("./lei-jun-test.wav")
    print(type(result))
    print(result)
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script.
    main()