# app.py -- "Create app.py", committed by rafaaa2105 (b0b070a, verified), 1.14 kB.
# (Header recovered from the hosting page's file view; it is not part of the program.)
import os

import gradio as gr
import torch
from pyannote.audio import Pipeline
# Initialize the diarization pipeline.
#
# The Hugging Face access token is taken from the HF_TOKEN environment variable
# so the real credential never has to be committed to the repository. The
# original placeholder is kept as the fallback, so behaviour is unchanged when
# the variable is not set.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ.get("HF_TOKEN", "YOUR_HF_TOKEN"),
)

# NOTE(review): pyannote appears to signal some load failures (e.g. gated model,
# invalid token) by returning None instead of raising -- confirm for the pinned
# version. Failing here gives a clear message instead of an AttributeError on
# the `.to(...)` call below.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'. Set HF_TOKEN to a "
        "valid Hugging Face access token and accept the model's user conditions."
    )

# Run on the GPU when one is available, otherwise fall back to the CPU.
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
def diarize(audio):
    """Run speaker diarization on a clip and return the first two speakers' turns.

    Args:
        audio: Either the ``(sample_rate, samples)`` pair produced by
            ``gr.Audio(type="numpy")`` or, for backward compatibility, an
            already prepared waveform that is passed through unchanged and
            treated as 16 kHz. ``None`` (nothing uploaded) is accepted.

    Returns:
        A ``(speaker1_segments, speaker2_segments)`` tuple. Each element is a
        list of ``(segment.start, segment.end)`` pairs. "Speaker 1" is the
        first distinct label yielded by the diarization, "speaker 2" the
        second; any further speakers are ignored.
    """
    if audio is None:
        return [], []

    if isinstance(audio, (tuple, list)) and len(audio) == 2:
        sample_rate, samples = audio
        waveform = torch.as_tensor(samples)
        if waveform.numel() == 0:
            return [], []

        if waveform.is_floating_point():
            waveform = waveform.to(torch.float32)
        else:
            # Integer PCM: rescale to [-1.0, 1.0) using the dtype's full range.
            full_scale = float(torch.iinfo(waveform.dtype).max) + 1.0
            waveform = waveform.to(torch.float32) / full_scale

        if waveform.dim() == 1:
            # Mono clip: add the channel axis -> (1, time).
            waveform = waveform.unsqueeze(0)
        else:
            # Multi-channel clips arrive as (time, channel); move channels
            # first and downmix to a single channel.
            waveform = waveform.transpose(0, 1).mean(dim=0, keepdim=True)
        sample_rate = int(sample_rate)
    else:
        # Original calling convention: the caller supplies the waveform itself.
        waveform, sample_rate = audio, 16000

    diarization = pipeline({"waveform": waveform, "sample_rate": sample_rate})

    speaker1_segments = []
    speaker2_segments = []
    # Labels are assigned to output lists in order of first appearance, so the
    # result does not depend on the exact label strings the pipeline emits.
    unassigned = iter((speaker1_segments, speaker2_segments))
    bucket_for = {}
    for segment, _, speaker in diarization.itertracks(yield_label=True):
        if speaker not in bucket_for:
            # A third or later speaker gets no bucket and is skipped below.
            bucket_for[speaker] = next(unassigned, None)
        bucket = bucket_for[speaker]
        if bucket is not None:
            bucket.append((segment.start, segment.end))
    return speaker1_segments, speaker2_segments
# Web UI: one uploaded audio clip in, one text box per speaker out.
# NOTE(review): Gradio 4 renamed the `source` argument of `gr.Audio` to
# `sources` -- confirm against the Gradio version this app is pinned to.
audio_input = gr.Audio(source="upload", type="numpy")
segment_outputs = [
    gr.Textbox(label="Speaker 1 Segments (start, end)"),
    gr.Textbox(label="Speaker 2 Segments (start, end)"),
]

interface = gr.Interface(
    fn=diarize,
    inputs=audio_input,
    outputs=segment_outputs,
    title="Speaker Diarization",
    description="Upload an audio file and get the segments where each speaker talks.",
)

# Start serving as soon as the module is executed.
interface.launch()