|
import logging
import os

import gradio as gr
from pyannote.audio import Pipeline
|
|
|
|
|
# Hugging Face access token, read from the process environment.
hf_auth_token = os.getenv("HF_AUTH_TOKEN")

# Fail fast with an actionable message instead of a confusing download error.
if not hf_auth_token:
    raise ValueError("The environment variable 'HF_AUTH_TOKEN' is not set. Please set it in the Space.")

# Load the pretrained diarization pipeline once at start-up; every request
# handled by the app reuses this single instance.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=hf_auth_token,
)

# NOTE: some pyannote.audio releases report a failed download of a gated or
# private pipeline by printing a hint and returning None instead of raising.
# Stop here in that case, otherwise the app would start and then fail on every
# request with "'NoneType' object is not callable".
if pipeline is None:
    raise RuntimeError(
        "Could not load the 'pyannote/speaker-diarization' pipeline. Check that "
        "'HF_AUTH_TOKEN' is valid and that the model's user conditions have been "
        "accepted on Hugging Face."
    )
|
|
|
|
|
def diarize_audio(audio_path):
    """Run speaker diarization on an audio file and list the speaker turns.

    Args:
        audio_path: Path of the audio file to analyse. An empty value
            (``None`` or ``""``) is rejected up front; presumably this is
            what the UI passes when nothing was uploaded.

    Returns:
        A string with one line per speaker turn, formatted as
        ``"<start>s - <end>s: <speaker>"`` with times rounded to one decimal
        place (an empty string when no turn is produced). On any failure the
        function does not raise; it returns a message starting with
        ``"Error processing audio:"`` so the text output can display it.
    """
    # Guard the "nothing to process" case explicitly so the user gets a clear
    # message rather than an opaque error from the diarization library.
    if not audio_path:
        return "Error processing audio: no audio file was provided."

    try:
        diarization = pipeline(audio_path)
        # itertracks(yield_label=True) yields (segment, track, label) triples;
        # the track identifier is not needed for the report.
        return "\n".join(
            f"{turn.start:.1f}s - {turn.end:.1f}s: {speaker}"
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        )
    except Exception as e:
        # Request boundary: keep the app responsive, but record the full
        # traceback in the logs so the failure can still be diagnosed.
        logging.getLogger(__name__).exception(
            "Speaker diarization failed for %r", audio_path
        )
        return f"Error processing audio: {e}"
|
|
|
|
|
# Audio input widget; type="filepath" asks Gradio to hand the callback a path
# to the audio on disk, which is what diarize_audio passes to the pipeline.
audio_input = gr.Audio(type="filepath")

# Web UI: one audio input, one plain-text output showing the speaker turns.
interface = gr.Interface(
    title="Speaker Diarization",
    description="Upload an audio file to perform speaker diarization.",
    fn=diarize_audio,
    inputs=audio_input,
    outputs="text",
)

# Start serving the app.
interface.launch()
|
|