a.pourmand
update
76dce3d
import os

import gradio as gr
from dotenv import load_dotenv

# Populate the environment from a local .env file (if present) before
# reading the Hugging Face access token from it.
load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")  # None when the variable is not set

# gr.Interface.load("models/pyannote/speaker-diarization").launch()

# 1. visit hf.co/pyannote/speaker-diarization and accept user conditions
# 2. visit hf.co/pyannote/segmentation and accept user conditions
# 3. visit hf.co/settings/tokens to create an access token
# 4. instantiate pretrained speaker diarization pipeline

# NOTE(review): this import is deliberately left after load_dotenv(), as in
# the original, so values from .env are already in the environment when
# pyannote is imported -- confirm whether that ordering is actually required.
from pyannote.audio import Pipeline  # noqa: E402

pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization@2.1",
    use_auth_token=HF_API_KEY,
)

# Fail fast with an actionable message: without this check a missing or
# unauthorized token can leave `pipeline` as None, and the failure only
# surfaces later as "'NoneType' object is not callable" on the first request.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization@2.1'. Set HF_API_KEY "
        "to a valid Hugging Face access token and accept the user conditions "
        "of pyannote/speaker-diarization and pyannote/segmentation."
    )
def transcribe_audio(audio):
    """Run speaker diarization on ``audio`` and return the turns as text.

    Despite its name, this function does not produce a transcript: it hands
    ``audio`` to the module-level ``pipeline`` and lists which speaker label
    is active during which time span.

    Args:
        audio: Input forwarded unchanged to ``pipeline``. ``None`` is
            tolerated (presumably what the UI sends when nothing was
            uploaded -- verify against the caller).

    Returns:
        A string with one line per speaker turn, each terminated by a
        newline, formatted as ``start=<t>s stop=<t>s speaker_<label>`` with
        times shown to one decimal place. An empty string is returned when
        ``audio`` is ``None`` or when no turns are produced.
    """
    if audio is None:
        # Nothing to diarize; avoid handing None to the pipeline.
        return ""

    diarization = pipeline(audio)

    # Build the report in one pass instead of repeated string concatenation.
    return "".join(
        f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}\n"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    )
# Web UI: a single audio-upload input and a plain-text output.
# type="filepath" makes Gradio pass the upload to the callback as a path to a
# temporary file. The component's default ("numpy") would instead pass a
# (sample_rate, ndarray) tuple, which the diarization pipeline does not accept.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
)
iface.launch()