File size: 1,266 Bytes
219bc21
c09680d
9cba922
c09680d
219bc21
 
 
 
 
 
 
357116d
 
219bc21
357116d
9cba922
 
 
bb2cfda
 
 
 
 
 
 
 
 
9cba922
 
 
 
357116d
9cba922
357116d
 
9cba922
 
357116d
9cba922
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import gradio as gr
from pyannote.audio import Pipeline

# Read the Hugging Face access token from the environment.
hf_auth_token = os.environ.get("HF_AUTH_TOKEN")

# Fail at startup when the token is missing or empty, before any model
# loading is attempted.
if hf_auth_token is None or hf_auth_token == "":
    raise ValueError("The environment variable 'HF_AUTH_TOKEN' is not set. Please set it in the Space.")

# Load the pyannote speaker-diarization pipeline, authenticating with the
# access token read above. The resulting object is called by diarize_audio.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_auth_token)

# Function that processes an audio file into a speaker-turn report.
def diarize_audio(audio_path) -> str:
    """Run speaker diarization on an audio file and format the result as text.

    Args:
        audio_path: Path of the audio file to analyse. ``None`` or an empty
            string is treated as "no file supplied".

    Returns:
        One line per speaker turn in the form
        ``"<start>s - <end>s: <speaker>"`` (start/end formatted with one
        decimal place), joined by newlines. This is an empty string when the
        pipeline yields no turns. On any failure a message starting with
        ``"Error processing audio: "`` is returned instead of raising.
    """
    # Guard before touching the model: a missing path would otherwise be
    # forwarded to the pipeline and come back as an opaque library error.
    # NOTE(review): presumably the UI passes None when nothing was uploaded;
    # confirm against the Gradio version in use.
    if not audio_path:
        return "Error processing audio: no audio file was provided."

    try:
        # Perform speaker diarization with pyannote.
        diarization = pipeline(audio_path)
        segments = [
            f"{turn.start:.1f}s - {turn.end:.1f}s: {speaker}"
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        ]
    except Exception as e:
        # Deliberately broad: the result is shown as text to the user, so
        # every failure is reported as a message rather than propagated.
        return f"Error processing audio: {e}"
    return "\n".join(segments)

# Gradio interface definition: one audio input, plain-text output.
# The audio component is configured with type="filepath", so the callback is
# expected to receive a path to the audio file rather than raw samples.
_audio_input = gr.Audio(type="filepath")
interface = gr.Interface(
    title="Speaker Diarization",
    description="Upload an audio file to perform speaker diarization.",
    fn=diarize_audio,
    inputs=_audio_input,
    outputs="text",
)

# Launch the application.
interface.launch()