import gradio as gr
import numpy as np
from faster_whisper import WhisperModel

# Load the model once at startup; int8 quantization keeps CPU memory modest.
model = WhisperModel("medium", device="cpu", compute_type="int8")
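# On a machine with a CUDA GPU, the same model runs much faster, e.g.:
# model = WhisperModel("medium", device="cuda", compute_type="float16")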


def to_whisper_input(sample_rate, samples):
    # Convert Gradio's (sample_rate, samples) audio into what faster-whisper
    # expects: mono float32 samples at 16 kHz.
    samples = samples.astype(np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)  # down-mix stereo to mono
    if np.abs(samples).max() > 1.0:
        samples /= 32768.0  # Gradio records 16-bit PCM
    if sample_rate != 16000:
        # Cheap linear-interpolation resample; adequate for speech input.
        n_out = int(len(samples) * 16000 / sample_rate)
        samples = np.interp(
            np.arange(n_out) / 16000.0,
            np.arange(len(samples)) / sample_rate,
            samples,
        ).astype(np.float32)
    return samples


def transcribe(audio):
    if audio is None:  # nothing recorded yet
        return ""
    samples = to_whisper_input(*audio)
    segments, info = model.transcribe(samples, beam_size=5)
    transcription = ""
    for segment in segments:
        transcription += f"[{segment.start:.2f}s -> {segment.end:.2f}s]: {segment.text}\n"
    return transcription
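

# A sketch of true chunked streaming, under the assumption that the app
# uses Gradio's streaming-audio pattern (gr.Audio(streaming=True) plus a
# "state" input/output, so each call receives only the newest chunk and
# the accumulated buffer). Not wired into the demo below; for reference.
def transcribe_stream(stream, new_chunk):
    samples = to_whisper_input(*new_chunk)
    stream = samples if stream is None else np.concatenate([stream, samples])
    segments, _ = model.transcribe(stream, beam_size=5)
    return stream, " ".join(segment.text.strip() for segment in segments)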


audio_input = gr.Audio(
    sources=["microphone"],  # record directly from the browser microphone
    type="numpy",
    label="Speak into the microphone for live transcription",
)
output_text = gr.Textbox(label="Transcription")

# live=True re-runs transcribe() whenever the recording changes.
demo = gr.Interface(fn=transcribe, inputs=audio_input, outputs=output_text, live=True)

# share=True also serves the app on a temporary public gradio.live URL.
demo.launch(share=True)