import gradio as gr
import numpy as np
from faster_whisper import WhisperModel

# Load the Whisper model
model = WhisperModel("medium", device="cpu", compute_type="int8")

# Function to handle transcription
def transcribe(audio):
    # Gradio's numpy audio input arrives as a (sample_rate, samples) tuple
    if audio is None:
        return ""
    sample_rate, samples = audio

    # Convert int16 PCM (Gradio's usual microphone format) to float32 in [-1, 1]
    if samples.dtype == np.int16:
        samples = samples.astype(np.float32) / 32768.0
    else:
        samples = samples.astype(np.float32)

    # Downmix stereo to mono if needed
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # faster-whisper assumes 16 kHz when given a raw array; resample with
    # simple linear interpolation if the recording uses a different rate
    if sample_rate != 16000:
        duration = samples.shape[0] / sample_rate
        target_length = int(duration * 16000)
        samples = np.interp(
            np.linspace(0.0, duration, target_length, endpoint=False),
            np.linspace(0.0, duration, samples.shape[0], endpoint=False),
            samples,
        )

    # Transcribe the audio and format each segment with its timestamps
    segments, info = model.transcribe(samples, beam_size=5)
    transcription = ""
    for segment in segments:
        transcription += f"[{segment.start:.2f}s -> {segment.end:.2f}s]: {segment.text}\n"
    return transcription


# Gradio interface for live transcription
audio_input = gr.Audio(type="numpy", label="Speak into the microphone for live transcription")
output_text = gr.Textbox(label="Transcription")

# Create a Gradio interface with live microphone input and set `live=True`
demo = gr.Interface(fn=transcribe, inputs=audio_input, outputs=output_text, live=True)

# Launch the Gradio app
demo.launch(share=True)