import gradio as gr
import whisper

# Load the Whisper model
model = whisper.load_model("base")


# Process the uploaded audio file and return its transcription
def process_audio(file_path):
    # Gradio passes the path of the uploaded file when type="filepath",
    # so there is no need to save the upload manually
    if file_path is None:
        return ""

    # Load the audio and pad/trim it to the 30-second window Whisper expects
    audio = whisper.load_audio(file_path)
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram and move it to the model's device
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language (computed here but not shown in the UI)
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # Perform transcription using Whisper ASR
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    transcription = result.text

    return transcription


# Audio input component for uploading the audio file
audio_input = gr.Audio(label="Upload Audio", type="filepath")

# Text output component for displaying the transcription
text_output = gr.Textbox(label="Transcription")

# Create and launch the Gradio interface
gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Audio Transcription",
).launch()
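# Optional sanity check (not part of the original script, and the filename
# "sample.mp3" below is only a placeholder for any audio file on disk):
# calling process_audio directly prints a transcription without launching
# the web UI. Uncomment and place it before the .launch() call to try it.
#
#     print(process_audio("sample.mp3"))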