import gradio as gr
import whisper

# Load the Whisper model
model = whisper.load_model("base")


# Transcribe an uploaded audio file with Whisper
def process_audio(file_path):
    # Load the audio and pad/trim it to Whisper's 30-second window
    audio = whisper.load_audio(file_path)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language (not returned, but could be shown in the UI)
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # Decode the mel spectrogram into text
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)

    return result.text


# Audio input component; type="filepath" passes the uploaded file's path
# straight to process_audio, so no manual temp-file handling is needed
audio_input = gr.Audio(label="Upload Audio", type="filepath")

# Text output component for displaying the transcription
text_output = gr.Textbox(label="Transcription")

# Create the Gradio interface
demo = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Audio Transcription",
)

if __name__ == "__main__":
    demo.launch()
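
For a quick check of the transcription logic without starting the web UI, process_audio can be called directly; a minimal sketch, assuming the script above is saved as app.py and that "sample.wav" stands in for a real audio file:

# Minimal sketch: exercise the transcription function without the web UI.
# Assumes the script above is saved as app.py; "sample.wav" is a placeholder
# for any audio file that ffmpeg can decode.
from app import process_audio

print(process_audio("sample.wav"))

Because the launch() call sits under the __main__ guard, importing app only loads the model and builds the interface; the server itself is not started.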