# Spaces status: Build error
import os
import tempfile

import gradio as gr
import whisper
from pydub import AudioSegment
# Load the Whisper "base" checkpoint once at module import; the request
# handler below reads this module-level object on every call.
model = whisper.load_model(name="base")
def process_audio(upload):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    Args:
        upload: The value Gradio passes for the file input: either a
            filesystem path (``str`` / ``os.PathLike``) or a file-like
            object exposing the path as ``.name``. ``None`` (nothing
            uploaded) is accepted.

    Returns:
        The decoded text, or an empty string when no file was uploaded.

    Note:
        ``whisper.pad_or_trim`` fits the audio to Whisper's single decoding
        window (30 seconds per the Whisper README), so longer recordings
        are truncated before decoding.
    """
    if upload is None:
        return ""

    # Gradio hands over a path or a temp-file wrapper, not a web-framework
    # upload object, so there is no ``.save()`` to call; read it in place.
    if isinstance(upload, (str, os.PathLike)):
        source_path = os.fspath(upload)
    else:
        source_path = upload.name

    # A private scratch directory per call avoids name collisions between
    # concurrent requests and is removed even if a step below raises.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "audio.wav")

        # Convert whatever was uploaded to WAV. ``export`` returns the open
        # output handle, so close it explicitly instead of leaking it.
        AudioSegment.from_file(source_path).export(wav_path, format="wav").close()

        # Load the audio and build the log-Mel spectrogram on the model's device.
        audio = whisper.load_audio(wav_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language once and hand it to the decoder so the
    # decoder does not have to repeat the detection pass.
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # Perform transcription using Whisper ASR.
    options = whisper.DecodingOptions(language=detected_language)
    result = whisper.decode(model, mel, options)
    return result.text
# File input for the audio upload. ``gr.inputs.File`` has no ``accept``
# keyword and the ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated,
# so use the top-level components; ``file_types`` restricts the picker.
audio_input = gr.File(label="Upload Audio", file_types=[".wav", ".mp3"])

# Text output component for displaying the transcription.
text_output = gr.Textbox(label="Transcription")

# Build the Gradio interface around process_audio and start serving it.
gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Audio Transcription",
).launch()