Spaces:

frogcho123
/

s2s

Build error

File size: 1,541 Bytes

36bec1c
a66dfeb
ec72da9
b3ba25a
2bacaf7
a66dfeb
ec72da9
2bacaf7
a66dfeb
 
 
b3ba25a
 
 
 
 
 
 
 
 
a66dfeb
b3ba25a
ec72da9
 
b3ba25a
a66dfeb
 
 
b3ba25a
a66dfeb
ec72da9
 
a66dfeb
b3ba25a
 
 
 
 
a66dfeb
2bacaf7
2920572
b3ba25a
a66dfeb
 
 
 
 
 
b3ba25a

import gradio as gr
import os
import whisper
from pydub import AudioSegment

# Load the Whisper "base" checkpoint once at import time; the resulting
# module-level `model` is reused by every process_audio() call below.
model = whisper.load_model("base")

# Function to process the uploaded audio file and perform transcription
def _resolve_upload_path(upload, scratch_dir):
    """Return a filesystem path holding the uploaded audio.

    Accepts a plain path, an object exposing the on-disk path as ``name``
    (the shape Gradio's File input is understood to provide -- a temp-file
    wrapper), or an object with a ``save(path)`` method, which is copied into
    *scratch_dir* first.

    Raises:
        TypeError: if *upload* matches none of the supported shapes.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)

    candidate = getattr(upload, "name", None)
    if isinstance(candidate, str) and os.path.isfile(candidate):
        return candidate

    if hasattr(upload, "save"):
        saved_path = os.path.join(scratch_dir, "uploaded_audio")
        upload.save(saved_path)
        return saved_path

    raise TypeError(f"Unsupported upload object: {type(upload).__name__}")


def process_audio(upload):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is converted to WAV with pydub, loaded by Whisper, fitted to
    Whisper's fixed input window with ``pad_or_trim`` (30 seconds according to
    the Whisper README, so longer recordings are truncated), and decoded in
    the language Whisper detects.

    Args:
        upload: The value Gradio passes for the file input: a path, a
            temp-file-like object with a ``name`` attribute, or an object
            with a ``save(path)`` method.

    Returns:
        The transcribed text.

    Raises:
        ValueError: if no file was uploaded (``upload`` is ``None``).
        TypeError: if ``upload`` is not a supported upload object.
    """
    import tempfile

    if upload is None:
        raise ValueError("No audio file was uploaded.")

    # A private scratch directory per call: concurrent requests cannot clobber
    # each other's files, and everything is removed even if a step raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        source_path = _resolve_upload_path(upload, scratch_dir)

        # Convert the audio file to WAV format
        wav_path = os.path.join(scratch_dir, "uploaded_audio.wav")
        AudioSegment.from_file(source_path).export(wav_path, format="wav")

        # The samples are fully read into memory here, so the scratch files
        # are no longer needed once this returns.
        audio = whisper.load_audio(wav_path)

    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    _, probs = model.detect_language(mel)
    detected_language = max(probs, key=probs.get)

    # Pass the detected language on so decode() does not have to detect it a
    # second time. fp16 is only requested off-CPU: half precision is
    # understood to be unsupported for CPU inference (Whisper's own
    # transcribe() falls back to fp32 there) -- NOTE(review): confirm on the
    # deployment hardware.
    options = whisper.DecodingOptions(
        language=detected_language,
        fp16=model.device.type != "cpu",
    )
    result = whisper.decode(model, mel, options)
    return result.text

# Create a file input component for uploading the audio file.
# NOTE: the legacy ``gr.inputs.File`` constructor does not take an ``accept``
# keyword (passing one raises TypeError at startup), so the supported formats
# are communicated through the interface description instead.
audio_input = gr.inputs.File(label="Upload Audio")

# Create a text output component for displaying the transcription
text_output = gr.outputs.Textbox(label="Transcription")

# Create a Gradio interface and start serving it
gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=text_output,
    title="Audio Transcription",
    description="Upload a .wav or .mp3 file to transcribe.",
).launch()