import logging

import gradio as gr
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor


# Single source of truth for the checkpoint so the processor and the model
# are always loaded from the same place.
_WHISPER_CHECKPOINT = "openai/whisper-small"

# Loaded once at import time; transcribe_audio() reuses these for every call.
processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)

# NOTE(review): clearing forced_decoder_ids presumably lets generate() choose
# the language/task tokens itself instead of having them forced -- confirm
# against the installed transformers version.
model.config.forced_decoder_ids = None


# Sample rate (Hz) the audio is resampled to on load and that is reported
# to the processor; keeping it in one place prevents the two from diverging.
_TARGET_SAMPLE_RATE = 16000


def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio_path: Filesystem path of the audio to transcribe. ``None`` or
            an empty string means the user supplied no audio.

    Returns:
        The decoded transcription on success; a prompt asking for input when
        ``audio_path`` is empty; or a message starting with
        ``"Error processing audio: "`` when loading or inference fails.
        Failures inside loading/inference are reported as text rather than
        raised, so the result can be shown directly in a textbox.
    """
    if not audio_path:
        return "Please provide an audio input."

    try:
        # Imported here (inside the try) so a missing librosa is reported as
        # an error message instead of stopping the app at import time.
        import librosa

        audio, _ = librosa.load(audio_path, sr=_TARGET_SAMPLE_RATE)
        if audio.size == 0:
            # Nothing to transcribe; avoid running the model on no samples.
            return "Error processing audio: the audio contains no samples."

        # NOTE(review): Whisper's feature extractor presumably works on a
        # fixed 30-second window, so longer recordings may be truncated --
        # confirm and add chunking if long audio must be supported.
        input_features = processor(
            audio,
            sampling_rate=_TARGET_SAMPLE_RATE,
            return_tensors="pt",
        ).input_features

        predicted_ids = model.generate(input_features)
        transcription = processor.batch_decode(
            predicted_ids,
            skip_special_tokens=True,
        )
        return transcription[0]
    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message
        # to the user.
        logging.getLogger(__name__).exception(
            "Transcription failed for %r", audio_path
        )
        return f"Error processing audio: {e}"


# Help text rendered under the tabs. Kept at column 0 so no line is indented
# inside the Markdown source.
_INSTRUCTIONS_MD = """
### Instructions:
1. Choose either 'Upload Audio' or 'Record Audio' tab
2. Upload an audio file or record using your microphone
3. Click 'Transcribe' to get the transcription
4. The transcribed text will appear in the output box

### Supported Audio Formats:
- WAV
- MP3
- FLAC
- OGG
"""


def _build_transcription_tab(tab_title, source, audio_label):
    """Create one tab with an audio input, a button and an output textbox.

    Must be called inside an active ``gr.Tabs()`` context. The button is
    wired to ``transcribe_audio`` with the audio component as input and the
    textbox as output.

    Args:
        tab_title: Title shown on the tab.
        source: Single entry for the audio component's ``sources`` list
            (``"upload"`` or ``"microphone"`` in this file).
        audio_label: Label shown on the audio component.

    Returns:
        Tuple ``(audio_input, button, output_box)`` of the created components.
    """
    with gr.TabItem(tab_title):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=[source],
                    type="filepath",
                    label=audio_label,
                )
                button = gr.Button("Transcribe")
            with gr.Column():
                output_box = gr.Textbox(
                    label="Transcription",
                    placeholder="Transcription will appear here...",
                    lines=5,
                )
        button.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=output_box,
        )
    return audio_input, button, output_box


# Two-tab UI: one tab for uploaded files, one for microphone recordings.
# The component names below are kept at module level as in the original.
with gr.Blocks() as demo:
    gr.Markdown("# Whisper Audio Transcription")

    with gr.Tabs():
        audio_file, upload_button, output_text1 = _build_transcription_tab(
            "Upload Audio", "upload", "Upload Audio File"
        )
        audio_mic, record_button, output_text2 = _build_transcription_tab(
            "Record Audio", "microphone", "Record Audio"
        )

    gr.Markdown(_INSTRUCTIONS_MD)


if __name__ == "__main__":
    # Start the Gradio app only when run as a script, not when imported.
    demo.launch()