DexterSptizu
committed on
Commit
•
f5c8681
1
Parent(s):
16f4628
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
# Load model and processor globally
# Hub checkpoint id shared by the processor and the model below.
WHISPER_CHECKPOINT = "openai/whisper-small"

# Both objects are built once at import time and reused by transcribe_audio.
processor = WhisperProcessor.from_pretrained(WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(WHISPER_CHECKPOINT)

# Clear the forced decoder prompt ids stored in the model config
# (presumably so generation is not pinned to a preset language/task -- confirm).
model.config.forced_decoder_ids = None
|
10 |
+
|
11 |
+
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the module-level Whisper model.

    The recording is loaded at 16 kHz and decoded in consecutive 30-second
    windows. Whisper's feature extractor pads or truncates every input to a
    single 30-second window, so handing it a longer recording in one call
    would silently drop everything after the first window.

    Args:
        audio_path: Filesystem path of the audio to transcribe, or ``None``
            when no audio has been supplied.

    Returns:
        The transcribed text; the prompt ``"Please provide an audio input."``
        when ``audio_path`` is ``None``; or a message starting with
        ``"Error processing audio: "`` if loading or inference raised.
    """
    sample_rate = 16000  # Rate the audio is resampled to before inference.
    window_samples = 30 * sample_rate  # Samples in one 30-second window.

    if audio_path is None:
        return "Please provide an audio input."

    try:
        # Imported here rather than at module top so that an import failure
        # is reported through the error message below instead of crashing.
        import librosa

        audio, _ = librosa.load(audio_path, sr=sample_rate)

        pieces = []
        # max(..., 1) keeps one (empty) window for zero-length audio so that
        # input still flows through the processor exactly as it did before.
        for start in range(0, max(len(audio), 1), window_samples):
            window = audio[start:start + window_samples]

            input_features = processor(
                window,
                sampling_rate=sample_rate,
                return_tensors="pt",
            ).input_features

            predicted_ids = model.generate(input_features)
            decoded = processor.batch_decode(
                predicted_ids,
                skip_special_tokens=True,
            )
            pieces.append(decoded[0])

        # Whisper's decoded segments normally start with a space, so plain
        # concatenation keeps words separated and leaves the output for
        # recordings of 30 seconds or less unchanged.
        return "".join(pieces)
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error processing audio: {e}"
|
38 |
+
|
39 |
+
# Create Gradio interface
def _build_transcription_tab(tab_title, audio_source, audio_label):
    """Add one transcription tab to the Gradio layout currently being built.

    The tab holds an audio input and a "Transcribe" button in the left
    column and a read-out textbox in the right column; clicking the button
    runs ``transcribe_audio`` on the audio and writes the result to the box.

    Args:
        tab_title: Text shown on the tab.
        audio_source: Single entry for the audio component's ``sources`` list.
        audio_label: Label shown on the audio component.

    Returns:
        Tuple ``(audio_input, trigger_button, result_box)`` of the created
        components.
    """
    with gr.TabItem(tab_title):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=[audio_source],
                    type="filepath",
                    label=audio_label,
                )
                trigger_button = gr.Button("Transcribe")
            with gr.Column():
                result_box = gr.Textbox(
                    label="Transcription",
                    placeholder="Transcription will appear here...",
                    lines=5,
                )
        trigger_button.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=result_box,
        )
    return audio_input, trigger_button, result_box


with gr.Blocks() as demo:
    gr.Markdown("# Whisper Audio Transcription")

    with gr.Tabs():
        # The original module-level component names are kept by unpacking.
        audio_file, upload_button, output_text1 = _build_transcription_tab(
            "Upload Audio", "upload", "Upload Audio File"
        )
        audio_mic, record_button, output_text2 = _build_transcription_tab(
            "Record Audio", "microphone", "Record Audio"
        )

    gr.Markdown("""
    ### Instructions:
    1. Choose either 'Upload Audio' or 'Record Audio' tab
    2. Upload an audio file or record using your microphone
    3. Click 'Transcribe' to get the transcription
    4. The transcribed text will appear in the output box

    ### Supported Audio Formats:
    - WAV
    - MP3
    - FLAC
    - OGG
    """)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|