DexterSptizu committed on
Commit
f5c8681
1 Parent(s): 16f4628

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
4
+ import numpy as np
5
+
6
# Checkpoint name shared by the processor and the model.
_WHISPER_CHECKPOINT = "openai/whisper-small"

# Loaded once at import time so every transcription request reuses them.
processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)

# Clear any decoder ids forced by the checkpoint's config.
# NOTE(review): presumably this lets generation pick language/task on its
# own -- confirm against the Whisper model card.
model.config.forced_decoder_ids = None
10
+
11
+ def transcribe_audio(audio_path):
12
+ try:
13
+ # Load and process audio
14
+ if audio_path is None:
15
+ return "Please provide an audio input."
16
+
17
+ # Read audio file
18
+ import librosa
19
+ audio, sr = librosa.load(audio_path, sr=16000)
20
+
21
+ # Process audio
22
+ input_features = processor(
23
+ audio,
24
+ sampling_rate=16000,
25
+ return_tensors="pt"
26
+ ).input_features
27
+
28
+ # Generate transcription
29
+ predicted_ids = model.generate(input_features)
30
+ transcription = processor.batch_decode(
31
+ predicted_ids,
32
+ skip_special_tokens=True
33
+ )
34
+
35
+ return transcription[0]
36
+ except Exception as e:
37
+ return f"Error processing audio: {str(e)}"
38
+
39
# Build the Gradio interface: one tab per audio source, both backed by
# transcribe_audio.
def _build_transcription_tab(tab_title, source, audio_label):
    """Create one tab with an audio input, a button and an output textbox.

    Must be called inside an active ``gr.Tabs()`` context. Returns the
    ``(audio, button, textbox)`` components after wiring the button to
    ``transcribe_audio``.
    """
    with gr.TabItem(tab_title):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=[source],
                    type="filepath",
                    label=audio_label,
                )
                run_button = gr.Button("Transcribe")
            with gr.Column():
                result_box = gr.Textbox(
                    label="Transcription",
                    placeholder="Transcription will appear here...",
                    lines=5,
                )
        run_button.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=result_box,
        )
    return audio_input, run_button, result_box


with gr.Blocks() as demo:
    gr.Markdown("# Whisper Audio Transcription")

    with gr.Tabs():
        # Component names are kept at module level, as before.
        audio_file, upload_button, output_text1 = _build_transcription_tab(
            "Upload Audio", "upload", "Upload Audio File"
        )
        audio_mic, record_button, output_text2 = _build_transcription_tab(
            "Record Audio", "microphone", "Record Audio"
        )

    gr.Markdown("""
    ### Instructions:
    1. Choose either 'Upload Audio' or 'Record Audio' tab
    2. Upload an audio file or record using your microphone
    3. Click 'Transcribe' to get the transcription
    4. The transcribed text will appear in the output box

    ### Supported Audio Formats:
    - WAV
    - MP3
    - FLAC
    - OGG
    """)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()