Spaces:

DexterSptizu
/

openai-whisper-small-speech-to-text

Running

App Files Files Community

DexterSptizu committed 16 days ago

Commit

f5c8681

•

1 Parent(s): 16f4628

Create app.py

Browse files

Files changed (1) hide show

app.py +101 -0

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import gradio as gr
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import numpy as np
+# Load the Whisper checkpoint once at import time so every request reuses it.
+MODEL_ID = "openai/whisper-small"
+processor = WhisperProcessor.from_pretrained(MODEL_ID)
+model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
+# Clear any forced decoder prompt ids stored in the checkpoint config
+# (presumably so language/task tokens are predicted rather than forced --
+# confirm against the Whisper model documentation).
+model.config.forced_decoder_ids = None
+def transcribe_audio(audio_path):
+    """Transcribe an audio file to text with the globally loaded Whisper model.
+
+    Args:
+        audio_path: Filesystem path of the audio file, or None when no audio
+            was supplied.
+
+    Returns:
+        The transcribed text. For missing or empty input, or when any step
+        fails, a human-readable message is returned instead of raising so
+        the result can always be shown in the output text box.
+    """
+    # Guard clause: nothing to do without an input file.
+    if audio_path is None:
+        return "Please provide an audio input."
+    sample_rate = 16000
+    # The Whisper feature extractor pads/truncates each input to a single
+    # 30-second window, so anything longer has to be fed window by window or
+    # the tail of the recording is silently dropped.
+    window_samples = 30 * sample_rate
+    try:
+        # Imported lazily so a missing librosa surfaces as a UI message
+        # rather than an import-time crash of the whole app.
+        import librosa
+        audio, _ = librosa.load(audio_path, sr=sample_rate)
+        # A zero-length recording would be padded to pure silence and decoded
+        # anyway; report it explicitly instead.
+        if len(audio) == 0:
+            return "The provided audio is empty."
+        pieces = []
+        for start in range(0, len(audio), window_samples):
+            input_features = processor(
+                audio[start:start + window_samples],
+                sampling_rate=sample_rate,
+                return_tensors="pt",
+            ).input_features
+            predicted_ids = model.generate(input_features)
+            decoded = processor.batch_decode(
+                predicted_ids,
+                skip_special_tokens=True,
+            )
+            pieces.append(decoded[0])
+        # Single window: return the decoded text untouched so clips of up to
+        # 30 seconds produce exactly the same output as before.
+        if len(pieces) == 1:
+            return pieces[0]
+        # NOTE: windows are cut at fixed offsets, so a word that straddles a
+        # boundary may be transcribed imperfectly.
+        return " ".join(text.strip() for text in pieces if text.strip())
+    except Exception as e:
+        # UI boundary: keep the traceback in the server log, show a short
+        # message to the user.
+        import logging
+        logging.getLogger(__name__).exception(
+            "Transcription failed for %s", audio_path
+        )
+        return f"Error processing audio: {e}"
+# Build the Gradio UI: one tab per audio source, both wired to transcribe_audio.
+def _transcription_tab(tab_title, source, audio_label):
+    """Create one tab holding an audio input, a button and an output box.
+
+    Must be called inside an active ``gr.Blocks``/``gr.Tabs`` context.
+    Returns the (audio, button, textbox) components that were created.
+    """
+    with gr.TabItem(tab_title):
+        with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio(
+                    sources=[source],
+                    type="filepath",
+                    label=audio_label
+                )
+                trigger = gr.Button("Transcribe")
+            with gr.Column():
+                result_box = gr.Textbox(
+                    label="Transcription",
+                    placeholder="Transcription will appear here...",
+                    lines=5
+                )
+        # Clicking the button sends the audio file path to the transcriber
+        # and shows the returned text in this tab's output box.
+        trigger.click(
+            fn=transcribe_audio,
+            inputs=audio_input,
+            outputs=result_box
+        )
+    return audio_input, trigger, result_box
+with gr.Blocks() as demo:
+    gr.Markdown("# Whisper Audio Transcription")
+    with gr.Tabs():
+        audio_file, upload_button, output_text1 = _transcription_tab(
+            "Upload Audio", "upload", "Upload Audio File"
+        )
+        audio_mic, record_button, output_text2 = _transcription_tab(
+            "Record Audio", "microphone", "Record Audio"
+        )
+    gr.Markdown("""
+    ### Instructions:
+    1. Choose either 'Upload Audio' or 'Record Audio' tab
+    2. Upload an audio file or record using your microphone
+    3. Click 'Transcribe' to get the transcription
+    4. The transcribed text will appear in the output box
+    ### Supported Audio Formats:
+    - WAV
+    - MP3
+    - FLAC
+    - OGG
+    """)
+# Start the web server only when run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()