Add-Vishnu committed on
Commit
92971a2
·
1 Parent(s): cf0a5a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py CHANGED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import soundfile as sf
3
+ import tempfile
4
+ import shutil
5
+ import os
6
+ import librosa
7
+ import time
8
+ import numpy as np
9
+ import subprocess
10
+ from pywhispercpp.model import Model
11
+
12
# Shared whisper.cpp model instance, constructed once at import time and
# reused by every transcription request.
model = Model('base.en', n_threads=6)
13
+
14
def resample_to_16k(audio, orig_sr):
    """Resample *audio* from ``orig_sr`` Hz to 16 kHz via ``librosa.resample``.

    Args:
        audio: Audio samples, passed to librosa as ``y``.
        orig_sr: Sampling rate of ``audio`` in Hz.

    Returns:
        The samples resampled to 16000 Hz.
    """
    return librosa.resample(y=audio, orig_sr=orig_sr, target_sr=16000)
17
+
18
def transcribe(audio,):
    """Transcribe a recording and report how long the model took.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by the audio input
            component, or ``None`` when nothing was recorded.

    Returns:
        A ``(text, seconds)`` tuple: the concatenated text of all segments
        returned by the model, and the wall-clock duration of the
        ``model.transcribe`` call. ``("", 0.0)`` is returned when there is no
        audio to process.
    """
    if audio is None:
        # Nothing was recorded; unpacking None below would raise a TypeError.
        return "", 0.0

    sr, y = audio
    # np.array copies, so the in-place normalisation below never mutates the
    # caller's buffer.
    y = np.array(y, dtype=np.float32)
    if y.ndim > 1:
        # NOTE(review): assumes the (samples, channels) layout documented for
        # the gradio Audio numpy format; downmix to mono before resampling.
        y = y.mean(axis=1)
    if y.size == 0:
        # np.max raises on an empty array; there is nothing to transcribe.
        return "", 0.0

    peak = np.max(np.abs(y))
    if peak > 0:
        # Peak-normalise; skipped for pure silence to avoid dividing by zero
        # and filling the signal with NaNs.
        y /= peak
    y_resampled = resample_to_16k(y, sr)

    # Only reserve a unique path here; the handle is closed before writing so
    # the file can be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        sf.write(temp_audio_path, y_resampled, 16000)

        start_time_py = time.perf_counter()
        py_result = model.transcribe(temp_audio_path, n_threads=6)
        elapsed_py = time.perf_counter() - start_time_py
    finally:
        # delete=False means the file is never cleaned up automatically;
        # remove it so each request does not leak a WAV file.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass

    print("Py_result : ", py_result)
    print("--------------------------")
    print(f"Execution time using py: {elapsed_py} seconds")

    output_text = "".join(segment.text for segment in py_result)
    return output_text, elapsed_py
39
+
40
+
41
+
42
# Web UI: microphone input wired to `transcribe`, whose two return values
# (text, elapsed time) map onto the two output textboxes in order.
# The outputs are passed as a list; as separate positional arguments the
# second textbox would be taken for the `examples` parameter.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"]),
    outputs=[
        gr.Textbox(label="Py_Transcription"),
        gr.Textbox(label="Time taken for Transcription"),
    ],
)

demo.launch()