imansarraf committed on
Commit
ccb1371
·
verified ·
1 Parent(s): 5f18828

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -2
app.py CHANGED
@@ -1,12 +1,44 @@
1
  import gradio as gr
2
  from sad_tf import *
 
 
 
 
3
 
4
  seg = Segmenter(ffmpeg_path="ffmpeg",model_path="keras_speech_music_noise_cnn.hdf5" , device="cpu",vad_type="vad")
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
 
7
  def transcribe_audio(audio_file):
8
- isig = seg(audio_file)
9
- return isig
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Define the Gradio interface
12
  interface = gr.Interface(
 
1
  import gradio as gr
2
  from sad_tf import *
3
+ from autosub import SpeechRecognizer
4
+ from autosub import GOOGLE_SPEECH_API_KEY
5
+ import soundfile as sf
6
+
7
 
8
  seg = Segmenter(ffmpeg_path="ffmpeg",model_path="keras_speech_music_noise_cnn.hdf5" , device="cpu",vad_type="vad")
9
 
10
+ recognizer = SpeechRecognizer(language="fa", rate=16000,api_key=GOOGLE_SPEECH_API_KEY, proxies=None)
11
+
12
def process_segment(args):
    """Transcribe one segment cut out of a longer recording.

    Args:
        args: A ``(segment, wav)`` pair. ``segment`` is a ``(start, stop)``
            pair and ``wav`` is the sliceable sample buffer of the whole
            recording. ``start``/``stop`` are multiplied by 16000 to obtain
            sample indices, so they are presumably seconds at a 16 kHz
            sample rate -- confirm against the segmenter's output.

    Returns:
        A ``(start, stop, transcript)`` tuple, where ``transcript`` is
        whatever the module-level ``recognizer`` returns for the
        FLAC-encoded slice.
    """
    (start, stop), wav = args

    # Convert the segment bounds to sample offsets into ``wav``.
    first_sample = int(start * 16000)
    last_sample = int(stop * 16000)

    flac_bytes = pcm_to_flac(wav[first_sample:last_sample])
    return start, stop, recognizer(flac_bytes)
19
+
20
def pcm_to_flac(pcm_data, sample_rate=16000):
    """Encode PCM samples as FLAC and return the encoded bytes.

    Args:
        pcm_data: Audio samples in a form accepted by ``soundfile.write``.
        sample_rate: Sample rate written into the FLAC stream, in Hz.

    Returns:
        The complete FLAC file contents as ``bytes``.
    """
    # Imported here because the module never imports ``io`` itself; without
    # this the name only resolves if ``from sad_tf import *`` happens to
    # leak it into the namespace.
    import io

    # The format must be given explicitly: an in-memory buffer has no file
    # extension for soundfile to infer it from.
    with io.BytesIO() as buffer:
        sf.write(buffer, pcm_data, sample_rate, format='FLAC')
        return buffer.getvalue()
25
 
26
+
27
def transcribe_audio(audio_file):
    """Segment an audio file and return the transcript of its segments.

    The module-level ``seg`` splits the file into labelled segments,
    ``filter_output`` post-processes them, and every remaining segment is
    sent through ``process_segment``.

    Args:
        audio_file: The audio input, passed straight to ``seg``.

    Returns:
        A single string with one line per transcribed segment, each formatted
        as ``' ' + transcript + '\\r\\n'``. Segments whose transcript is not a
        string are left out. Returns ``""`` when nothing was transcribed.
    """
    isig, wav = seg(audio_file)
    isig = filter_output(
        isig,
        max_silence=0.5,
        ignore_small_speech_segments=0.1,
        max_speech_len=15,
        split_speech_bigger_than=20,
    )
    # Each filtered entry is a 5-tuple; only the start and stop are needed.
    segments = [(start, stop) for _, start, stop, _, _ in isig]

    lines = []
    # NOTE(review): ``tqdm`` is not imported in this file directly; it is
    # presumably provided by ``from sad_tf import *`` -- confirm.
    for segment in tqdm(segments):
        _, _, transcript = process_segment((segment, wav))
        # The recognizer can hand back a non-string (presumably ``None`` when
        # it has no result). Skip those explicitly rather than relying on a
        # bare ``except`` around the string concatenation.
        if isinstance(transcript, str):
            lines.append(' ' + transcript + '\r\n')

    return ''.join(lines)
42
 
43
  # Define the Gradio interface
44
  interface = gr.Interface(