File size: 1,963 Bytes
3f298f4
f485045
ccb1371
 
 
2cf8580
ccb1371
3be8c66
8dfefc8
3037e1b
3f298f4
1a69e12
3be8c66
1a69e12
 
5f18828
b50bfc8
467cc5c
ccb1371
 
3be8c66
ccb1371
 
 
 
 
 
 
 
 
 
 
b50bfc8
ccb1371
3be8c66
 
ccb1371
 
 
 
51a052f
ccb1371
5a87b08
3be8c66
ccb1371
51a052f
ccb1371
51a052f
ccb1371
51a052f
 
 
ccb1371
 
 
 
3f298f4
2f5b4f8
 
 
3be8c66
 
8dfefc8
3037e1b
 
 
2f5b4f8
3f298f4
2f5b4f8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
from iman.sad_tfpy10 import *
from autosub import SpeechRecognizer
from autosub import GOOGLE_SPEECH_API_KEY
import soundfile as sf
import io

# Custom stylesheet passed to the Gradio interface: textareas are laid out
# right-to-left and right-aligned. Written as explicit pieces so the
# leading/trailing whitespace of each line is visible.
css = (
    " \n"
    " textarea { direction: rtl; text-align: right; font-family: Calibri, sans-serif; font-size: 16px;} \n"
)


# Speech recognizer from autosub, configured for language code "fa" at a
# 16000 Hz rate, using the API key that autosub itself exports.
recognizer = SpeechRecognizer(
    language="fa",
    rate=16000,
    api_key=GOOGLE_SPEECH_API_KEY,
    proxies=None,
)

# Audio segmenter; `Segmenter` is presumably supplied by the star import from
# iman.sad_tfpy10 -- confirm. It is called later with an audio file path and
# its result is unpacked as (segments, waveform).
seg = Segmenter(
    ffmpeg_path="ffmpeg",
    model_path="keras_speech_music_noise_cnn.hdf5",
    device="cpu",
    vad_type="vad",
)



def process_segment(args):
    """Run speech recognition on one time span of a waveform.

    Args:
        args: A ``(segment, wav)`` pair. ``segment`` is a ``(start, stop)``
            pair and ``wav`` is a sliceable sequence of samples. The offsets
            are multiplied by 16000 to obtain sample indices, so they are
            presumably seconds of 16 kHz audio -- confirm against the
            segmenter's output.

    Returns:
        A ``(start, stop, transcript)`` tuple, where ``transcript`` is
        whatever the module-level ``recognizer`` returns for the
        FLAC-encoded slice.
    """
    span, wav = args
    start, stop = span
    first_sample = int(start * 16000)
    end_sample = int(stop * 16000)
    flac_bytes = pcm_to_flac(wav[first_sample:end_sample])
    return start, stop, recognizer(flac_bytes)

def pcm_to_flac(pcm_data, sample_rate=16000):
    """Encode PCM samples as FLAC and return the encoded bytes.

    Args:
        pcm_data: Samples to encode, in any form ``soundfile.write`` accepts.
        sample_rate: Sample rate written into the FLAC stream
            (16000 by default).

    Returns:
        The complete FLAC stream as ``bytes``.
    """
    # Encode into an in-memory stream instead of a temporary file on disk.
    with io.BytesIO() as flac_stream:
        sf.write(flac_stream, pcm_data, sample_rate, format='FLAC')
        return flac_stream.getvalue()

    
def transcribe_audio(audio_file):
    """Transcribe the speech found in an audio file.

    The file is split by the module-level ``seg``, the segments are
    post-processed by ``filter_output``, and every resulting
    ``(start, stop)`` span is passed through ``process_segment``.

    Args:
        audio_file: Path of the audio file to transcribe. A missing value
            (``None`` or an empty string) is accepted and yields an empty
            transcription.

    Returns:
        The transcripts of all recognized segments in segment order, each
        prefixed with a space and terminated by CRLF. Segments for which no
        text transcript was produced are skipped. Returns ``""`` when there
        is no input or nothing was recognized.
    """
    # Nothing to transcribe: return early instead of feeding a missing path
    # to the segmenter.
    if not audio_file:
        return ""

    segments, wav = seg(audio_file)
    segments = filter_output(
        segments,
        max_silence=0.5,
        ignore_small_speech_segments=0.1,
        max_speech_len=15,
        split_speech_bigger_than=20,
    )
    # Each segment is a 5-tuple; only its second and third fields
    # (start, stop) are needed from here on.
    spans = [(start, stop) for _, start, stop, _, _ in segments]
    print(spans)

    # Recognize every span first, then assemble the text.
    results = [process_segment((span, wav)) for span in spans]

    text = ""
    for start, stop, transcript in results:
        # The recognizer presumably yields None when it could not produce
        # text for a segment (confirm against autosub); skip such segments
        # explicitly rather than relying on a swallowed TypeError.
        if not isinstance(transcript, str):
            continue

        text += ' ' + transcript + '\r\n'

        # Debug output only. A console that cannot encode the transcript
        # must not abort the transcription.
        try:
            print(start)
            print(stop)
            print(text)
        except (UnicodeError, OSError):
            pass

    return text

# Build the Gradio UI: a single audio input, delivered to the callback as a
# file path, and an editable textbox that shows the transcription.
audio_input = gr.Audio(type="filepath")
transcription_box = gr.Textbox(
    label="Transcription",
    elem_id="output-text",
    interactive=True,
)

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=transcription_box,
    title="Persian Audio Transcription",
    description="Upload an audio file or record audio to get the transcription.",
    css=css,
)

# Start serving the app as soon as this module is executed.
interface.launch()