Spaces:
Sleeping
Sleeping
File size: 2,258 Bytes
3f298f4 2f5b4f8 ccb1371 2cf8580 ccb1371 1a69e12 8dfefc8 3037e1b 3f298f4 1a69e12 5f18828 b50bfc8 467cc5c ccb1371 467cc5c ccb1371 b50bfc8 ccb1371 467cc5c 1a69e12 467cc5c ccb1371 51a052f ccb1371 5a87b08 467cc5c ccb1371 51a052f ccb1371 51a052f ccb1371 51a052f ccb1371 3f298f4 2f5b4f8 467cc5c 8dfefc8 3037e1b 2f5b4f8 3f298f4 2f5b4f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
from sad_tf import *
from autosub import SpeechRecognizer
from autosub import GOOGLE_SPEECH_API_KEY
import soundfile as sf
import io
# Shared stylesheet rule for text areas; only the writing direction and the
# text alignment differ between the two variants below.
_TEXTAREA_CSS_TEMPLATE = (
    "\n"
    "textarea {{ direction: {direction}; text-align: {align}; "
    "font-family: Calibri, sans-serif; font-size: 16px;}}"
    "\n"
)

# Right-to-left, right-aligned variant.
cssfa = _TEXTAREA_CSS_TEMPLATE.format(direction="rtl", align="right")

# Left-to-right, left-aligned variant.
cssen = _TEXTAREA_CSS_TEMPLATE.format(direction="ltr", align="left")

# Stylesheet string passed to the Gradio interface when it is constructed.
css = ""
# Module-level segmenter used by transcribe_audio to split an audio file.
# (Segmenter is presumably provided by the sad_tf star import.)
seg = Segmenter(
    ffmpeg_path="ffmpeg",
    model_path="keras_speech_music_noise_cnn.hdf5",
    device="cpu",
    vad_type="vad",
)
def process_segment(args):
    """Run the recognizer on one span of the waveform.

    Args:
        args: A ``(segment, wav, recognizer)`` tuple. ``segment`` is a
            ``(start, stop)`` pair, ``wav`` is a sliceable sample buffer and
            ``recognizer`` is a callable that takes FLAC-encoded bytes.

    Returns:
        A ``(start, stop, transcript)`` tuple, where ``transcript`` is
        whatever the recognizer returned for the span.
    """
    (begin, end), samples, recognize = args

    # Span bounds are scaled by 16000 (the default sample rate of
    # pcm_to_flac) to obtain sample indices -- presumably the bounds are
    # expressed in seconds.
    rate = 16000
    first = int(begin * rate)
    last = int(end * rate)

    flac_bytes = pcm_to_flac(samples[first:last])
    transcript = recognize(flac_bytes)
    return begin, end, transcript
def pcm_to_flac(pcm_data, sample_rate=16000):
    """Encode PCM samples as FLAC entirely in memory.

    Args:
        pcm_data: Samples to encode; passed unchanged to ``sf.write``.
        sample_rate: Sample rate handed to ``sf.write`` (default 16000).

    Returns:
        The encoded FLAC stream as ``bytes``.
    """
    with io.BytesIO() as sink:
        sf.write(sink, pcm_data, sample_rate, format='FLAC')
        # Copy the bytes out before the in-memory buffer is closed.
        return sink.getvalue()
def transcribe_audio(audio_file, lan):
    """Transcribe an audio file span by span.

    The file is split by the module-level segmenter ``seg``, the resulting
    spans are post-processed with ``filter_output``, and every remaining
    span is sent to the speech recognizer through ``process_segment``.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` (no audio
            supplied) yields an empty transcription.
        lan: Language code handed to the recognizer. A missing or empty
            value falls back to ``"fa"``.

    Returns:
        The transcription as a single string. Each recognized span
        contributes one entry made of a leading space, the transcript and a
        trailing CRLF. Spans whose result is not a string are skipped.
    """
    # Nothing to transcribe when the form is submitted without audio.
    if audio_file is None:
        return ""

    # NOTE: earlier code selected an RTL/LTR stylesheet here, but it was only
    # bound to a local name and therefore never reached the interface (which
    # is built with the module-level ``css``). That dead assignment is gone.

    # Fall back to Persian when no language was selected, in line with the
    # app title, instead of forwarding None to the recognizer.
    language = lan or "fa"

    recognizer = SpeechRecognizer(
        language=language,
        rate=16000,
        api_key=GOOGLE_SPEECH_API_KEY,
        proxies=None,
    )

    raw_segments, wav = seg(audio_file)
    filtered = filter_output(
        raw_segments,
        max_silence=0.5,
        ignore_small_speech_segments=0.1,
        max_speech_len=15,
        split_speech_bigger_than=20,
    )
    # Keep only the (start, stop) bounds of every 5-field segment record.
    spans = [(start, stop) for _label, start, stop, _, _ in filtered]
    print(spans)

    results = [process_segment((span, wav, recognizer)) for span in spans]

    text = ""
    for start, stop, transcript in results:
        # A non-string result (e.g. None, presumably returned when
        # recognition fails) is skipped explicitly rather than hiding every
        # possible error behind a bare ``except``.
        if not isinstance(transcript, str):
            continue
        text += ' ' + transcript + '\r\n'
        print(start)
        print(stop)
        print(text)
    return text
# Define the Gradio interface.
# NOTE: ``css`` is the module-level stylesheet string, which is empty, so the
# ``cssfa``/``cssen`` rules are not applied to the page.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath"),
        # Pre-select Persian so the handler receives a language code even
        # when the user never touches the radio group.
        gr.Radio(choices=["fa", "en", "ar"], value="fa", label="Language"),
    ],
    outputs=gr.Textbox(
        label="Transcription",
        elem_id="output-text",
        interactive=True,
    ),
    title="Persian Audio Transcription",
    description="Upload an audio file or record audio to get the transcription.",
    css=css,
)

# Launch the Gradio app
interface.launch()
|