Spaces:
Sleeping
Sleeping
File size: 1,963 Bytes
3f298f4 f485045 ccb1371 2cf8580 ccb1371 3be8c66 8dfefc8 3037e1b 3f298f4 1a69e12 3be8c66 1a69e12 5f18828 b50bfc8 467cc5c ccb1371 3be8c66 ccb1371 b50bfc8 ccb1371 3be8c66 ccb1371 51a052f ccb1371 5a87b08 3be8c66 ccb1371 51a052f ccb1371 51a052f ccb1371 51a052f ccb1371 3f298f4 2f5b4f8 3be8c66 8dfefc8 3037e1b 2f5b4f8 3f298f4 2f5b4f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import gradio as gr
from iman.sad_tfpy10 import *
from autosub import SpeechRecognizer
from autosub import GOOGLE_SPEECH_API_KEY
import soundfile as sf
import io
# Stylesheet handed to the Gradio interface: textareas are laid out
# right-to-left and right-aligned.
css = """
textarea { direction: rtl; text-align: right; font-family: Calibri, sans-serif; font-size: 16px;}
"""

# Shared speech recognizer, configured for language code "fa" at 16000 Hz.
recognizer = SpeechRecognizer(
    language="fa",
    rate=16000,
    api_key=GOOGLE_SPEECH_API_KEY,
    proxies=None,
)

# Shared segmenter; called with an audio file path in transcribe_audio.
seg = Segmenter(
    ffmpeg_path="ffmpeg",
    model_path="keras_speech_music_noise_cnn.hdf5",
    device="cpu",
    vad_type="vad",
)
def process_segment(args):
    """Recognize the speech contained in one time span of a waveform.

    Args:
        args: A ``(segment, wav)`` pair, where ``segment`` is itself a
            ``(start, stop)`` pair and ``wav`` is a sliceable sample buffer.

    Returns:
        A ``(start, stop, transcript)`` tuple; ``transcript`` is whatever the
        module-level ``recognizer`` returns for the FLAC-encoded slice.
    """
    (start, stop), wav = args

    # The bounds are scaled by 16000 to obtain sample indices into ``wav``.
    first_sample = int(start * 16000)
    last_sample = int(stop * 16000)

    flac_bytes = pcm_to_flac(wav[first_sample:last_sample])
    return start, stop, recognizer(flac_bytes)
def pcm_to_flac(pcm_data, sample_rate=16000):
    """Encode ``pcm_data`` as FLAC and return the encoded bytes.

    Args:
        pcm_data: Audio samples, passed unchanged to ``soundfile.write``.
        sample_rate: Sample rate written into the FLAC stream.

    Returns:
        The complete FLAC file contents as ``bytes``.
    """
    # Encode into memory so no temporary file is needed.
    with io.BytesIO() as stream:
        sf.write(stream, pcm_data, sample_rate, format='FLAC')
        return stream.getvalue()
def transcribe_audio(audio_file):
    """Transcribe the speech segments found in ``audio_file``.

    The file is run through the module-level segmenter ``seg``, the resulting
    segments are post-processed by ``filter_output``, and each remaining
    ``(start, stop)`` span is recognized with ``process_segment``.

    Args:
        audio_file: Value forwarded to ``seg``; the Gradio interface supplies
            a file path here.

    Returns:
        A string made of one ``' <transcript>\\r\\n'`` line per segment that
        produced a string transcript, in segment order. Segments without a
        string transcript are skipped. Returns ``""`` when nothing was
        recognized.
    """
    isig, wav = seg(audio_file)
    isig = filter_output(
        isig,
        max_silence=0.5,
        ignore_small_speech_segments=0.1,
        max_speech_len=15,
        split_speech_bigger_than=20,
    )
    # Each filtered entry is a 5-tuple; only its 2nd and 3rd fields
    # (the span bounds) are needed from here on.
    spans = [(start, stop) for _, start, stop, _, _ in isig]
    print(spans)

    # Recognize every span first, then assemble the text.
    results = [process_segment((span, wav)) for span in spans]

    lines = []
    for start, stop, transcript in results:
        # A non-string transcript (e.g. None) cannot be appended; skip that
        # segment explicitly instead of relying on a blanket ``except``.
        if not isinstance(transcript, str):
            continue
        lines.append(' ' + transcript + '\r\n')
        try:
            print(start)
            print(stop)
            print(''.join(lines))
        except UnicodeEncodeError:
            # Progress output is best-effort: a console that cannot encode
            # the transcript must not abort the transcription.
            pass
    return ''.join(lines)
# Build the Gradio components first, then wire them to transcribe_audio.
_audio_input = gr.Audio(type="filepath")
_transcript_output = gr.Textbox(
    label="Transcription",
    elem_id="output-text",
    interactive=True,
)

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_transcript_output,
    title="Persian Audio Transcription",
    description="Upload an audio file or record audio to get the transcription.",
    css=css,
)

# Start the app when the module is executed.
interface.launch()
|