Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -4,6 +4,13 @@ from pydub import AudioSegment
 import os
 import speech_recognition as sr
 
+
+html_seeker='''
+<html> <head> <meta charset="utf-8" /> <title>Gentle</title> <style> html, body { margin: 0; padding: 0; min-width: 900px; } #header { position: fixed; top: 0; left: 0; height: 50px; min-width: 900px; line-height: 50px; width: 100%; background-color: #999; box-shadow: 0px 0px 5px 0px rgba(0,0,0,0.5); font-family: Helvetica, sans-serif; } #header, #header a { color: white; } .home { margin: 0; font-weight: bold; text-transform: lowercase; width: 100px; } h4.home { margin: 0; background: #666; padding-left: 25px; padding-right: 30px; margin-right: 20px; float: left; text-decoration: none; } .home:hover a { background: #555; } #audio { margin-top: 9px; width: 500px; display: inline-block; } #transcript { margin: 0 15px; margin-bottom: 5em; white-space: pre-wrap; line-height: 2em; max-width: 600px; color: #999; clear: both; margin-top: 75px; /*direction: rtl;*/ } .success { color: black; } .success:hover { text-decoration: underline; } .active { color: magenta; background-color: yellow; } #preloader { visibility: hidden; } </style> </head> <body> <div id="header"> <h4 class="home">Model name</h4>'''
+html_seeker1='''</div> </div> <div id="transcript" dir="auto"></div> <script> var $a = document.getElementById("audio"); window.onkeydown = function(ev) { if(ev.keyCode == 32) { ev.preventDefault(); $a.pause(); } } var $trans = document.getElementById("transcript"); var wds = []; var cur_wd; function highlight_word() { var t = $a.currentTime; // XXX: O(N); use binary search var hits = wds.filter(function(x) { return (t - x['timestamp']['0']) > 0.01 && (x['timestamp']['1'] - t) > 0.01; }, wds); var next_wd = hits[hits.length - 1]; if(cur_wd != next_wd) { var active = document.querySelectorAll('.active'); for(var i = 0; i < active.length; i++) { active[i].classList.remove('active'); } if(next_wd && next_wd.$div) { next_wd.$div.classList.add('active'); //render_phones(next_wd); } } cur_wd = next_wd; //highlight_phone(t); window.requestAnimationFrame(highlight_word); } window.requestAnimationFrame(highlight_word); $trans.innerHTML = "Loading..."; function render(ret) { wds = ret['chunks'] || []; transcript = ret['text']; $trans.innerHTML = ''; var currentOffset = 0; wds.forEach(function(wd) { var $wd = document.createElement('span'); var txt = wd['text']; var $wdText = document.createTextNode(txt); $wd.appendChild($wdText); wd.$div = $wd; $wd.className = 'success'; $wd.onclick = function() { console.log(wd['timestamp']['0']); $a.currentTime = wd['timestamp']['0']; $a.play(); }; $trans.appendChild($wd); $trans.appendChild(document.createTextNode(' ')); }); } function update() { if(INLINE_JSON) { // We want this to work from file:/// domains, so we provide a // mechanism for inlining the alignment data. render(INLINE_JSON); } } var INLINE_JSON='''
+html_seeker2=''';update();
+</script>'''
+
 model_name = "voidful/wav2vec2-xlsr-multilingual-56"
 model0 = pipeline(task="automatic-speech-recognition",
                   model=model_name)
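Review note: the three `html_seeker*` fragments are a Gentle-style playback page split around two injection points, so that `predict_fa` below can splice in the audio path (between `html_seeker` and `html_seeker1`) and the word-timestamp JSON (between `html_seeker1` and `html_seeker2`, where it becomes `INLINE_JSON`). Two things stand out. First, `html_seeker1` is committed as a single physical line, so its `//` line comments (starting at `// XXX: O(N); use binary search`) comment out the rest of the script, which will then fail to parse; they would need to become `/* ... */` comments or real line breaks. Second, the script looks up `document.getElementById("audio")`, but no `<audio id="audio">` element appears in the fragments; the bare file path is spliced into the header instead. A hedged sketch of the assembly the script seems to expect (the `render_page` helper and the `<audio>` wrapper are assumptions, not part of this commit):

import json

def render_page(speech, text):
    # Hypothetical: wrap the path in the <audio id="audio"> element the
    # embedded script looks up; predict_fa as committed splices in the
    # bare path instead.
    audio_tag = '<audio id="audio" controls src="' + speech + '"></audio>'
    return html_seeker + audio_tag + html_seeker1 + json.dumps(text) + html_seeker2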
@@ -24,7 +31,7 @@ def predict_fa(speech,model):
         text = model1(speech,return_timestamps="word" )
     elif model== "voidful/wav2vec2-xlsr-multilingual-56":
         text = model0(speech,return_timestamps="word" )
-    return [text['text'],json.dumps(text)]
+    return [text['text'],json.dumps(text),html_seeker+speech+html_seeker1+json.dumps(text)+html_seeker2]
 
 
 def convert_to_wav(filename):
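The new third return value relies on the dict shape the `transformers` ASR pipeline produces when `return_timestamps="word"` is set: the full transcription under `text` and per-word chunks under `chunks`, which is exactly what the embedded JS reads (`ret['chunks']`, `wd['timestamp']['0']`). A runnable illustration of that shape, with made-up values:

import json

# Approximate shape of pipeline(..., return_timestamps="word")(speech);
# real words and timestamps come from the model.
text = {
    "text": "salam donya",
    "chunks": [
        {"text": "salam", "timestamp": (0.32, 0.78)},
        {"text": "donya", "timestamp": (0.91, 1.40)},
    ],
}

# json.dumps turns each (start, end) tuple into a JSON array, which the
# script indexes as wd['timestamp']['0'] and wd['timestamp']['1'].
print(json.dumps(text))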
@@ -64,6 +71,7 @@ with gr.Blocks() as demo:
         inputs_model_fa =gr.inputs.Radio(label="Language", choices=["ghofrani/common8","SLPL/Sharif-wav2vec2","voidful/wav2vec2-xlsr-multilingual-56"])
         output_transcribe1_fa = gr.Textbox(label="Transcribed text:")
         output_transcribe1_fa1 = gr.Textbox(label="Transcribed text with timestamps:")
+        output_transcribe1_fa2 =gradio.HTML("")
         transcribe_audio1_fa= gr.Button("Submit")
     with gr.Tab("google"):
         gr.Markdown("set your speech language")
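A likely cause of the Space's "Runtime error" badge: the added line calls `gradio.HTML(...)` while every other component in the block goes through the `gr` alias (`gr.Textbox`, `gr.Button`), so unless the module is also imported under the name `gradio`, the app raises `NameError: name 'gradio' is not defined` at startup. The one-line fix, assuming the usual `import gradio as gr` at the top of app.py:

output_transcribe1_fa2 = gr.HTML("")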
@@ -77,7 +85,7 @@ with gr.Blocks() as demo:
 
     transcribe_audio1_fa.click(fn=predict_fa,
                                inputs=[inputs_speech_fa ,inputs_model_fa ],
-                               outputs=[output_transcribe1_fa ,output_transcribe1_fa1 ] )
+                               outputs=[output_transcribe1_fa ,output_transcribe1_fa1,output_transcribe1_fa2 ] )
 
     transcribe_audio1_go.click(fn=g_rec,
                                inputs=inputs_speech1 ,
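Gradio maps the list returned by `fn` positionally onto `outputs`, so the three-element return of `predict_fa` needs all three components wired here. A minimal self-contained sketch of the same wiring pattern (names and function body are placeholders, not the Space's real code):

import gradio as gr

def fake_predict(audio_path, model_name):
    # Stand-in for predict_fa: plain text, JSON string, HTML, in that order.
    return "hello", '{"text": "hello"}', "<b>hello</b>"

with gr.Blocks() as demo:
    audio = gr.Audio(type="filepath")
    model = gr.Radio(choices=["a", "b"], label="Model")
    out_text = gr.Textbox(label="Transcribed text:")
    out_json = gr.Textbox(label="Transcribed text with timestamps:")
    out_html = gr.HTML("")
    gr.Button("Submit").click(fn=fake_predict,
                              inputs=[audio, model],
                              outputs=[out_text, out_json, out_html])

demo.launch()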