Spaces:

arampacha
/

asr-ukrainian

Runtime error

arampacha committed on Mar 21, 2022

Commit

c369354

1 Parent(s): 6b3bdcb

add example

Files changed (3) hide show

app.py CHANGED Viewed

@@ -3,6 +3,11 @@ from librosa import to_mono, resample
 import numpy as np
 import gradio as gr
 model_id = "arampacha/wav2vec2-xls-r-1b-uk"
 processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
@@ -12,22 +17,20 @@ asr = pipeline(
     feature_extractor=processor.feature_extractor, decoder=processor.decoder
 )
 def run_asr(audio):
-    try:
-        sr, audio_array = audio
-        audio_array = audio_array.astype(np.float32)
-        if len(audio_array.shape) > 1:
-            if audio_array.shape[1] == 1:
-                audio_array = audio_array.squeeze()
-            elif audio_array.shape[1] == 2:
-                audio_array = to_mono(audio_array.T)
-            else:
-                raise ValueError("Audio with > 2 channels not supported")
-        if sr != 16_000:
-            audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)
-        res = asr(audio_array, chunk_length_s=20, stride_length_s=2)["text"]
-    except Exception as e:
-        res = e
-    return res
 text_out = gr.outputs.Textbox(label="transcript")
 interface = gr.Interface(
@@ -37,7 +40,9 @@ interface = gr.Interface(
     layout="horizontal",
     theme="huggingface",
     title="Speech-to-text Ukrainian",
-    flagging_options=["incorrect"]
 )
 interface.launch(debug=True)

 import numpy as np
 import gradio as gr
+DESC = """\
+Ukrainian speech recognition app/
+Розпізнавання голосу для української мови
+"""
 model_id = "arampacha/wav2vec2-xls-r-1b-uk"
 processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
     feature_extractor=processor.feature_extractor, decoder=processor.decoder
 )
 def run_asr(audio):
+    sr, audio_array = audio
+    audio_array = audio_array.astype(np.float32)
+    if len(audio_array.shape) > 1:
+        if audio_array.shape[1] == 1:
+            audio_array = audio_array.squeeze()
+        elif audio_array.shape[1] == 2:
+            audio_array = to_mono(audio_array.T)
+        else:
+            raise ValueError("Audio with > 2 channels not supported")
+    if sr != 16_000:
+        audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)
+    res = asr(audio_array, chunk_length_s=20, stride_length_s=2)
+    return res["text"]
 text_out = gr.outputs.Textbox(label="transcript")
 interface = gr.Interface(
     layout="horizontal",
     theme="huggingface",
     title="Speech-to-text Ukrainian",
+    description=DESC,
+    flagging_options=["incorrect"],
+    examples=["examples/dobryi_den.wav"]
 )
 interface.launch(debug=True)

examples/dobryi_den.wav ADDED Viewed

Binary file (317 kB). View file

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ torch==1.10.2+cpu
 librosa
 transformers
 pypi-kenlm
-pyctcdecode

 librosa
 transformers
 pypi-kenlm
+pyctcdecode
+ffprobe