Spaces:

jkang
/

espnet2_librispeech_100h_word_vs_bpe_vs_char

Runtime error

jaekookang committed on Feb 23, 2022

Commit

e80f2b5

•

1 Parent(s): 4a43745

add txt comparison

Files changed (1) hide show

gradio_asr_en_libri100_word_vs_bpe.py CHANGED Viewed

@@ -5,14 +5,12 @@
 '''
 import os
 from glob import glob
 from loguru import logger
-# import soundfile as sf
 import librosa
-# from scipy.io import wavfile
 import gradio as gr
-# from espnet_model_zoo.downloader import ModelDownloader
 from espnet2.bin.asr_inference import Speech2Text
 # ---------- Settings ----------
@@ -28,6 +26,8 @@ MODEL_DIR = './model'
 EXAMPLE_DIR = './examples'
 examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
 # ---------- Logging ----------
 logger.add('app.log', mode='a')
 logger.info('============================= App restarted =============================')
@@ -46,9 +46,10 @@ def predict(wav_file):
     # Run inference
     W = model_word(speech)[0]
     B = model_bpe(speech)[0]
     logger.info('predicted')
-    return W[0], B[0]
 iface = gr.Interface(
     predict,
@@ -58,8 +59,9 @@ iface = gr.Interface(
         gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
     ],
     outputs=[
-        gr.outputs.Textbox(label='decoding result (word-token model)'),
-        gr.outputs.Textbox(label='decoding result (BPE-token model)'),
     ],
     examples=examples,
     # article='<p style="text-align:center">Model URL<a target="_blank" href="https://huggingface.co/jkang/espnet2_librispeech_100_conformer">🤗</a></p>',

 '''
 import os
+from difflib import Differ
 from glob import glob
 from loguru import logger
 import librosa
 import gradio as gr
 from espnet2.bin.asr_inference import Speech2Text
 # ---------- Settings ----------
 EXAMPLE_DIR = './examples'
 examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
+d = Differ()
 # ---------- Logging ----------
 logger.add('app.log', mode='a')
 logger.info('============================= App restarted =============================')
     # Run inference
     W = model_word(speech)[0]
     B = model_bpe(speech)[0]
+    comparison = list(d.compare([W+'\n'], [B+'\n']))
     logger.info('predicted')
+    return W[0], B[0], comparison
 iface = gr.Interface(
     predict,
         gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
     ],
     outputs=[
+        gr.outputs.Textbox(label='Decoding result (word-token model)'),
+        gr.outputs.Textbox(label='Decoding result (BPE-token model)'),
+        gr.outputs.Textbox(label='Comparison'),
     ],
     examples=examples,
     # article='<p style="text-align:center">Model URL<a target="_blank" href="https://huggingface.co/jkang/espnet2_librispeech_100_conformer">🤗</a></p>',