jaekookang committed on
Commit
e80f2b5
1 Parent(s): 4a43745

add txt comparison

Browse files
gradio_asr_en_libri100_word_vs_bpe.py CHANGED
@@ -5,14 +5,12 @@
5
  '''
6
 
7
  import os
 
8
  from glob import glob
9
  from loguru import logger
10
- # import soundfile as sf
11
  import librosa
12
- # from scipy.io import wavfile
13
  import gradio as gr
14
 
15
- # from espnet_model_zoo.downloader import ModelDownloader
16
  from espnet2.bin.asr_inference import Speech2Text
17
 
18
  # ---------- Settings ----------
@@ -28,6 +26,8 @@ MODEL_DIR = './model'
28
  EXAMPLE_DIR = './examples'
29
  examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
30
 
 
 
31
  # ---------- Logging ----------
32
  logger.add('app.log', mode='a')
33
  logger.info('============================= App restarted =============================')
@@ -46,9 +46,10 @@ def predict(wav_file):
46
  # Run inference
47
  W = model_word(speech)[0]
48
  B = model_bpe(speech)[0]
 
49
 
50
  logger.info('predicted')
51
- return W[0], B[0]
52
 
53
  iface = gr.Interface(
54
  predict,
@@ -58,8 +59,9 @@ iface = gr.Interface(
58
  gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
59
  ],
60
  outputs=[
61
- gr.outputs.Textbox(label='decoding result (word-token model)'),
62
- gr.outputs.Textbox(label='decoding result (BPE-token model)'),
 
63
  ],
64
  examples=examples,
65
  # article='<p style="text-align:center">Model URL<a target="_blank" href="https://huggingface.co/jkang/espnet2_librispeech_100_conformer">🤗</a></p>',
 
5
  '''
6
 
7
  import os
8
+ from difflib import Differ
9
  from glob import glob
10
  from loguru import logger
 
11
  import librosa
 
12
  import gradio as gr
13
 
 
14
  from espnet2.bin.asr_inference import Speech2Text
15
 
16
  # ---------- Settings ----------
 
26
  EXAMPLE_DIR = './examples'
27
  examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
28
 
29
+ d = Differ()
30
+
31
  # ---------- Logging ----------
32
  logger.add('app.log', mode='a')
33
  logger.info('============================= App restarted =============================')
 
46
  # Run inference
47
  W = model_word(speech)[0]
48
  B = model_bpe(speech)[0]
49
+ comparison = list(d.compare([W+'\n'], [B+'\n']))
50
 
51
  logger.info('predicted')
52
+ return W[0], B[0], comparison
53
 
54
  iface = gr.Interface(
55
  predict,
 
59
  gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
60
  ],
61
  outputs=[
62
+ gr.outputs.Textbox(label='Decoding result (word-token model)'),
63
+ gr.outputs.Textbox(label='Decoding result (BPE-token model)'),
64
+ gr.outputs.Textbox(label='Comparison'),
65
  ],
66
  examples=examples,
67
  # article='<p style="text-align:center">Model URL<a target="_blank" href="https://huggingface.co/jkang/espnet2_librispeech_100_conformer">🤗</a></p>',