jaekookang committed on
Commit
836ff83
·
1 Parent(s): 1aa7e27

changed the Gradio audio input type from filepath to numpy

Browse files
gradio_asr_en_libri100_word_vs_bpe.py CHANGED
@@ -8,8 +8,8 @@ import os
8
  from difflib import Differ
9
  from glob import glob
10
  from loguru import logger
11
- # import librosa
12
- import wavio
13
  import gradio as gr
14
 
15
  from espnet2.bin.asr_inference import Speech2Text
@@ -45,9 +45,10 @@ def predict(wav_file):
45
  logger.info('wav file loaded')
46
  # Load audio
47
  # speech, rate = librosa.load(wav_file, sr=16000)
48
- wav = wavio.read(wav_file)
49
- speech = wav.data
50
- rate = wav.rate
 
51
 
52
  # number_of_samples = round(len(speech_raw) * float(16000) / rate)
53
  # speech = sps.resample(speech_raw, number_of_samples)
@@ -68,7 +69,7 @@ iface = gr.Interface(
68
  title='Comparison between word vs BPE tokens based on ESPNet2 ASR models',
69
  description='Two models were trained on Librispeech (clean-100h)',
70
  inputs=[
71
- gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
72
  ],
73
  outputs=[
74
  gr.outputs.Textbox(label='Decoding result (word-token model)'),
 
8
  from difflib import Differ
9
  from glob import glob
10
  from loguru import logger
11
+ import librosa
12
+ # import wavio
13
  import gradio as gr
14
 
15
  from espnet2.bin.asr_inference import Speech2Text
 
45
  logger.info('wav file loaded')
46
  # Load audio
47
  # speech, rate = librosa.load(wav_file, sr=16000)
48
+ rate, speech = wav_file
49
+ # wav = wavio.read(wav_file)
50
+ # speech = wav.data
51
+ # rate = wav.rate
52
 
53
  # number_of_samples = round(len(speech_raw) * float(16000) / rate)
54
  # speech = sps.resample(speech_raw, number_of_samples)
 
69
  title='Comparison between word vs BPE tokens based on ESPNet2 ASR models',
70
  description='Two models were trained on Librispeech (clean-100h)',
71
  inputs=[
72
+ gr.inputs.Audio(label='wav file', source='microphone', type='numpy')
73
  ],
74
  outputs=[
75
  gr.outputs.Textbox(label='Decoding result (word-token model)'),