jaekookang committed on
Commit
63da1fa
1 Parent(s): 3d0edc0

added librosa

Browse files
gradio_asr_en_libri100_word_vs_bpe.py CHANGED
@@ -44,17 +44,17 @@ logger.info('model loaded')
44
  def predict(wav_file):
45
  logger.info('wav file loaded')
46
  # Load audio
47
- # speech, rate = librosa.load(wav_file, sr=16000)
48
- rate, speech = wav_file
49
- if len(speech.shape) == 2:
50
- speech = speech.T[0]
51
  # wav = wavio.read(wav_file)
52
  # speech = wav.data
53
  # rate = wav.rate
54
  logger.info(f'speech.shape: {speech.shape}')
55
  logger.info(f'speech.dtype: {speech.dtype}')
56
  logger.info(f'speech rate: {rate}')
57
- speech = speech.astype('float32')
58
 
59
  # number_of_samples = round(len(speech_raw) * float(16000) / rate)
60
  # speech = sps.resample(speech_raw, number_of_samples)
@@ -76,7 +76,7 @@ iface = gr.Interface(
76
  title='Comparison between word vs BPE tokens based on ESPNet2 ASR models',
77
  description='Two models were trained on Librispeech (clean-100h)',
78
  inputs=[
79
- gr.inputs.Audio(label='wav file', source='microphone', type='numpy')
80
  ],
81
  outputs=[
82
  gr.outputs.Textbox(label='Decoding result (word-token model)'),
 
44
  def predict(wav_file):
45
  logger.info('wav file loaded')
46
  # Load audio
47
+ speech, rate = librosa.load(wav_file, sr=16000)
48
+ # rate, speech = wav_file
49
+ # if len(speech.shape) == 2:
50
+ # speech = speech.T[0]
51
  # wav = wavio.read(wav_file)
52
  # speech = wav.data
53
  # rate = wav.rate
54
  logger.info(f'speech.shape: {speech.shape}')
55
  logger.info(f'speech.dtype: {speech.dtype}')
56
  logger.info(f'speech rate: {rate}')
57
+ # speech = speech.astype('float32')
58
 
59
  # number_of_samples = round(len(speech_raw) * float(16000) / rate)
60
  # speech = sps.resample(speech_raw, number_of_samples)
 
76
  title='Comparison between word vs BPE tokens based on ESPNet2 ASR models',
77
  description='Two models were trained on Librispeech (clean-100h)',
78
  inputs=[
79
+ gr.inputs.Audio(label='wav file', source='microphone', type='filepath')
80
  ],
81
  outputs=[
82
  gr.outputs.Textbox(label='Decoding result (word-token model)'),